next.orly.dev/pkg/policy/policy.go

package policy

import (
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/adrg/xdg"
	"lol.mleku.dev/chk"
	"lol.mleku.dev/log"
	"next.orly.dev/pkg/encoders/event"
	"next.orly.dev/pkg/encoders/hex"
	"next.orly.dev/pkg/utils"
)

// Kinds configures kind-level filtering through an optional whitelist and an
// optional blacklist of event kind numbers.
//
// The whitelist takes precedence: when it has entries, only the listed kinds
// pass and the blacklist is not consulted. When only the blacklist has
// entries, listed kinds are rejected; checkKindsPolicy additionally requires
// a non-blacklisted kind to have an entry in P.Rules before it passes.
type Kinds struct {
	// Whitelist lists the event kinds that may be written to the relay. When it is non-empty, every kind not listed is denied.
	Whitelist []int `json:"whitelist,omitempty"`
	// Blacklist lists the event kinds that may not be written to the relay. It is only consulted when Whitelist is empty.
	Blacklist []int `json:"blacklist,omitempty"`
}

// Rule defines policy criteria for a specific event kind; the same type is
// used for P.Global, which is applied to every event.
//
// As implemented by CheckPolicy, a kind-specific rule is handled as follows:
//  1. If Script is set, the policy manager is enabled, the script file exists
//     and the script check succeeds, the script's decision is returned as is.
//  2. If Script is set but the policy manager is disabled, the default policy
//     decides.
//  3. If the script file is missing or the script check fails, the remaining
//     criteria of the rule are evaluated instead.
//
// For pubkey allow/deny lists the field comments below state that the allow
// list takes precedence over the deny list.
// NOTE(review): the header comment of checkRulePolicy describes deny lists as
// being evaluated before allow lists — confirm which description is current.
type Rule struct {
	// Description is a human-readable description of the rule.
	Description string `json:"description"`
	// Script is a path to a script that decides whether the event is allowed. The script should be a standard bash script or whatever is native to the platform. When the script check succeeds, CheckPolicy returns its decision directly; the other criteria of the rule are only evaluated when the script is missing or the check fails.
	Script string `json:"script,omitempty"`
	// WriteAllow is a list of hex-encoded pubkeys that are allowed to write this event kind to the relay. If any are present, implicitly all others are denied.
	WriteAllow []string `json:"write_allow,omitempty"`
	// WriteDeny is a list of hex-encoded pubkeys that are not allowed to write this event kind to the relay. If any are present, implicitly all others are allowed. Only takes effect in the absence of a WriteAllow.
	WriteDeny []string `json:"write_deny,omitempty"`
	// ReadAllow is a list of hex-encoded pubkeys that are allowed to read this event kind from the relay. If any are present, implicitly all others are denied.
	ReadAllow []string `json:"read_allow,omitempty"`
	// ReadDeny is a list of hex-encoded pubkeys that are not allowed to read this event kind from the relay. If any are present, implicitly all others are allowed. Only takes effect in the absence of a ReadAllow.
	ReadDeny []string `json:"read_deny,omitempty"`
	// MaxExpiry is the maximum expiry time in seconds for events written to the relay, relative to the event's created_at time. When it is set, the event must carry an "expiration" tag.
	// NOTE(review): checkRulePolicy currently only checks that the tag is present; validating its value is still a TODO there.
	MaxExpiry *int64 `json:"max_expiry,omitempty"`
	// MustHaveTags is a list of tag keys that must all be present on the event for it to be allowed.
	MustHaveTags []string `json:"must_have_tags,omitempty"`
	// SizeLimit is the maximum size in bytes of the event's total serialized form.
	SizeLimit *int64 `json:"size_limit,omitempty"`
	// ContentLimit is the maximum size in bytes of the event's content field.
	ContentLimit *int64 `json:"content_limit,omitempty"`
	// Privileged means that this event is either authored by the authenticated pubkey, or has a p tag that contains the authenticated pubkey. This type of event is only sent to users who are authenticated and are party to the event.
	Privileged bool `json:"privileged,omitempty"`
	// RateLimit is the amount of data that can be written to the relay per second by the authenticated pubkey. If 0, there is no rate limit. It is described as being applied via an EWMA of the publication history on the authenticated connection.
	// NOTE(review): hasAnyRules does not count RateLimit as a configured constraint — confirm this is intended.
	RateLimit *int64 `json:"rate_limit,omitempty"`
	// MaxAgeOfEvent is the largest allowed age, in seconds, of an event's created_at relative to the current time. Nil or 0 disables the check.
	MaxAgeOfEvent *int64 `json:"max_age_of_event,omitempty"`
	// MaxAgeEventInFuture is the largest allowed offset, in seconds, of an event's created_at ahead of the current time. Nil or 0 disables the check.
	MaxAgeEventInFuture *int64 `json:"max_age_event_in_future,omitempty"`

	// Binary caches of the hex pubkey lists above, filled by populateBinaryCache.
	// They are unexported and therefore never serialized to JSON.
	writeAllowBin [][]byte
	writeDenyBin  [][]byte
	readAllowBin  [][]byte
	readDenyBin   [][]byte
}

// hasAnyRules reports whether at least one constraint is configured on the
// rule. Pubkey lists (hex and cached binary form), size/content limits, age
// and expiry bounds, required tags, a script path and the privileged flag all
// count; RateLimit is not inspected.
func (r *Rule) hasAnyRules() bool {
	switch {
	case len(r.WriteAllow) > 0, len(r.WriteDeny) > 0:
		return true
	case len(r.ReadAllow) > 0, len(r.ReadDeny) > 0:
		return true
	case len(r.writeAllowBin) > 0, len(r.writeDenyBin) > 0:
		return true
	case len(r.readAllowBin) > 0, len(r.readDenyBin) > 0:
		return true
	case r.SizeLimit != nil, r.ContentLimit != nil:
		return true
	case r.MaxAgeOfEvent != nil, r.MaxAgeEventInFuture != nil:
		return true
	case r.MaxExpiry != nil, len(r.MustHaveTags) > 0:
		return true
	case r.Script != "", r.Privileged:
		return true
	}
	return false
}

// populateBinaryCache rebuilds the binary pubkey caches (writeAllowBin,
// writeDenyBin, readAllowBin, readDenyBin) from the hex-encoded lists.
// This should be called after unmarshaling the policy from JSON.
//
// Decoding is best-effort: an entry that fails to decode is logged and
// skipped, and the remaining entries are still cached. The first decode
// failure is returned (wrapped) so that callers that care can detect a
// partially applied list; callers may keep ignoring the result.
//
// A cache whose source list is empty is reset to nil, so no stale entries
// survive when the same Rule value is populated again.
func (r *Rule) populateBinaryCache() error {
	var firstErr error

	// decode converts one hex list, remembering only the first failure.
	decode := func(field string, hexKeys []string) [][]byte {
		if len(hexKeys) == 0 {
			return nil
		}
		decoded := make([][]byte, 0, len(hexKeys))
		for _, hexPubkey := range hexKeys {
			binPubkey, decErr := hex.Dec(hexPubkey)
			if decErr != nil {
				log.W.F(
					"failed to decode %s pubkey %q: %v", field, hexPubkey,
					decErr,
				)
				if firstErr == nil {
					firstErr = fmt.Errorf(
						"decoding %s pubkey %q: %w", field, hexPubkey, decErr,
					)
				}
				continue
			}
			decoded = append(decoded, binPubkey)
		}
		return decoded
	}

	r.writeAllowBin = decode("WriteAllow", r.WriteAllow)
	r.writeDenyBin = decode("WriteDeny", r.WriteDeny)
	r.readAllowBin = decode("ReadAllow", r.ReadAllow)
	r.readDenyBin = decode("ReadDeny", r.ReadDeny)

	return firstErr
}

// PolicyEvent is the payload handed to a policy script: the Nostr event
// itself (embedded) plus the authentication and network context of the
// request being checked.
//
// JSON encoding of a *PolicyEvent goes through its MarshalJSON method, which
// flattens the event fields and the context fields into a single object.
type PolicyEvent struct {
	*event.E
	LoggedInPubkey string `json:"logged_in_pubkey,omitempty"`
	IPAddress      string `json:"ip_address,omitempty"`
	AccessType     string `json:"access_type,omitempty"` // "read" or "write"
}

// MarshalJSON encodes the policy event as one flat JSON object.
//
// Without an embedded event only "logged_in_pubkey" and "ip_address" are
// emitted (always, even when empty). With an event, its id, pubkey and sig
// are hex-encoded, content is emitted as a string, and each of the context
// fields (logged_in_pubkey, ip_address, access_type) is added only when it is
// non-empty.
func (pe *PolicyEvent) MarshalJSON() ([]byte, error) {
	if pe.E == nil {
		contextOnly := map[string]interface{}{
			"logged_in_pubkey": pe.LoggedInPubkey,
			"ip_address":       pe.IPAddress,
		}
		return json.Marshal(contextOnly)
	}

	ev := pe.E
	payload := make(map[string]interface{}, 10)
	payload["id"] = hex.Enc(ev.ID)
	payload["pubkey"] = hex.Enc(ev.Pubkey)
	payload["created_at"] = ev.CreatedAt
	payload["kind"] = ev.Kind
	payload["content"] = string(ev.Content)
	payload["tags"] = ev.Tags
	payload["sig"] = hex.Enc(ev.Sig)

	// Context fields are optional: only non-empty values are included.
	optional := [...]struct{ key, value string }{
		{"logged_in_pubkey", pe.LoggedInPubkey},
		{"ip_address", pe.IPAddress},
		{"access_type", pe.AccessType},
	}
	for _, field := range optional {
		if field.value != "" {
			payload[field.key] = field.value
		}
	}

	return json.Marshal(payload)
}

// PolicyResponse is one decision returned by a policy script. The script is
// expected to write one JSON object per line with these fields; readResponses
// decodes each line into this type.
type PolicyResponse struct {
	ID     string `json:"id"`
	Action string `json:"action"` // accept, reject, or shadowReject
	Msg    string `json:"msg"`    // NIP-20 response message (only used for reject)
}

// ScriptRunner manages a single policy script process.
// Each unique script path gets its own independent runner (see
// PolicyManager.getOrCreateRunner), which also starts the runner's
// periodicCheck goroutine.
//
// Field overview, as used by the methods in this file:
//   - ctx/cancel: lifetime of the runner, derived from the manager's context.
//   - configDir: working directory the script is started in.
//   - scriptPath: path of the script executable.
//   - currentCmd/currentCancel: the running process and the cancel function
//     of its command context; set by Start, cleared by monitorProcess.
//   - mutex: guards isRunning, isStarting, the pipes and currentCmd.
//   - stdin/stdout/stderr: pipes to the running process.
//   - responseChan: parsed script responses (created with a buffer of 100).
//   - startupChan: created with a buffer of 1 by getOrCreateRunner.
type ScriptRunner struct {
	ctx           context.Context
	cancel        context.CancelFunc
	configDir     string
	scriptPath    string
	currentCmd    *exec.Cmd
	currentCancel context.CancelFunc
	mutex         sync.RWMutex
	isRunning     bool
	isStarting    bool
	stdin         io.WriteCloser
	stdout        io.ReadCloser
	stderr        io.ReadCloser
	responseChan  chan PolicyResponse
	startupChan   chan error
}

// PolicyManager owns the policy script runners of a relay.
// It keeps one ScriptRunner per unique script path in runners (guarded by
// mutex) and hands them out through getOrCreateRunner. Every runner's context
// is derived from ctx, so cancelling the manager's context stops them all.
//
// configDir is the application's configuration directory and scriptPath the
// default script location inside it (see NewWithManager).
type PolicyManager struct {
	ctx        context.Context
	cancel     context.CancelFunc
	configDir  string
	scriptPath string // Default script path for backward compatibility
	enabled    bool
	mutex      sync.RWMutex
	runners    map[string]*ScriptRunner // Map of script path -> runner
}

// P represents a complete policy configuration for a Nostr relay.
// It defines access control rules, kind filtering, and default behavior.
//
// CheckPolicy evaluates a request in this order: reject unauthenticated
// requests, apply the Global rule, apply kind filtering, apply the
// kind-specific rule (or its script), and fall back to DefaultPolicy when no
// kind-specific rule exists.
type P struct {
	// Kind holds the whitelist/blacklist used to accept or reject events by kind number.
	Kind Kinds `json:"kind"`
	// Rules maps an event kind number to the rule whose criteria events of that kind must meet.
	Rules map[int]Rule `json:"rules"`
	// Global is a rule set that applies to all events.
	Global Rule `json:"global"`
	// DefaultPolicy determines the default behavior when no rules deny an event ("allow" or "deny", defaults to "allow"). Any value other than "deny" is treated as "allow".
	DefaultPolicy string `json:"default_policy"`
	// Manager handles policy script execution. It is never serialized.
	Manager *PolicyManager `json:"-"`
}

// New builds a policy from its JSON configuration.
//
// An empty policyJSON yields a policy with default settings. DefaultPolicy is
// "allow" unless the configuration specifies otherwise (an explicit empty
// string is also replaced by "allow"). After decoding, the binary pubkey
// caches of the global rule and of every kind-specific rule are filled.
//
// Returns nil and an error when policyJSON cannot be unmarshaled.
func New(policyJSON []byte) (p *P, err error) {
	p = &P{DefaultPolicy: "allow"}

	if len(policyJSON) > 0 {
		err = json.Unmarshal(policyJSON, p)
		if chk.E(err) {
			return nil, fmt.Errorf("failed to unmarshal policy JSON: %v", err)
		}
	}

	// The configuration may have overwritten the default with an empty value.
	if p.DefaultPolicy == "" {
		p.DefaultPolicy = "allow"
	}

	// Rules are stored by value in the map, so each one is copied out,
	// populated, and written back.
	p.Global.populateBinaryCache()
	for kind, rule := range p.Rules {
		rule.populateBinaryCache()
		p.Rules[kind] = rule
	}

	return p, err
}

// IsPartyInvolved reports whether userPubkey is a party to the event, i.e.
// whether it is
//  1. the author of the event (ev.Pubkey equals userPubkey), or
//  2. referenced by one of the event's p-tags.
//
// ev.Pubkey and userPubkey are compared as binary values; p-tag values are
// treated as hex-encoded pubkeys and decoded first. Tags that fail to decode
// are ignored. An empty userPubkey (unauthenticated) is never a party.
//
// This is the single source of truth for "parties_involved" / "privileged" checks.
func IsPartyInvolved(ev *event.E, userPubkey []byte) bool {
	if len(userPubkey) == 0 {
		return false
	}
	if bytes.Equal(ev.Pubkey, userPubkey) {
		return true
	}

	for _, tag := range ev.Tags.GetAll([]byte("p")) {
		decoded, err := hex.Dec(string(tag.Value()))
		if err == nil && bytes.Equal(decoded, userPubkey) {
			return true
		}
	}
	return false
}

// getDefaultPolicyAction translates DefaultPolicy into a decision: only the
// exact value "deny" yields false; "allow", the empty string and any
// unrecognised value all yield true.
func (p *P) getDefaultPolicyAction() (allowed bool) {
	return p.DefaultPolicy != "deny"
}

// NewWithManager creates a policy together with the policy manager used for
// script execution.
//
// The configuration directory is <XDG config home>/<appName>; the default
// script is "policy.sh" and the configuration file "policy.json" inside it.
// The manager's context is derived from ctx.
//
// When enabled is false the policy is returned with default settings and
// nothing is loaded or started. When enabled is true the configuration file
// is loaded (a failure is logged and the defaults are kept) and the manager's
// script start-up and periodic check goroutines are launched.
func NewWithManager(ctx context.Context, appName string, enabled bool) *P {
	configDir := filepath.Join(xdg.ConfigHome, appName)

	managerCtx, cancel := context.WithCancel(ctx)
	manager := &PolicyManager{
		ctx:        managerCtx,
		cancel:     cancel,
		configDir:  configDir,
		scriptPath: filepath.Join(configDir, "policy.sh"),
		enabled:    enabled,
		runners:    make(map[string]*ScriptRunner),
	}

	policy := &P{
		DefaultPolicy: "allow",
		Manager:       manager,
	}
	if !enabled {
		return policy
	}

	configPath := filepath.Join(configDir, "policy.json")
	switch err := policy.LoadFromFile(configPath); {
	case err != nil:
		log.W.F(
			"failed to load policy configuration from %s: %v", configPath,
			err,
		)
		log.I.F("using default policy configuration")
	default:
		log.I.F("loaded policy configuration from %s", configPath)
	}

	// Background work: start the default script if present, and keep
	// checking for its availability.
	go manager.startPolicyIfExists()
	go manager.periodicCheck()

	return policy
}

// getOrCreateRunner returns the runner registered for scriptPath, creating
// and registering one on first use. The whole lookup-or-create sequence runs
// under the manager's mutex, so at most one runner exists per script path.
// A newly created runner gets a context derived from the manager's context
// and has its periodicCheck goroutine started.
func (pm *PolicyManager) getOrCreateRunner(scriptPath string) *ScriptRunner {
	pm.mutex.Lock()
	defer pm.mutex.Unlock()

	runner, found := pm.runners[scriptPath]
	if !found {
		ctx, cancel := context.WithCancel(pm.ctx)
		runner = &ScriptRunner{
			ctx:          ctx,
			cancel:       cancel,
			configDir:    pm.configDir,
			scriptPath:   scriptPath,
			responseChan: make(chan PolicyResponse, 100),
			startupChan:  make(chan error, 1),
		}
		pm.runners[scriptPath] = runner

		// Each runner watches over its own script.
		go runner.periodicCheck()
	}
	return runner
}

// ScriptRunner methods

// IsRunning reports whether the runner currently considers its script
// process to be running. The flag is read under the runner's read lock.
func (sr *ScriptRunner) IsRunning() bool {
	sr.mutex.RLock()
	running := sr.isRunning
	sr.mutex.RUnlock()
	return running
}

// ensureRunning ensures the script is running, starting it if necessary, and
// blocks until the outcome is known, the 10 second startup timeout expires,
// or the runner's context is cancelled.
//
// Exactly one caller performs the start (the one that flips isStarting).
// Its result travels over a channel private to that attempt, so it cannot be
// consumed by another caller and no stale value from an earlier, timed-out
// attempt can be mistaken for the current one. Callers that arrive while a
// start is in progress do not compete for that result; they watch the
// runner's state instead (see awaitPeerStartup).
func (sr *ScriptRunner) ensureRunning() error {
	const startupTimeout = 10 * time.Second

	sr.mutex.Lock()
	if sr.isRunning {
		sr.mutex.Unlock()
		return nil
	}
	if sr.isStarting {
		// Someone else is already starting the script.
		sr.mutex.Unlock()
		return sr.awaitPeerStartup(startupTimeout)
	}
	sr.isStarting = true
	sr.mutex.Unlock()

	// Buffered so the goroutine can always deliver its result and exit,
	// even when this caller has already given up.
	done := make(chan error, 1)
	go func() {
		err := sr.Start()
		sr.mutex.Lock()
		sr.isStarting = false
		sr.mutex.Unlock()
		done <- err
	}()

	timer := time.NewTimer(startupTimeout)
	defer timer.Stop()

	select {
	case err := <-done:
		if err != nil {
			return fmt.Errorf("script startup failed: %v", err)
		}
		// Double-check: the process may already have exited again.
		sr.mutex.RLock()
		running := sr.isRunning
		sr.mutex.RUnlock()
		if !running {
			return fmt.Errorf("script startup completed but process is not running")
		}
		return nil
	case <-timer.C:
		sr.mutex.Lock()
		sr.isStarting = false
		sr.mutex.Unlock()
		return fmt.Errorf("script startup timeout")
	case <-sr.ctx.Done():
		sr.mutex.Lock()
		sr.isStarting = false
		sr.mutex.Unlock()
		return fmt.Errorf("script context cancelled")
	}
}

// awaitPeerStartup waits for a start attempt made by another caller to
// finish. It polls the runner's state and returns nil once the script is
// running, or an error when the attempt ended without a running script, the
// timeout expired, or the runner's context was cancelled.
func (sr *ScriptRunner) awaitPeerStartup(timeout time.Duration) error {
	deadline := time.NewTimer(timeout)
	defer deadline.Stop()
	poll := time.NewTicker(50 * time.Millisecond)
	defer poll.Stop()

	for {
		sr.mutex.RLock()
		starting, running := sr.isStarting, sr.isRunning
		sr.mutex.RUnlock()

		if running {
			return nil
		}
		if !starting {
			return fmt.Errorf("script startup completed but process is not running")
		}

		select {
		case <-poll.C:
		case <-deadline.C:
			return fmt.Errorf("script startup timeout")
		case <-sr.ctx.Done():
			return fmt.Errorf("script context cancelled")
		}
	}
}

// Start launches the script process and wires up its pipes.
//
// It fails when the script is already running, when the script file does not
// exist, or when making it executable, creating a pipe, or starting the
// process fails; in the latter cases the command context is cancelled and any
// pipes opened so far are closed. On success the runner state is updated and
// the reader, stderr logger and process monitor goroutines are started.
// The runner's mutex is held for the whole call.
func (sr *ScriptRunner) Start() error {
	sr.mutex.Lock()
	defer sr.mutex.Unlock()

	if sr.isRunning {
		return fmt.Errorf("script is already running")
	}
	if _, statErr := os.Stat(sr.scriptPath); os.IsNotExist(statErr) {
		return fmt.Errorf("script does not exist at %s", sr.scriptPath)
	}

	// Every process gets its own context so it can be cancelled individually.
	cmdCtx, cmdCancel := context.WithCancel(sr.ctx)

	// abort undoes a partially completed start: cancel first, then close the
	// pipes in the order given.
	abort := func(opened ...io.Closer) {
		cmdCancel()
		for _, pipe := range opened {
			pipe.Close()
		}
	}

	if err := os.Chmod(sr.scriptPath, 0755); chk.E(err) {
		abort()
		return fmt.Errorf("failed to make script executable: %v", err)
	}

	cmd := exec.CommandContext(cmdCtx, sr.scriptPath)
	cmd.Dir = sr.configDir

	stdin, err := cmd.StdinPipe()
	if chk.E(err) {
		abort()
		return fmt.Errorf("failed to create stdin pipe: %v", err)
	}
	stdout, err := cmd.StdoutPipe()
	if chk.E(err) {
		abort(stdin)
		return fmt.Errorf("failed to create stdout pipe: %v", err)
	}
	stderr, err := cmd.StderrPipe()
	if chk.E(err) {
		abort(stdin, stdout)
		return fmt.Errorf("failed to create stderr pipe: %v", err)
	}

	if err := cmd.Start(); chk.E(err) {
		abort(stdin, stdout, stderr)
		return fmt.Errorf("failed to start script: %v", err)
	}

	sr.currentCmd, sr.currentCancel = cmd, cmdCancel
	sr.stdin, sr.stdout, sr.stderr = stdin, stdout, stderr
	sr.isRunning = true

	// Background workers: response reader, stderr logger, process monitor.
	go sr.readResponses()
	go sr.logOutput(stdout, stderr)
	go sr.monitorProcess()

	log.I.F(
		"policy script started: %s (pid=%d)", sr.scriptPath, cmd.Process.Pid,
	)
	return nil
}

// Stop asks the script to exit and waits for it to go away.
//
// It closes the script's stdin and cancels the command context, then polls
// isRunning for up to 5 seconds. If the script is still running after that,
// the process is killed and up to 3 more seconds are allowed for cleanup.
// The cleanup itself (Wait, closing pipes, clearing isRunning) is done by
// monitorProcess; Stop only observes its result.
//
// Returns an error only when the script is not running to begin with.
func (sr *ScriptRunner) Stop() error {
	sr.mutex.Lock()
	if !sr.isRunning || sr.currentCmd == nil {
		sr.mutex.Unlock()
		return fmt.Errorf("script is not running")
	}
	// Closing stdin is the signal for the script to exit.
	if sr.stdin != nil {
		sr.stdin.Close()
	}
	if sr.currentCancel != nil {
		sr.currentCancel()
	}
	// Keep a handle on the process; the field may be cleared once unlocked.
	process := sr.currentCmd.Process
	sr.mutex.Unlock()

	// waitStopped polls every 100ms, up to the given number of attempts, and
	// reports whether the runner was seen as no longer running.
	waitStopped := func(attempts int) bool {
		for ; attempts > 0; attempts-- {
			time.Sleep(100 * time.Millisecond)
			sr.mutex.RLock()
			stopped := !sr.isRunning
			sr.mutex.RUnlock()
			if stopped {
				return true
			}
		}
		return false
	}

	if waitStopped(50) { // 5 seconds
		log.I.F("policy script stopped gracefully: %s", sr.scriptPath)
		return nil
	}

	log.W.F(
		"policy script did not stop gracefully, sending SIGKILL: %s",
		sr.scriptPath,
	)
	if process != nil {
		if err := process.Kill(); chk.E(err) {
			log.E.F("failed to kill script process: %v", err)
		}
	}
	// Give monitorProcess up to 3 more seconds to clean up.
	waitStopped(30)

	return nil
}

// ProcessEvent sends an event to the script and waits for a response.
//
// The event is written to the script's stdin as one newline-terminated JSON
// document; the next response delivered on the runner's response channel is
// returned. An error is returned when evt (or its embedded event) is nil, the
// script is not running, the event cannot be serialized or written, no
// response arrives within 5 seconds, or the runner's context is cancelled.
// A broken/closed pipe on write additionally marks the runner as not running
// so that it can be restarted.
//
// NOTE(review): responses are taken from the shared channel in arrival order
// and are not matched against the event ID here, so a late response to an
// earlier, timed-out request could be returned for a later event — confirm
// whether callers correlate on PolicyResponse.ID.
func (sr *ScriptRunner) ProcessEvent(evt *PolicyEvent) (
	*PolicyResponse, error,
) {
	// Guard before the debug log below: its evt.Serialize() argument is
	// evaluated regardless of log level and would otherwise be invoked on a
	// nil event.
	if evt == nil || evt.E == nil {
		return nil, fmt.Errorf("event cannot be nil")
	}
	log.D.F("processing event: %s", evt.Serialize())

	sr.mutex.RLock()
	if !sr.isRunning || sr.stdin == nil {
		sr.mutex.RUnlock()
		return nil, fmt.Errorf("script is not running")
	}
	stdin := sr.stdin
	sr.mutex.RUnlock()

	// Serialize the event to JSON
	eventJSON, err := json.Marshal(evt)
	if chk.E(err) {
		return nil, fmt.Errorf("failed to serialize event: %v", err)
	}

	// Send the event JSON to the script (newline-terminated)
	if _, err := stdin.Write(append(eventJSON, '\n')); chk.E(err) {
		// A broken or closed pipe means the script has died.
		if strings.Contains(err.Error(), "broken pipe") || strings.Contains(err.Error(), "closed pipe") {
			log.E.F(
				"policy script %s stdin closed (broken pipe) - script may have crashed or exited prematurely",
				sr.scriptPath,
			)
			// Mark as not running so it will be restarted on next periodic check
			sr.mutex.Lock()
			sr.isRunning = false
			sr.mutex.Unlock()
		}
		return nil, fmt.Errorf("failed to write event to script: %v", err)
	}

	// Wait for response with timeout
	timeout := time.NewTimer(5 * time.Second)
	defer timeout.Stop()

	select {
	case response := <-sr.responseChan:
		log.D.S("response", response)
		return &response, nil
	case <-timeout.C:
		log.W.F(
			"policy script %s response timeout - script may not be responding correctly (check for debug output on stdout)",
			sr.scriptPath,
		)
		return nil, fmt.Errorf("script response timeout")
	case <-sr.ctx.Done():
		return nil, fmt.Errorf("script context cancelled")
	}
}

// readResponses reads JSONL responses from the script's stdout until the
// stream ends, forwarding every successfully parsed PolicyResponse to the
// runner's response channel. When the channel is full the response is
// dropped with a warning rather than blocking the reader.
//
// Lines that are not valid responses are skipped. Lines that look like JSON
// (start with "{") are reported as parse errors; other lines are treated as
// stray debug output and reported for the first three occurrences only,
// followed by a single notice that further warnings are suppressed.
func (sr *ScriptRunner) readResponses() {
	// Take one snapshot of the pipe under the lock: monitorProcess resets
	// sr.stdout to nil when the process exits.
	sr.mutex.RLock()
	stdout := sr.stdout
	sr.mutex.RUnlock()
	if stdout == nil {
		return
	}

	scanner := bufio.NewScanner(stdout)
	nonJSONLineCount := 0
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}
		log.D.F("policy response: %s", line)
		var response PolicyResponse
		// Plain error check on purpose: every failure is reported explicitly
		// below, and non-JSON noise must stay quiet once suppressed.
		if err := json.Unmarshal([]byte(line), &response); err != nil {
			if strings.HasPrefix(line, "{") {
				// Looks like JSON but failed to parse
				log.E.F(
					"failed to parse policy response from %s: %v\nLine: %s",
					sr.scriptPath, err, line,
				)
				continue
			}

			// Definitely not JSON - probably debug output
			nonJSONLineCount++
			switch {
			case nonJSONLineCount <= 3:
				log.W.F(
					"policy script %s produced non-JSON output on stdout (should only output JSONL): %q",
					sr.scriptPath, line,
				)
				log.W.F(
					"IMPORTANT: Policy scripts must ONLY write JSON responses to stdout. Use stderr or a log file for debug output.",
				)
			case nonJSONLineCount == 4:
				log.W.F(
					"policy script %s continues to produce non-JSON output - suppressing further warnings",
					sr.scriptPath,
				)
			}
			continue
		}

		// Send response to channel (non-blocking)
		select {
		case sr.responseChan <- response:
		default:
			log.W.F(
				"policy response channel full for %s, dropping response",
				sr.scriptPath,
			)
		}
	}

	if err := scanner.Err(); chk.E(err) {
		log.E.F(
			"error reading policy responses from %s: %v", sr.scriptPath, err,
		)
	}
}

// logOutput forwards the script's stderr, line by line, to the relay log
// until the stream ends, then closes the pipe.
//
// The stdout parameter is unused: stdout is consumed by readResponses.
//
// Scanning happens directly in this function, which Start already runs in
// its own goroutine. Spawning a further goroutine here would let the
// deferred Close run as soon as this function returned, closing stderr
// before anything could be read from it.
func (sr *ScriptRunner) logOutput(stdout, stderr io.ReadCloser) {
	defer stderr.Close()

	scanner := bufio.NewScanner(stderr)
	for scanner.Scan() {
		line := scanner.Text()
		if line != "" {
			// Log script stderr output through relay logging system
			log.I.F("[policy script %s] %s", sr.scriptPath, line)
		}
	}
	if err := scanner.Err(); chk.E(err) {
		log.E.F("error reading stderr from policy script %s: %v", sr.scriptPath, err)
	}
}

// monitorProcess waits for the script process to exit and then resets the
// runner's state (pipes closed and cleared, isRunning false, command and
// cancel function cleared), logging how the process ended.
//
// The command to wait on is captured once, under the lock. After Wait
// returns, the shared state is only torn down when the runner still refers to
// that same command: if the runner was restarted in the meantime (for example
// after ProcessEvent marked it as not running on a broken pipe), the state
// belongs to the newer process and must not be touched.
func (sr *ScriptRunner) monitorProcess() {
	sr.mutex.RLock()
	cmd := sr.currentCmd
	sr.mutex.RUnlock()
	if cmd == nil {
		return
	}

	err := cmd.Wait()

	sr.mutex.Lock()
	defer sr.mutex.Unlock()

	if sr.currentCmd == cmd {
		// Clean up pipes
		if sr.stdin != nil {
			sr.stdin.Close()
			sr.stdin = nil
		}
		if sr.stdout != nil {
			sr.stdout.Close()
			sr.stdout = nil
		}
		if sr.stderr != nil {
			sr.stderr.Close()
			sr.stderr = nil
		}

		sr.isRunning = false
		sr.currentCmd = nil
		sr.currentCancel = nil
	}

	if err != nil {
		log.E.F(
			"policy script exited with error: %s: %v, will retry periodically",
			sr.scriptPath, err,
		)
	} else {
		log.I.F("policy script exited normally: %s", sr.scriptPath)
	}
}

// periodicCheck runs until the runner's context is cancelled. Every 60
// seconds it checks whether the script is running; if it is not, and the
// script file can be stat'ed, a start attempt is launched in the background
// and its outcome is logged.
func (sr *ScriptRunner) periodicCheck() {
	ticker := time.NewTicker(60 * time.Second)
	defer ticker.Stop()

	// restart performs one start attempt and reports the result.
	restart := func() {
		err := sr.Start()
		if err != nil {
			log.E.F(
				"failed to restart policy script %s: %v, will retry in next cycle",
				sr.scriptPath, err,
			)
			return
		}
		log.I.F(
			"policy script restarted successfully: %s",
			sr.scriptPath,
		)
	}

	for {
		select {
		case <-sr.ctx.Done():
			return
		case <-ticker.C:
		}

		sr.mutex.RLock()
		alive := sr.isRunning
		sr.mutex.RUnlock()
		if alive {
			continue
		}

		// Only attempt a start when the script file is actually there.
		if _, statErr := os.Stat(sr.scriptPath); statErr != nil {
			continue
		}
		go restart()
	}
}

// LoadFromFile reads the JSON policy configuration at configPath and decodes
// it into p, then refreshes the binary pubkey caches of the global rule and
// of every kind-specific rule.
//
// Returns an error if the file does not exist, cannot be read, is empty, or
// does not contain valid policy JSON.
func (p *P) LoadFromFile(configPath string) error {
	if _, statErr := os.Stat(configPath); os.IsNotExist(statErr) {
		return fmt.Errorf(
			"policy configuration file does not exist: %s", configPath,
		)
	}

	raw, readErr := os.ReadFile(configPath)
	switch {
	case readErr != nil:
		return fmt.Errorf("failed to read policy configuration file: %v", readErr)
	case len(raw) == 0:
		return fmt.Errorf("policy configuration file is empty")
	}

	if parseErr := json.Unmarshal(raw, p); parseErr != nil {
		return fmt.Errorf("failed to parse policy configuration JSON: %v", parseErr)
	}

	// Rules live in the map by value: copy out, populate, store back.
	p.Global.populateBinaryCache()
	for kind := range p.Rules {
		rule := p.Rules[kind]
		rule.populateBinaryCache()
		p.Rules[kind] = rule
	}

	return nil
}

// CheckPolicy decides whether an event is allowed under the policy.
// The access parameter should be "write" for accepting events or "read" for
// filtering events. It returns true when the event is allowed, false when it
// is denied, and an error when validation fails (a nil event).
//
// Evaluation order:
//  1. Unauthenticated requests (empty loggedInPubkey) are always denied.
//  2. The global rule must pass.
//  3. The kind whitelist/blacklist must pass.
//  4. Without a rule for the event's kind, the default policy decides.
//  5. With a rule that names a script (and a manager being present): if the
//     manager is disabled the default policy decides; if the script exists
//     and the check succeeds its decision is returned; otherwise the rule's
//     other criteria are applied.
//  6. In all remaining cases the rule's criteria are applied.
func (p *P) CheckPolicy(
	access string, ev *event.E, loggedInPubkey []byte, ipAddress string,
) (allowed bool, err error) {
	// Cheap rejections first; the cases are evaluated in order.
	switch {
	case ev == nil:
		return false, fmt.Errorf("event cannot be nil")
	case len(loggedInPubkey) == 0:
		// CRITICAL SECURITY: no authentication = no access, regardless of
		// policy rules. Rejected silently.
		return false, nil
	case !p.checkGlobalRulePolicy(access, ev, loggedInPubkey):
		return false, nil
	case !p.checkKindsPolicy(ev.Kind):
		return false, nil
	}

	rule, hasRule := p.Rules[int(ev.Kind)]
	if !hasRule {
		return p.getDefaultPolicyAction(), nil
	}

	// No script involved: plain rule-based filtering.
	if rule.Script == "" || p.Manager == nil {
		return p.checkRulePolicy(access, ev, rule, loggedInPubkey)
	}

	if !p.Manager.IsEnabled() {
		log.D.F(
			"policy manager is disabled for kind %d, falling back to default policy (%s)",
			ev.Kind, p.DefaultPolicy,
		)
		return p.getDefaultPolicyAction(), nil
	}

	if _, statErr := os.Stat(rule.Script); statErr != nil {
		// Script configured but not usable on disk.
		log.W.F(
			"policy script configured for kind %d but not found at %s: %v, applying other criteria",
			ev.Kind, rule.Script, statErr,
		)
		return p.checkRulePolicy(access, ev, rule, loggedInPubkey)
	}

	log.D.F(
		"using policy script for kind %d: %s", ev.Kind, rule.Script,
	)
	scriptAllowed, scriptErr := p.checkScriptPolicy(
		access, ev, rule.Script, loggedInPubkey, ipAddress,
	)
	if scriptErr == nil {
		// The script answered; its decision is final.
		return scriptAllowed, nil
	}
	log.W.F(
		"policy script check failed for kind %d: %v, applying other criteria",
		ev.Kind, scriptErr,
	)

	return p.checkRulePolicy(access, ev, rule, loggedInPubkey)
}

// checkKindsPolicy reports whether events of the given kind pass kind-level
// filtering.
//
// Logic:
//  1. If a whitelist is configured, only whitelisted kinds pass.
//  2. Otherwise, if a blacklist is configured, blacklisted kinds are rejected
//     and any other kind passes only if a rule is defined for it.
//  3. Otherwise, if any kind-specific rules exist, only kinds with a rule
//     pass (implicit whitelist).
//  4. Otherwise (no lists and no kind-specific rules) every kind passes.
//
// Configured kinds are compared as ints. Converting them to uint16 instead
// would wrap out-of-range values (65537 would match kind 1, -1 would match
// kind 65535).
func (p *P) checkKindsPolicy(kind uint16) bool {
	k := int(kind)

	// Explicit whitelist: authoritative when present.
	if len(p.Kind.Whitelist) > 0 {
		for _, allowedKind := range p.Kind.Whitelist {
			if allowedKind == k {
				return true
			}
		}
		return false
	}

	_, hasRule := p.Rules[k]

	// Explicit blacklist: listed kinds are out, the rest need a rule.
	if len(p.Kind.Blacklist) > 0 {
		for _, deniedKind := range p.Kind.Blacklist {
			if deniedKind == k {
				return false
			}
		}
		return hasRule
	}

	// No explicit lists: kind-specific rules act as an implicit whitelist.
	if len(p.Rules) > 0 {
		return hasRule
	}

	// Nothing kind-specific is configured at all - allow every kind.
	return true
}

// checkGlobalRulePolicy reports whether the event passes the global rule.
// A global rule without any configured constraints lets everything through.
// If evaluating the rule returns an error, the error is logged and the event
// is rejected.
func (p *P) checkGlobalRulePolicy(
	access string, ev *event.E, loggedInPubkey []byte,
) bool {
	if p.Global.hasAnyRules() {
		passed, err := p.checkRulePolicy(access, ev, p.Global, loggedInPubkey)
		if err != nil {
			log.E.F("global rule policy check failed: %v", err)
			return false
		}
		return passed
	}
	// Nothing configured globally.
	return true
}

// checkRulePolicy evaluates a single rule against an event for the given
// access type ("read" or "write") and reports whether access is allowed.
// Every return path in this function returns a nil error.
//
// Evaluation order as implemented:
//  1. Universal constraints (size, content length, required tags, presence of
//     an expiration tag, event age) - apply to everyone
//  2. Explicit denials (deny lists) - a match denies immediately
//  3. Read access only: when a read allow list has entries, access is granted
//     if the user is in the list OR the rule is privileged and the user is a
//     party involved in the event; when there is no allow list but the rule
//     is privileged, only involved parties may read
//  4. Explicit allows: a populated write allow list is exclusive; an empty but
//     non-nil allow list allows everyone; a rule with only a deny list allows
//     everyone who was not denied in step 2
//  5. Default policy - fallback when no rules apply
//
// NOTE: for reads, a populated allow list is NOT authoritative over privileged
// access - an involved party of a privileged rule may read even when absent
// from the read allow list. The Privileged flag is only consulted for "read"
// access; it has no effect on "write" access in this function. Any access
// value other than "read" or "write" skips steps 2-4 and falls through to the
// default policy once the universal constraints pass.
func (p *P) checkRulePolicy(
	access string, ev *event.E, rule Rule, loggedInPubkey []byte,
) (allowed bool, err error) {
	// ===================================================================
	// STEP 1: Universal Constraints (apply to everyone)
	// ===================================================================

	// Check size limits: the serialized event must not exceed SizeLimit bytes
	if rule.SizeLimit != nil {
		eventSize := int64(len(ev.Serialize()))
		if eventSize > *rule.SizeLimit {
			return false, nil
		}
	}

	// Check content length: the content field must not exceed ContentLimit bytes
	if rule.ContentLimit != nil {
		contentSize := int64(len(ev.Content))
		if contentSize > *rule.ContentLimit {
			return false, nil
		}
	}

	// Check required tags: every tag named in MustHaveTags must be present
	if len(rule.MustHaveTags) > 0 {
		for _, requiredTag := range rule.MustHaveTags {
			if ev.Tags.GetFirst([]byte(requiredTag)) == nil {
				return false, nil
			}
		}
	}

	// Check expiry time. Only the PRESENCE of an "expiration" tag is enforced
	// here; the tag's value is not compared against *rule.MaxExpiry yet.
	if rule.MaxExpiry != nil {
		expiryTag := ev.Tags.GetFirst([]byte("expiration"))
		if expiryTag == nil {
			return false, nil // Must have expiry if MaxExpiry is set
		}
		// TODO: Parse and validate expiry time
	}

	// Check MaxAgeOfEvent (maximum age of event in seconds).
	// A nil or non-positive value disables the check.
	if rule.MaxAgeOfEvent != nil && *rule.MaxAgeOfEvent > 0 {
		currentTime := time.Now().Unix()
		maxAllowedTime := currentTime - *rule.MaxAgeOfEvent
		if ev.CreatedAt < maxAllowedTime {
			return false, nil // Event is too old
		}
	}

	// Check MaxAgeEventInFuture (maximum time event can be in the future in seconds).
	// A nil or non-positive value disables the check.
	if rule.MaxAgeEventInFuture != nil && *rule.MaxAgeEventInFuture > 0 {
		currentTime := time.Now().Unix()
		maxFutureTime := currentTime + *rule.MaxAgeEventInFuture
		if ev.CreatedAt > maxFutureTime {
			return false, nil // Event is too far in the future
		}
	}

	// ===================================================================
	// STEP 2: Explicit Denials (highest priority blacklist)
	// ===================================================================

	// For each list, the binary form (…Bin) is used when it has entries;
	// the hex string list is only consulted when the binary form is empty.
	if access == "write" {
		// Check write deny list - deny specific users from submitting events
		if len(rule.writeDenyBin) > 0 {
			for _, deniedPubkey := range rule.writeDenyBin {
				if utils.FastEqual(loggedInPubkey, deniedPubkey) {
					return false, nil // Submitter explicitly denied
				}
			}
		} else if len(rule.WriteDeny) > 0 {
			// Fallback: binary cache not populated, use hex comparison
			loggedInPubkeyHex := hex.Enc(loggedInPubkey)
			for _, deniedPubkey := range rule.WriteDeny {
				if loggedInPubkeyHex == deniedPubkey {
					return false, nil // Submitter explicitly denied
				}
			}
		}
	} else if access == "read" {
		// Check read deny list
		if len(rule.readDenyBin) > 0 {
			for _, deniedPubkey := range rule.readDenyBin {
				if utils.FastEqual(loggedInPubkey, deniedPubkey) {
					return false, nil // Explicitly denied
				}
			}
		} else if len(rule.ReadDeny) > 0 {
			// Fallback: binary cache not populated, use hex comparison
			loggedInPubkeyHex := hex.Enc(loggedInPubkey)
			for _, deniedPubkey := range rule.ReadDeny {
				if loggedInPubkeyHex == deniedPubkey {
					return false, nil // Explicitly denied
				}
			}
		}
	}

	// ===================================================================
	// STEP 3: Check Read Access with OR Logic (Allow List OR Privileged)
	// ===================================================================

	// For read operations, check if user has access via allow list OR privileged
	if access == "read" {
		// hasAllowList is true only when one of the lists actually has
		// entries; an empty-but-non-nil ReadAllow is handled in STEP 4.
		hasAllowList := len(rule.readAllowBin) > 0 || len(rule.ReadAllow) > 0
		userInAllowList := false
		userIsPrivileged := rule.Privileged && IsPartyInvolved(ev, loggedInPubkey)

		// Check if user is in read allow list
		if len(rule.readAllowBin) > 0 {
			for _, allowedPubkey := range rule.readAllowBin {
				if utils.FastEqual(loggedInPubkey, allowedPubkey) {
					userInAllowList = true
					break
				}
			}
		} else if len(rule.ReadAllow) > 0 {
			// Fallback: binary cache not populated, use hex comparison
			loggedInPubkeyHex := hex.Enc(loggedInPubkey)
			for _, allowedPubkey := range rule.ReadAllow {
				if loggedInPubkeyHex == allowedPubkey {
					userInAllowList = true
					break
				}
			}
		}

		// Handle different cases:
		// 1. If there's an allow list: use OR logic (in list OR privileged)
		// 2. If no allow list but privileged: only involved parties allowed
		// 3. If no allow list and not privileged: continue to other checks

		if hasAllowList {
			// OR logic when allow list exists
			if userInAllowList || userIsPrivileged {
				return true, nil
			}
			// Not in allow list AND not privileged -> deny
			return false, nil
		} else if rule.Privileged {
			// No allow list but privileged -> only involved parties
			if userIsPrivileged {
				return true, nil
			}
			// Not involved in privileged event -> deny
			return false, nil
		}
		// No allow list and not privileged -> continue to other checks
	}

	// ===================================================================
	// STEP 4: Explicit Allows (exclusive access - ONLY these users)
	// ===================================================================

	if access == "write" {
		// Check write allow list (exclusive - ONLY these users can write)
		// Special case: empty list (but not nil) means allow all
		if rule.WriteAllow != nil && len(rule.WriteAllow) == 0 && len(rule.writeAllowBin) == 0 {
			// Empty allow list explicitly set - allow all writers
			return true, nil
		}

		if len(rule.writeAllowBin) > 0 {
			// Check if logged-in user (submitter) is allowed to write
			allowed = false
			for _, allowedPubkey := range rule.writeAllowBin {
				if utils.FastEqual(loggedInPubkey, allowedPubkey) {
					allowed = true
					break
				}
			}
			if !allowed {
				return false, nil // Submitter not in exclusive allow list
			}
			// Submitter is in allow list
			return true, nil
		} else if len(rule.WriteAllow) > 0 {
			// Fallback: binary cache not populated, use hex comparison
			// Check if logged-in user (submitter) is allowed to write
			loggedInPubkeyHex := hex.Enc(loggedInPubkey)
			allowed = false
			for _, allowedPubkey := range rule.WriteAllow {
				if loggedInPubkeyHex == allowedPubkey {
					allowed = true
					break
				}
			}
			if !allowed {
				return false, nil // Submitter not in exclusive allow list
			}
			// Submitter is in allow list
			return true, nil
		}

		// If we have ONLY a deny list (no allow list), and user is not denied, allow
		if (len(rule.WriteDeny) > 0 || len(rule.writeDenyBin) > 0) &&
			len(rule.WriteAllow) == 0 && len(rule.writeAllowBin) == 0 {
			// Only deny list exists, user wasn't denied above, so allow
			return true, nil
		}
	} else if access == "read" {
		// Read access already handled in STEP 3 with OR logic (allow list OR privileged)
		// Only need to handle special cases here

		// Special case: empty list (but not nil) means allow all
		// BUT if privileged, still need to check if user is involved
		if rule.ReadAllow != nil && len(rule.ReadAllow) == 0 && len(rule.readAllowBin) == 0 {
			// NOTE(review): this privileged branch looks unreachable - STEP 3
			// already returns whenever rule.Privileged is set and neither read
			// allow list has entries, which is exactly the condition here.
			if rule.Privileged {
				// Empty allow list with privileged - only involved parties
				return IsPartyInvolved(ev, loggedInPubkey), nil
			}
			// Empty allow list without privileged - allow all readers
			return true, nil
		}

		// If we have ONLY a deny list (no allow list), and user is not denied, allow
		if (len(rule.ReadDeny) > 0 || len(rule.readDenyBin) > 0) &&
			len(rule.ReadAllow) == 0 && len(rule.readAllowBin) == 0 {
			// Only deny list exists, user wasn't denied above, so allow
			return true, nil
		}
	}

	// ===================================================================
	// STEP 5: No Additional Privileged Check Needed
	// ===================================================================

	// Privileged access for read operations is already handled in STEP 3 with OR logic
	// No additional check needed here

	// ===================================================================
	// STEP 6: Default Policy
	// ===================================================================

	// If no specific rules matched, use the configured default policy
	return p.getDefaultPolicyAction(), nil
}

// checkScriptPolicy runs the policy script at scriptPath to determine whether
// ev should be allowed for the given access type.
//
// A non-nil error is returned when the script cannot be consulted at all: the
// policy manager is nil, the script file does not exist, or its runner fails
// to start. Callers can use that error to fall back to other criteria; the
// startup failure is wrapped with %w so the underlying cause stays inspectable
// via errors.Is / errors.As.
//
// In the remaining failure modes (policy disabled, script processing error,
// unknown action in the response) the configured default policy decides the
// outcome and the returned error is nil.
func (p *P) checkScriptPolicy(
	access string, ev *event.E, scriptPath string, loggedInPubkey []byte,
	ipAddress string,
) (allowed bool, err error) {
	if p.Manager == nil {
		return false, fmt.Errorf("policy manager is not initialized")
	}

	// If policy is disabled, fall back to default policy immediately
	if !p.Manager.IsEnabled() {
		log.W.F(
			"policy rule for kind %d is inactive (policy disabled), falling back to default policy (%s)",
			ev.Kind, p.DefaultPolicy,
		)
		return p.getDefaultPolicyAction(), nil
	}

	// Check if script file exists. Only "does not exist" is treated as fatal
	// here; any other stat error is left for the runner startup to surface.
	if _, statErr := os.Stat(scriptPath); os.IsNotExist(statErr) {
		// Script doesn't exist, return error so caller can fall back to other criteria
		return false, fmt.Errorf(
			"policy script does not exist at %s", scriptPath,
		)
	}

	// Get or create a runner for this specific script path
	runner := p.Manager.getOrCreateRunner(scriptPath)

	// Policy is enabled, make sure this runner is up before using it
	if !runner.IsRunning() {
		log.D.F("starting policy script for kind %d: %s", ev.Kind, scriptPath)
		if startErr := runner.ensureRunning(); startErr != nil {
			// Startup failed, return error so caller can fall back to other criteria
			return false, fmt.Errorf(
				"failed to start policy script %s: %w", scriptPath, startErr,
			)
		}
		log.I.F("policy script started for kind %d: %s", ev.Kind, scriptPath)
	}

	// Create policy event with additional context for the script
	policyEvent := &PolicyEvent{
		E:              ev,
		LoggedInPubkey: hex.Enc(loggedInPubkey),
		IPAddress:      ipAddress,
		AccessType:     access,
	}

	// Process event through policy script
	response, scriptErr := runner.ProcessEvent(policyEvent)
	if chk.E(scriptErr) {
		log.E.F(
			"policy rule for kind %d failed (script processing error: %v), falling back to default policy (%s)",
			ev.Kind, scriptErr, p.DefaultPolicy,
		)
		// Fall back to default policy on script failure
		return p.getDefaultPolicyAction(), nil
	}

	// Handle script response
	switch response.Action {
	case "accept":
		return true, nil
	case "reject", "shadowReject":
		// shadowReject is treated as a plain reject for policy purposes
		return false, nil
	default:
		log.W.F(
			"policy rule for kind %d returned unknown action '%s', falling back to default policy (%s)",
			ev.Kind, response.Action, p.DefaultPolicy,
		)
		// Fall back to default policy for unknown actions
		return p.getDefaultPolicyAction(), nil
	}
}

// PolicyManager methods

// periodicCheck makes sure a runner exists for the default script path.
// It is kept for backward compatibility with the default script path.
//
// The method itself does no polling: it only obtains (or creates) the runner
// and discards it. NOTE(review): the runner is expected to start its own
// periodic check when created - confirm in getOrCreateRunner.
func (pm *PolicyManager) periodicCheck() {
	_ = pm.getOrCreateRunner(pm.scriptPath)
}

// startPolicyIfExists launches the default policy script when its file can be
// stat'ed, and does nothing otherwise. It is kept for backward compatibility
// with the default script path.
//
// A missing default script is not reported: that is the normal situation when
// rules point at their own scripts. A start failure is logged, not returned.
func (pm *PolicyManager) startPolicyIfExists() {
	if _, statErr := os.Stat(pm.scriptPath); statErr != nil {
		// No usable default script - stay silent.
		return
	}

	log.I.F("found default policy script at %s, starting...", pm.scriptPath)
	defaultRunner := pm.getOrCreateRunner(pm.scriptPath)
	startErr := defaultRunner.Start()
	if startErr == nil {
		return
	}
	log.E.F(
		"failed to start default policy script: %v, will retry periodically",
		startErr,
	)
}

// IsEnabled reports the manager's enabled flag.
func (pm *PolicyManager) IsEnabled() bool {
	enabled := pm.enabled
	return enabled
}

// IsRunning reports whether a runner is registered for the default script
// path and that runner is currently running. It takes the manager's read lock
// while looking the runner up.
//
// Deprecated: Use getOrCreateRunner(scriptPath).IsRunning() for specific scripts.
func (pm *PolicyManager) IsRunning() bool {
	pm.mutex.RLock()
	defer pm.mutex.RUnlock()

	// No runner registered for the default path means "not running".
	defaultRunner, found := pm.runners[pm.scriptPath]
	return found && defaultRunner.IsRunning()
}

// GetScriptPath reports the path of the default policy script held by the
// manager.
func (pm *PolicyManager) GetScriptPath() string {
	defaultPath := pm.scriptPath
	return defaultPath
}

// Shutdown tears the policy manager down: it cancels the manager's context,
// then, holding the manager's write lock, stops every runner that reports it
// is running, cancels each runner's context, and finally replaces the runner
// registry with an empty map.
func (pm *PolicyManager) Shutdown() {
	pm.cancel()

	pm.mutex.Lock()
	defer pm.mutex.Unlock()

	for scriptPath, r := range pm.runners {
		// Only runners that are actually up need an explicit stop.
		if running := r.IsRunning(); running {
			log.I.F("stopping policy script: %s", scriptPath)
			r.Stop()
		}
		// Every runner's context is cancelled, running or not.
		r.cancel()
	}

	// Start over with a fresh, empty registry.
	pm.runners = map[string]*ScriptRunner{}
}