From b70f03bce07530e8279ff0e9e18af4dbc29de9c0 Mon Sep 17 00:00:00 2001 From: mleku Date: Thu, 30 Oct 2025 18:22:56 +0000 Subject: [PATCH] Refactor policy script handling and improve fallback logic - Renamed test functions for clarity, changing "NotRunning" to "Disabled" to better reflect the policy state. - Updated policy checks to ensure that if the policy is disabled, it falls back to the default policy immediately. - Enhanced error handling in the policy manager to ensure proper startup and running state management. - Introduced a new method to ensure the policy is running, with timeout handling for startup completion. - Bumped version to v0.20.3 to reflect these changes. --- pkg/policy/policy.go | 144 ++++++++++++++++++++++++++++++++++++-- pkg/policy/policy_test.go | 29 ++++---- pkg/version/version | 2 +- 3 files changed, 154 insertions(+), 21 deletions(-) diff --git a/pkg/policy/policy.go b/pkg/policy/policy.go index e6e081f..9ed227f 100644 --- a/pkg/policy/policy.go +++ b/pkg/policy/policy.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "sync" "time" @@ -131,11 +132,13 @@ type PolicyManager struct { currentCancel context.CancelFunc mutex sync.RWMutex isRunning bool + isStarting bool enabled bool stdin io.WriteCloser stdout io.ReadCloser stderr io.ReadCloser responseChan chan PolicyResponse + startupChan chan error } // P represents a complete policy configuration for a Nostr relay. @@ -203,6 +206,7 @@ func NewWithManager(ctx context.Context, appName string, enabled bool) *P { scriptPath: scriptPath, enabled: enabled, responseChan: make(chan PolicyResponse, 100), // Buffered channel for responses + startupChan: make(chan error, 1), // Channel for startup completion } // Load policy configuration from JSON file @@ -279,8 +283,19 @@ func (p *P) CheckPolicy(access string, ev *event.E, loggedInPubkey []byte, ipAdd } // Check if script is present and enabled - if rule.Script != "" && p.Manager != nil && p.Manager.IsEnabled() { - return p.checkScriptPolicy(access, ev, rule.Script, loggedInPubkey, ipAddress) + if rule.Script != "" && p.Manager != nil { + if p.Manager.IsEnabled() { + return p.checkScriptPolicy(access, ev, rule.Script, loggedInPubkey, ipAddress) + } + // Script is configured but policy is disabled - use default policy if rule has no other restrictions + hasOtherRestrictions := len(rule.WriteAllow) > 0 || len(rule.WriteDeny) > 0 || len(rule.ReadAllow) > 0 || len(rule.ReadDeny) > 0 || + rule.SizeLimit != nil || rule.ContentLimit != nil || len(rule.MustHaveTags) > 0 || + rule.MaxExpiry != nil || rule.Privileged || rule.RateLimit != nil || + rule.MaxAgeOfEvent != nil || rule.MaxAgeEventInFuture != nil + if !hasOtherRestrictions { + // No other restrictions, use default policy + return p.getDefaultPolicyAction(), nil + } } // Apply rule-based filtering @@ -452,12 +467,41 @@ func (p *P) checkRulePolicy(access string, ev *event.E, rule Rule, loggedInPubke // checkScriptPolicy runs the policy script to determine if event should be allowed func (p *P) checkScriptPolicy(access string, ev *event.E, scriptPath string, loggedInPubkey []byte, ipAddress string) (allowed bool, err error) { - if p.Manager == nil || !p.Manager.IsRunning() { - // If script is not running, fall back to default policy - log.W.F("policy rule for kind %d is inactive (script not running), falling back to default policy (%s)", ev.Kind, p.DefaultPolicy) + if p.Manager == nil { + return false, fmt.Errorf("policy manager is not initialized") + } + + // If policy is disabled, fall back to default policy immediately + if !p.Manager.IsEnabled() { + log.W.F("policy rule for kind %d is inactive (policy disabled), falling back to default policy (%s)", ev.Kind, p.DefaultPolicy) return p.getDefaultPolicyAction(), nil } + // Policy is enabled, check if it's running + if !p.Manager.IsRunning() { + // Check if script file exists + if _, err := os.Stat(p.Manager.GetScriptPath()); os.IsNotExist(err) { + // Script doesn't exist, this is a fatal error + buf := make([]byte, 1024*1024) + n := runtime.Stack(buf, true) + log.E.F("policy script does not exist at %s", p.Manager.GetScriptPath()) + fmt.Fprintf(os.Stderr, "FATAL: Policy script required but not found at %s\n", p.Manager.GetScriptPath()) + fmt.Fprintf(os.Stderr, "Stack trace:\n%s\n", buf[:n]) + os.Exit(1) + } + + // Try to start the policy and wait for it + if err := p.Manager.ensureRunning(); err != nil { + // Startup failed, this is a fatal error + buf := make([]byte, 1024*1024) + n := runtime.Stack(buf, true) + log.E.F("failed to start policy script: %v", err) + fmt.Fprintf(os.Stderr, "FATAL: Failed to start policy script: %v\n", err) + fmt.Fprintf(os.Stderr, "Stack trace:\n%s\n", buf[:n]) + os.Exit(1) + } + } + // Create policy event with additional context policyEvent := &PolicyEvent{ E: ev, @@ -535,6 +579,91 @@ func (pm *PolicyManager) startPolicyIfExists() { } } +// ensureRunning ensures the policy is running, starting it if necessary. +// It waits for startup to complete with a timeout and returns an error if startup fails. +func (pm *PolicyManager) ensureRunning() error { + pm.mutex.Lock() + // Check if already running + if pm.isRunning { + pm.mutex.Unlock() + return nil + } + + // Check if already starting + if pm.isStarting { + pm.mutex.Unlock() + // Wait for startup to complete + select { + case err := <-pm.startupChan: + if err != nil { + return fmt.Errorf("policy startup failed: %v", err) + } + // Double-check it's actually running after receiving signal + pm.mutex.RLock() + running := pm.isRunning + pm.mutex.RUnlock() + if !running { + return fmt.Errorf("policy startup completed but process is not running") + } + return nil + case <-time.After(10 * time.Second): + return fmt.Errorf("policy startup timeout") + case <-pm.ctx.Done(): + return fmt.Errorf("policy context cancelled") + } + } + + // Mark as starting + pm.isStarting = true + pm.mutex.Unlock() + + // Start the policy in a goroutine + go func() { + err := pm.StartPolicy() + pm.mutex.Lock() + pm.isStarting = false + pm.mutex.Unlock() + // Signal startup completion (non-blocking) + // Drain any stale value first, then send + select { + case <-pm.startupChan: + default: + } + select { + case pm.startupChan <- err: + default: + // Channel should be empty now, but if it's full, try again + pm.startupChan <- err + } + }() + + // Wait for startup to complete + select { + case err := <-pm.startupChan: + if err != nil { + return fmt.Errorf("policy startup failed: %v", err) + } + // Double-check it's actually running after receiving signal + pm.mutex.RLock() + running := pm.isRunning + pm.mutex.RUnlock() + if !running { + return fmt.Errorf("policy startup completed but process is not running") + } + return nil + case <-time.After(10 * time.Second): + pm.mutex.Lock() + pm.isStarting = false + pm.mutex.Unlock() + return fmt.Errorf("policy startup timeout") + case <-pm.ctx.Done(): + pm.mutex.Lock() + pm.isStarting = false + pm.mutex.Unlock() + return fmt.Errorf("policy context cancelled") + } +} + // StartPolicy starts the policy script process. // Returns an error if the script doesn't exist, can't be executed, or is already running. func (pm *PolicyManager) StartPolicy() error { @@ -800,6 +929,11 @@ func (pm *PolicyManager) IsRunning() bool { return pm.isRunning } +// GetScriptPath returns the path to the policy script. +func (pm *PolicyManager) GetScriptPath() string { + return pm.scriptPath +} + // Shutdown gracefully shuts down the policy manager. // It cancels the context and stops any running policy script. func (pm *PolicyManager) Shutdown() { diff --git a/pkg/policy/policy_test.go b/pkg/policy/policy_test.go index b13febb..c56d874 100644 --- a/pkg/policy/policy_test.go +++ b/pkg/policy/policy_test.go @@ -1136,11 +1136,11 @@ func TestMaxAgeChecks(t *testing.T) { } } -func TestScriptPolicyNotRunningFallsBackToDefault(t *testing.T) { +func TestScriptPolicyDisabledFallsBackToDefault(t *testing.T) { // Generate real keypair for testing eventSigner, eventPubkey := generateTestKeypair(t) - // Create a policy with a script rule but no running manager, default policy is "allow" + // Create a policy with a script rule but policy is disabled, default policy is "allow" policy := &P{ DefaultPolicy: "allow", Rules: map[int]Rule{ @@ -1150,21 +1150,21 @@ func TestScriptPolicyNotRunningFallsBackToDefault(t *testing.T) { }, }, Manager: &PolicyManager{ - enabled: true, - isRunning: false, // Script is not running + enabled: false, // Policy is disabled + isRunning: false, }, } // Create real test event with proper signing testEvent := createTestEvent(t, eventSigner, "test content", 1) - // Should allow the event when script is configured but not running (falls back to default "allow") + // Should allow the event when policy is disabled (falls back to default "allow") allowed, err := policy.CheckPolicy("write", testEvent, eventPubkey, "127.0.0.1") if err != nil { t.Errorf("Unexpected error: %v", err) } if !allowed { - t.Error("Expected event to be allowed when script is not running (should fall back to default policy 'allow')") + t.Error("Expected event to be allowed when policy is disabled (should fall back to default policy 'allow')") } // Test with default policy "deny" @@ -1174,7 +1174,7 @@ func TestScriptPolicyNotRunningFallsBackToDefault(t *testing.T) { t.Errorf("Unexpected error: %v", err2) } if allowed2 { - t.Error("Expected event to be denied when script is not running and default policy is 'deny'") + t.Error("Expected event to be denied when policy is disabled and default policy is 'deny'") } } @@ -1340,12 +1340,11 @@ func TestNewPolicyWithDefaultPolicyJSON(t *testing.T) { } } -func TestScriptProcessingFailureFallsBackToDefault(t *testing.T) { +func TestScriptProcessingDisabledFallsBackToDefault(t *testing.T) { // Generate real keypair for testing eventSigner, eventPubkey := generateTestKeypair(t) - // Test that script processing failures fall back to default policy - // We'll test this by using a manager that's not running (simulating failure) + // Test that when policy is disabled, it falls back to default policy policy := &P{ DefaultPolicy: "allow", Rules: map[int]Rule{ @@ -1355,21 +1354,21 @@ func TestScriptProcessingFailureFallsBackToDefault(t *testing.T) { }, }, Manager: &PolicyManager{ - enabled: true, - isRunning: false, // Script is not running (simulating failure) + enabled: false, // Policy is disabled + isRunning: false, }, } // Create real test event with proper signing testEvent := createTestEvent(t, eventSigner, "test content", 1) - // Should allow the event when script is not running (falls back to default "allow") + // Should allow the event when policy is disabled (falls back to default "allow") allowed, err := policy.checkScriptPolicy("write", testEvent, "policy.sh", eventPubkey, "127.0.0.1") if err != nil { t.Errorf("Unexpected error: %v", err) } if !allowed { - t.Error("Expected event to be allowed when script is not running (should fall back to default policy 'allow')") + t.Error("Expected event to be allowed when policy is disabled (should fall back to default policy 'allow')") } // Test with default policy "deny" @@ -1379,7 +1378,7 @@ func TestScriptProcessingFailureFallsBackToDefault(t *testing.T) { t.Errorf("Unexpected error: %v", err2) } if allowed2 { - t.Error("Expected event to be denied when script is not running and default policy is 'deny'") + t.Error("Expected event to be denied when policy is disabled and default policy is 'deny'") } } diff --git a/pkg/version/version b/pkg/version/version index 014ec61..4646fd9 100644 --- a/pkg/version/version +++ b/pkg/version/version @@ -1 +1 @@ -v0.20.2 \ No newline at end of file +v0.20.3 \ No newline at end of file