Enhance Sprocket functionality and error handling
This commit introduces significant improvements to the Sprocket system, including: - Detailed documentation in `readme.adoc` for manual updates and failure handling. - Implementation of automatic disablement of Sprocket on failure, with periodic checks for recovery. - Enhanced logging for event rejection when Sprocket is disabled or not running. These changes ensure better user guidance and system resilience during Sprocket failures.
This commit is contained in:
@@ -35,11 +35,22 @@ func (l *Listener) HandleEvent(msg []byte) (err error) {
|
||||
|
||||
// Check if sprocket is enabled and process event through it
|
||||
if l.sprocketManager != nil && l.sprocketManager.IsEnabled() {
|
||||
if !l.sprocketManager.IsRunning() {
|
||||
// Sprocket is enabled but not running - drop all messages
|
||||
log.W.F("sprocket is enabled but not running, dropping event %0x", env.E.ID)
|
||||
if l.sprocketManager.IsDisabled() {
|
||||
// Sprocket is disabled due to failure - reject all events
|
||||
log.W.F("sprocket is disabled, rejecting event %0x", env.E.ID)
|
||||
if err = Ok.Error(
|
||||
l, env, "sprocket policy not available",
|
||||
l, env, "sprocket disabled - events rejected until sprocket is restored",
|
||||
); chk.E(err) {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if !l.sprocketManager.IsRunning() {
|
||||
// Sprocket is enabled but not running - reject all events
|
||||
log.W.F("sprocket is enabled but not running, rejecting event %0x", env.E.ID)
|
||||
if err = Ok.Error(
|
||||
l, env, "sprocket not running - events rejected until sprocket starts",
|
||||
); chk.E(err) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -19,11 +19,9 @@ func Run(
|
||||
) (quit chan struct{}) {
|
||||
// shutdown handler
|
||||
go func() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.I.F("shutting down")
|
||||
close(quit)
|
||||
}
|
||||
<-ctx.Done()
|
||||
log.I.F("shutting down")
|
||||
close(quit)
|
||||
}()
|
||||
// get the admins
|
||||
var err error
|
||||
|
||||
@@ -37,6 +37,7 @@ type SprocketManager struct {
|
||||
mutex sync.RWMutex
|
||||
isRunning bool
|
||||
enabled bool
|
||||
disabled bool // true when sprocket is disabled due to failure
|
||||
stdin io.WriteCloser
|
||||
stdout io.ReadCloser
|
||||
stderr io.ReadCloser
|
||||
@@ -56,21 +57,105 @@ func NewSprocketManager(ctx context.Context, appName string, enabled bool) *Spro
|
||||
configDir: configDir,
|
||||
scriptPath: scriptPath,
|
||||
enabled: enabled,
|
||||
disabled: false,
|
||||
responseChan: make(chan SprocketResponse, 100), // Buffered channel for responses
|
||||
}
|
||||
|
||||
// Start the sprocket script if it exists and is enabled
|
||||
if enabled {
|
||||
go sm.startSprocketIfExists()
|
||||
// Start periodic check for sprocket script availability
|
||||
go sm.periodicCheck()
|
||||
}
|
||||
|
||||
return sm
|
||||
}
|
||||
|
||||
// disableSprocket disables sprocket due to failure
|
||||
func (sm *SprocketManager) disableSprocket() {
|
||||
sm.mutex.Lock()
|
||||
defer sm.mutex.Unlock()
|
||||
|
||||
if !sm.disabled {
|
||||
sm.disabled = true
|
||||
log.W.F("sprocket disabled due to failure - all events will be rejected (script location: %s)", sm.scriptPath)
|
||||
}
|
||||
}
|
||||
|
||||
// enableSprocket re-enables sprocket and attempts to start it
|
||||
func (sm *SprocketManager) enableSprocket() {
|
||||
sm.mutex.Lock()
|
||||
defer sm.mutex.Unlock()
|
||||
|
||||
if sm.disabled {
|
||||
sm.disabled = false
|
||||
log.I.F("sprocket re-enabled, attempting to start")
|
||||
|
||||
// Attempt to start sprocket in background
|
||||
go func() {
|
||||
if _, err := os.Stat(sm.scriptPath); err == nil {
|
||||
if err := sm.StartSprocket(); err != nil {
|
||||
log.E.F("failed to restart sprocket: %v", err)
|
||||
sm.disableSprocket()
|
||||
} else {
|
||||
log.I.F("sprocket restarted successfully")
|
||||
}
|
||||
} else {
|
||||
log.W.F("sprocket script still not found, keeping disabled")
|
||||
sm.disableSprocket()
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
// periodicCheck periodically checks if sprocket script becomes available
|
||||
func (sm *SprocketManager) periodicCheck() {
|
||||
ticker := time.NewTicker(30 * time.Second) // Check every 30 seconds
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-sm.ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
sm.mutex.RLock()
|
||||
disabled := sm.disabled
|
||||
running := sm.isRunning
|
||||
sm.mutex.RUnlock()
|
||||
|
||||
// Only check if sprocket is disabled or not running
|
||||
if disabled || !running {
|
||||
if _, err := os.Stat(sm.scriptPath); err == nil {
|
||||
// Script is available, try to enable/restart
|
||||
if disabled {
|
||||
sm.enableSprocket()
|
||||
} else if !running {
|
||||
// Script exists but sprocket isn't running, try to start
|
||||
go func() {
|
||||
if err := sm.StartSprocket(); err != nil {
|
||||
log.E.F("failed to restart sprocket: %v", err)
|
||||
sm.disableSprocket()
|
||||
} else {
|
||||
log.I.F("sprocket restarted successfully")
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// startSprocketIfExists starts the sprocket script if the file exists
|
||||
func (sm *SprocketManager) startSprocketIfExists() {
|
||||
if _, err := os.Stat(sm.scriptPath); err == nil {
|
||||
sm.StartSprocket()
|
||||
if err := sm.StartSprocket(); err != nil {
|
||||
log.E.F("failed to start sprocket: %v", err)
|
||||
sm.disableSprocket()
|
||||
}
|
||||
} else {
|
||||
log.W.F("sprocket script not found at %s, disabling sprocket", sm.scriptPath)
|
||||
sm.disableSprocket()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -473,6 +558,13 @@ func (sm *SprocketManager) IsRunning() bool {
|
||||
return sm.isRunning
|
||||
}
|
||||
|
||||
// IsDisabled returns whether sprocket is disabled due to failure
|
||||
func (sm *SprocketManager) IsDisabled() bool {
|
||||
sm.mutex.RLock()
|
||||
defer sm.mutex.RUnlock()
|
||||
return sm.disabled
|
||||
}
|
||||
|
||||
// monitorProcess monitors the sprocket process and cleans up when it exits
|
||||
func (sm *SprocketManager) monitorProcess() {
|
||||
if sm.currentCmd == nil {
|
||||
@@ -504,6 +596,9 @@ func (sm *SprocketManager) monitorProcess() {
|
||||
|
||||
if err != nil {
|
||||
log.E.F("sprocket process exited with error: %v", err)
|
||||
// Auto-disable sprocket on failure
|
||||
sm.disabled = true
|
||||
log.W.F("sprocket disabled due to process failure - all events will be rejected (script location: %s)", sm.scriptPath)
|
||||
} else {
|
||||
log.I.F("sprocket process exited normally")
|
||||
}
|
||||
|
||||
61
readme.adoc
61
readme.adoc
@@ -227,6 +227,67 @@ export ORLY_APP_NAME="ORLY"
|
||||
The sprocket script should be placed at:
|
||||
`~/.config/{ORLY_APP_NAME}/sprocket.sh`
|
||||
|
||||
For example, with default `ORLY_APP_NAME="ORLY"`:
|
||||
`~/.config/ORLY/sprocket.sh`
|
||||
|
||||
Backup files are automatically created when updating sprocket scripts via the web UI, with timestamps like:
|
||||
`~/.config/ORLY/sprocket.sh.20240101120000`
|
||||
|
||||
=== manual sprocket updates
|
||||
|
||||
For manual sprocket script updates, you can use the stop/write/restart method:
|
||||
|
||||
1. **Stop the relay**:
|
||||
```bash
|
||||
# Send SIGINT to gracefully stop
|
||||
kill -INT <relay_pid>
|
||||
```
|
||||
|
||||
2. **Write new sprocket script**:
|
||||
```bash
|
||||
# Create/update the sprocket script
|
||||
cat > ~/.config/ORLY/sprocket.sh << 'EOF'
|
||||
#!/bin/bash
|
||||
while read -r line; do
|
||||
if [[ -n "$line" ]]; then
|
||||
event_id=$(echo "$line" | jq -r '.id')
|
||||
echo "{\"id\":\"$event_id\",\"action\":\"accept\",\"msg\":\"\"}"
|
||||
fi
|
||||
done
|
||||
EOF
|
||||
|
||||
# Make it executable
|
||||
chmod +x ~/.config/ORLY/sprocket.sh
|
||||
```
|
||||
|
||||
3. **Restart the relay**:
|
||||
```bash
|
||||
./orly
|
||||
```
|
||||
|
||||
The relay will automatically detect the new sprocket script and start it. If the script fails, sprocket will be disabled and all events rejected until the script is fixed.
|
||||
|
||||
=== failure handling
|
||||
|
||||
When sprocket is enabled but fails to start or crashes:
|
||||
|
||||
1. **Automatic Disable**: Sprocket is automatically disabled
|
||||
2. **Event Rejection**: All incoming events are rejected with error message
|
||||
3. **Periodic Recovery**: Every 30 seconds, the system checks if the sprocket script becomes available
|
||||
4. **Auto-Restart**: If the script is found, sprocket is automatically re-enabled and restarted
|
||||
|
||||
This ensures that:
|
||||
- Relay continues running even when sprocket fails
|
||||
- No events are processed without proper sprocket filtering
|
||||
- Sprocket automatically recovers when the script is fixed
|
||||
- Clear error messages inform users about the sprocket status
|
||||
- Error messages include the exact file location for easy fixes
|
||||
|
||||
When sprocket fails, the error message will show:
|
||||
`sprocket disabled due to failure - all events will be rejected (script location: ~/.config/ORLY/sprocket.sh)`
|
||||
|
||||
This makes it easy to locate and fix the sprocket script file.
|
||||
|
||||
=== example script
|
||||
|
||||
Here's a Python example that implements various filtering criteria:
|
||||
|
||||
Reference in New Issue
Block a user