Enhance WebSocket connection management and error handling
Some checks failed
Go / build (push) Has been cancelled
Go / release (push) Has been cancelled

- Set initial read deadline for connections to prevent premature timeouts on idle connections.
- Improved pong and ping handlers to extend read deadlines and handle timeout errors more effectively.
- Refined error logging for connection issues, distinguishing between timeouts and connection errors to enhance debugging.
- Updated subscriber delivery logic to handle timeouts gracefully, allowing for potential recovery without immediate disconnection.
This commit is contained in:
2025-10-30 18:32:03 +00:00
parent b70f03bce0
commit ba2d35012c
2 changed files with 108 additions and 23 deletions

View File

@@ -283,17 +283,36 @@ func (p *P) Deliver(ev *event.E) {
hex.Enc(ev.ID), d.sub.remote, d.id, deliveryDuration, err)
// Check for timeout specifically
if strings.Contains(err.Error(), "timeout") || strings.Contains(err.Error(), "deadline") {
isTimeout := strings.Contains(err.Error(), "timeout") || strings.Contains(err.Error(), "deadline exceeded")
if isTimeout {
log.E.F("subscription delivery TIMEOUT: event=%s to=%s after %v (limit=%v)",
hex.Enc(ev.ID), d.sub.remote, deliveryDuration, DefaultWriteTimeout)
}
// Log connection cleanup
log.D.F("removing failed subscriber connection: %s", d.sub.remote)
// Only close connection on permanent errors, not transient timeouts
// WebSocket write errors typically indicate connection issues, but we should
// distinguish between timeouts (client might be slow) and connection errors
isConnectionError := strings.Contains(err.Error(), "use of closed network connection") ||
strings.Contains(err.Error(), "broken pipe") ||
strings.Contains(err.Error(), "connection reset") ||
websocket.IsCloseError(err, websocket.CloseAbnormalClosure,
websocket.CloseGoingAway,
websocket.CloseNoStatusReceived)
// On error, remove the subscriber connection safely
p.removeSubscriber(d.w)
_ = d.w.Close()
if isConnectionError {
log.D.F("removing failed subscriber connection due to connection error: %s", d.sub.remote)
p.removeSubscriber(d.w)
_ = d.w.Close()
} else if isTimeout {
// For timeouts, log but don't immediately close - give it another chance
// The read deadline will catch dead connections eventually
log.W.F("subscription delivery timeout for %s (client may be slow), skipping event but keeping connection", d.sub.remote)
} else {
// Unknown error - be conservative and close
log.D.F("removing failed subscriber connection due to unknown error: %s", d.sub.remote)
p.removeSubscriber(d.w)
_ = d.w.Close()
}
continue
}