Enhance WebSocket write handling and connection management
Some checks failed
Go / build (push) Has been cancelled
Go / release (push) Has been cancelled

- Introduced a buffered write channel and a dedicated write worker goroutine to serialize write operations, preventing concurrent write panics.
- Updated the Write and WriteControl methods to send messages through the write channel, improving error handling and connection stability.
- Refactored ping and pong handlers to utilize the new write channel for sending control messages.
- Enhanced publisher logic to manage write channels for WebSocket connections, ensuring efficient message delivery and error handling.
- Bumped version to v0.23.0 to reflect these changes.
This commit is contained in:
2025-11-02 17:02:28 +00:00
parent 0123c2d6f5
commit 354a2f1cda
5 changed files with 187 additions and 130 deletions

View File

@@ -3,7 +3,6 @@ package app
import (
"context"
"fmt"
"strings"
"sync"
"time"
@@ -18,6 +17,7 @@ import (
"next.orly.dev/pkg/encoders/kind"
"next.orly.dev/pkg/interfaces/publisher"
"next.orly.dev/pkg/interfaces/typer"
"next.orly.dev/pkg/protocol/publish"
"next.orly.dev/pkg/utils"
)
@@ -33,6 +33,9 @@ type Subscription struct {
// connections.
type Map map[*websocket.Conn]map[string]Subscription
// WriteChanMap maps websocket connections to their write channels
type WriteChanMap map[*websocket.Conn]chan<- publish.WriteRequest
type W struct {
*websocket.Conn
@@ -69,19 +72,37 @@ type P struct {
Mx sync.RWMutex
// Map is the map of subscribers and subscriptions from the websocket api.
Map
// WriteChans maps websocket connections to their write channels
WriteChans WriteChanMap
}
var _ publisher.I = &P{}
func NewPublisher(c context.Context) (publisher *P) {
return &P{
c: c,
Map: make(Map),
c: c,
Map: make(Map),
WriteChans: make(WriteChanMap, 100),
}
}
func (p *P) Type() (typeName string) { return Type }
// SetWriteChan stores the write channel for a websocket connection
func (p *P) SetWriteChan(conn *websocket.Conn, writeChan chan<- publish.WriteRequest) {
p.Mx.Lock()
defer p.Mx.Unlock()
p.WriteChans[conn] = writeChan
}
// GetWriteChan returns the write channel for a websocket connection
func (p *P) GetWriteChan(conn *websocket.Conn) (chan<- publish.WriteRequest, bool) {
p.Mx.RLock()
defer p.Mx.RUnlock()
ch, ok := p.WriteChans[conn]
return ch, ok
}
// Receive handles incoming messages to manage websocket listener subscriptions
// and associated filters.
//
@@ -269,61 +290,40 @@ func (p *P) Deliver(ev *event.E) {
log.D.F("attempting delivery of event %s (kind=%d, len=%d) to subscription %s @ %s",
hex.Enc(ev.ID), ev.Kind, len(msgData), d.id, d.sub.remote)
// Use a separate context with timeout for writes to prevent race conditions
// where the publisher context gets cancelled while writing events
deadline := time.Now().Add(DefaultWriteTimeout)
d.w.SetWriteDeadline(deadline)
// Get write channel for this connection
p.Mx.RLock()
writeChan, hasChan := p.GetWriteChan(d.w)
stillSubscribed := p.Map[d.w] != nil
p.Mx.RUnlock()
deliveryStart := time.Now()
if err = d.w.WriteMessage(websocket.TextMessage, msgData); err != nil {
deliveryDuration := time.Since(deliveryStart)
// Log detailed failure information
log.E.F("subscription delivery FAILED: event=%s to=%s sub=%s duration=%v error=%v",
hex.Enc(ev.ID), d.sub.remote, d.id, deliveryDuration, err)
// Check for timeout specifically
isTimeout := strings.Contains(err.Error(), "timeout") || strings.Contains(err.Error(), "deadline exceeded")
if isTimeout {
log.E.F("subscription delivery TIMEOUT: event=%s to=%s after %v (limit=%v)",
hex.Enc(ev.ID), d.sub.remote, deliveryDuration, DefaultWriteTimeout)
}
// Only close connection on permanent errors, not transient timeouts
// WebSocket write errors typically indicate connection issues, but we should
// distinguish between timeouts (client might be slow) and connection errors
isConnectionError := strings.Contains(err.Error(), "use of closed network connection") ||
strings.Contains(err.Error(), "broken pipe") ||
strings.Contains(err.Error(), "connection reset") ||
websocket.IsCloseError(err, websocket.CloseAbnormalClosure,
websocket.CloseGoingAway,
websocket.CloseNoStatusReceived)
if isConnectionError {
log.D.F("removing failed subscriber connection due to connection error: %s", d.sub.remote)
p.removeSubscriber(d.w)
_ = d.w.Close()
} else if isTimeout {
// For timeouts, log but don't immediately close - give it another chance
// The read deadline will catch dead connections eventually
log.W.F("subscription delivery timeout for %s (client may be slow), skipping event but keeping connection", d.sub.remote)
} else {
// Unknown error - be conservative and close
log.D.F("removing failed subscriber connection due to unknown error: %s", d.sub.remote)
p.removeSubscriber(d.w)
_ = d.w.Close()
}
if !stillSubscribed {
log.D.F("skipping delivery to %s - connection no longer subscribed", d.sub.remote)
continue
}
deliveryDuration := time.Since(deliveryStart)
log.D.F("subscription delivery SUCCESS: event=%s to=%s sub=%s duration=%v len=%d",
hex.Enc(ev.ID), d.sub.remote, d.id, deliveryDuration, len(msgData))
if !hasChan {
log.D.F("skipping delivery to %s - no write channel available", d.sub.remote)
continue
}
// Log slow deliveries for performance monitoring
if deliveryDuration > time.Millisecond*50 {
log.D.F("SLOW subscription delivery: event=%s to=%s duration=%v (>50ms)",
hex.Enc(ev.ID), d.sub.remote, deliveryDuration)
// Send to write channel - non-blocking with timeout
select {
case <-p.c.Done():
continue
case writeChan <- publish.WriteRequest{Data: msgData, MsgType: websocket.TextMessage, IsControl: false}:
log.D.F("subscription delivery QUEUED: event=%s to=%s sub=%s len=%d",
hex.Enc(ev.ID), d.sub.remote, d.id, len(msgData))
case <-time.After(DefaultWriteTimeout):
log.E.F("subscription delivery TIMEOUT: event=%s to=%s sub=%s (write channel full)",
hex.Enc(ev.ID), d.sub.remote, d.id)
// Check if connection is still valid
p.Mx.RLock()
stillSubscribed = p.Map[d.w] != nil
p.Mx.RUnlock()
if !stillSubscribed {
log.D.F("removing failed subscriber connection due to channel timeout: %s", d.sub.remote)
p.removeSubscriber(d.w)
}
}
}
}
@@ -340,6 +340,7 @@ func (p *P) removeSubscriberId(ws *websocket.Conn, id string) {
// Check the actual map after deletion, not the original reference
if len(p.Map[ws]) == 0 {
delete(p.Map, ws)
delete(p.WriteChans, ws)
}
}
}
@@ -350,6 +351,7 @@ func (p *P) removeSubscriber(ws *websocket.Conn) {
defer p.Mx.Unlock()
clear(p.Map[ws])
delete(p.Map, ws)
delete(p.WriteChans, ws)
}
// canSeePrivateEvent checks if the authenticated user can see an event with a private tag