Fix directory spider tag loss: size limits and validation
Some checks failed
Go / build-and-release (push) Has been cancelled

- Increase WebSocket message size limit from 500KB to 10MB to prevent
  truncation of large kind 3 follow list events (8000+ follows)
- Add validation in SaveEvent to reject kind 3 events without p tags
  before storage, preventing malformed events from buggy relays
- Implement CleanupKind3WithoutPTags() to remove existing malformed
  kind 3 events at startup
- Add enhanced logging showing tag count and event ID when rejecting
  invalid kind 3 events for better observability
- Create round-trip test proving binary tag encoding preserves p tags
  correctly through JSON→binary→JSON cycle
- Root cause: 500KB limit was truncating large follow lists during
  WebSocket receive, causing tags to be lost or incomplete
- Three-layer defense: prevent at the gate (size limit), validate at save
  time, and clean up at startup

Files modified:
- app/handle-websocket.go: Increase DefaultMaxMessageSize to 10MB
- pkg/database/save-event.go: Add kind 3 validation with logging
- pkg/database/cleanup-kind3.go: New cleanup function
- pkg/database/cleanup-kind3_test.go: Round-trip test
- app/main.go: Invoke cleanup at startup
This commit is contained in:
2025-11-27 13:49:33 +00:00
parent 7fedcd24d3
commit 48b0b6984c
7 changed files with 236 additions and 2 deletions

View File

@@ -479,6 +479,36 @@ ORLY has received several significant performance improvements in recent updates
- Serial-based event fetching for efficiency - Serial-based event fetching for efficiency
- Filter analysis in `get-indexes-from-filter.go` selects optimal strategy - Filter analysis in `get-indexes-from-filter.go` selects optimal strategy
## Git Commit Message Format
When asked to "make a commit comment", generate a commit message following this standard format:
**Structure:**
- **First line**: 72 characters maximum, imperative mood summary
- **Second line**: Empty line
- **Body**: Bullet points describing each change in detail
- **Optional**: "Files modified:" section listing affected files
**Example:**
```
Fix directory spider tag loss: size limits and validation
- Increase WebSocket message size limit from 500KB to 10MB to prevent
truncation of large kind 3 follow list events (8000+ follows)
- Add validation in SaveEvent to reject kind 3 events without p tags
before storage, preventing malformed events from buggy relays
- Implement CleanupKind3WithoutPTags() to remove existing malformed
kind 3 events at startup
- Add enhanced logging showing tag count and event ID when rejecting
invalid kind 3 events for better observability
Files modified:
- app/handle-websocket.go: Increase DefaultMaxMessageSize to 10MB
- pkg/database/save-event.go: Add kind 3 validation with logging
- pkg/database/cleanup-kind3.go: New cleanup function
- app/main.go: Invoke cleanup at startup
```
## Release Process ## Release Process
1. Update version in `pkg/version/version` file (e.g., v1.2.3) 1. Update version in `pkg/version/version` file (e.g., v1.2.3)

View File

@@ -21,7 +21,10 @@ const (
DefaultPongWait = 60 * time.Second DefaultPongWait = 60 * time.Second
DefaultPingWait = DefaultPongWait / 2 DefaultPingWait = DefaultPongWait / 2
DefaultWriteTimeout = 3 * time.Second DefaultWriteTimeout = 3 * time.Second
DefaultMaxMessageSize = 512000 // Match khatru's MaxMessageSize // DefaultMaxMessageSize is the maximum message size for WebSocket connections
// Increased from 512KB to 10MB to support large kind 3 follow lists (10k+ follows)
// and other large events without truncation
DefaultMaxMessageSize = 10 * 1024 * 1024 // 10MB
// ClientMessageSizeLimit is the maximum message size that clients can handle // ClientMessageSizeLimit is the maximum message size that clients can handle
// This is set to 100MB to allow large messages // This is set to 100MB to allow large messages
ClientMessageSizeLimit = 100 * 1024 * 1024 // 100MB ClientMessageSizeLimit = 100 * 1024 * 1024 // 100MB

View File

@@ -92,6 +92,13 @@ func Run(
// Continue anyway - follows can be loaded when admins update their follow lists // Continue anyway - follows can be loaded when admins update their follow lists
} }
// Clean up any kind 3 events that lost their p tags (only for Badger backend)
if badgerDB, ok := db.(*database.D); ok {
if err := badgerDB.CleanupKind3WithoutPTags(ctx); chk.E(err) {
log.E.F("failed to cleanup kind 3 events: %v", err)
}
}
// Initialize spider manager based on mode (only for Badger backend) // Initialize spider manager based on mode (only for Badger backend)
if badgerDB, ok := db.(*database.D); ok && cfg.SpiderMode != "none" { if badgerDB, ok := db.(*database.D); ok && cfg.SpiderMode != "none" {
if l.spiderManager, err = spider.New(ctx, badgerDB, l.publishers, cfg.SpiderMode); chk.E(err) { if l.spiderManager, err = spider.New(ctx, badgerDB, l.publishers, cfg.SpiderMode); chk.E(err) {

View File

@@ -0,0 +1,72 @@
package database
import (
"context"
"time"
"git.mleku.dev/mleku/nostr/encoders/filter"
"git.mleku.dev/mleku/nostr/encoders/kind"
"lol.mleku.dev/chk"
"lol.mleku.dev/log"
)
// CleanupKind3WithoutPTags deletes every stored kind 3 (follow list) event
// that carries no "p" tag.
//
// It queries all kind 3 events with QueryEvents and keeps an event only if it
// has at least one tag with two or more fields whose key is the single byte
// 'p'. Every other event is removed with DeleteEvent. The original note on
// this function attributes such events to the directory spider having saved
// follow lists that had lost their tags.
//
// The returned error is non-nil when the initial query fails, or when ctx is
// cancelled before the sweep has finished (the context's error is returned).
// A failed deletion is logged, counted, and skipped; it does not abort the
// sweep and is not reflected in the returned error.
func (d *D) CleanupKind3WithoutPTags(ctx context.Context) error {
	log.I.F("database: starting cleanup of kind 3 events without p tags")
	startTime := time.Now()

	// Query for all kind 3 events
	f := &filter.F{
		Kinds: kind.NewS(kind.FollowList),
	}
	events, err := d.QueryEvents(ctx, f)
	if chk.E(err) {
		return err
	}

	deletedCount := 0
	failedCount := 0

	// Check each event for p tags
	for _, ev := range events {
		// Stop as soon as the caller cancels instead of walking the rest of
		// the result set with a dead context.
		if err := ctx.Err(); err != nil {
			log.W.F("database: kind 3 cleanup interrupted after deleting %d of %d scanned events: %v",
				deletedCount, len(events), err)
			return err
		}

		hasPTag := false
		if ev.Tags != nil {
			// Look for at least one p tag
			for _, tag := range *ev.Tags {
				if tag == nil || tag.Len() < 2 {
					continue
				}
				key := tag.Key()
				if len(key) == 1 && key[0] == 'p' {
					hasPTag = true
					break
				}
			}
		}
		if hasPTag {
			continue
		}

		// Delete events without p tags
		log.W.F("database: deleting kind 3 event without p tags from pubkey %x", ev.Pubkey)
		if err := d.DeleteEvent(ctx, ev.ID); chk.E(err) {
			log.E.F("database: failed to delete kind 3 event %x: %v", ev.ID, err)
			failedCount++
			continue
		}
		deletedCount++
	}

	duration := time.Since(startTime)
	if failedCount > 0 {
		// Surface best-effort failures in the summary so they are not lost
		// among the per-event log lines.
		log.W.F("database: cleanup could not delete %d kind 3 events without p tags", failedCount)
	}
	if deletedCount > 0 {
		log.I.F("database: cleanup completed in %v - deleted %d kind 3 events without p tags (scanned %d total)",
			duration, deletedCount, len(events))
	} else {
		log.I.F("database: cleanup completed in %v - no kind 3 events needed deletion (scanned %d total)",
			duration, len(events))
	}

	return nil
}

View File

@@ -0,0 +1,97 @@
package database
import (
"bytes"
"encoding/json"
"testing"
"git.mleku.dev/mleku/nostr/encoders/event"
"github.com/stretchr/testify/assert"
)
// TestKind3TagRoundTrip checks that a kind 3 event with three p tags keeps
// those tags through a JSON -> binary -> JSON round trip.
//
// The steps are: decode the sample JSON into an event, encode it with
// MarshalBinary, decode that with UnmarshalBinary, and finally re-encode to
// JSON. After each decode the p tags are counted, and the final JSON is parsed
// generically to confirm every tag still has key "p".
//
// Assertions that guard a later dereference or index are fatal: testify's
// assert functions only record a failure and return false, so without halting
// a nil Tags pointer or a short tag array would panic instead of reporting a
// clean test failure.
func TestKind3TagRoundTrip(t *testing.T) {
	// mustPass halts the test when a precondition assertion has failed.
	mustPass := func(ok bool) {
		if !ok {
			t.FailNow()
		}
	}

	// Sample kind 3 event JSON with p tags
	kind3JSON := `{
"id": "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef",
"pubkey": "fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321",
"created_at": 1234567890,
"kind": 3,
"tags": [
["p", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"],
["p", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"],
["p", "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"]
],
"content": "",
"sig": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
}`

	// 1. Unmarshal from JSON (simulates receiving from WebSocket)
	ev1 := event.New()
	err := json.Unmarshal([]byte(kind3JSON), ev1)
	mustPass(assert.NoError(t, err))
	// Tags is dereferenced below, so a nil pointer must stop the test here.
	mustPass(assert.NotNil(t, ev1.Tags))
	assert.Equal(t, 3, ev1.Tags.Len(), "Should have 3 tags")

	// Verify all tags have key "p"
	pTagCount := 0
	for _, tag := range *ev1.Tags {
		if tag != nil && tag.Len() >= 2 {
			key := tag.Key()
			if len(key) == 1 && key[0] == 'p' {
				pTagCount++
				t.Logf("Found p tag with value length: %d bytes", len(tag.Value()))
			}
		}
	}
	assert.Equal(t, 3, pTagCount, "Should have 3 p tags after JSON unmarshal")

	// 2. Marshal to binary (simulates database storage)
	buf := new(bytes.Buffer)
	ev1.MarshalBinary(buf)
	binaryData := buf.Bytes()
	t.Logf("Binary encoding size: %d bytes", len(binaryData))

	// 3. Unmarshal from binary (simulates database retrieval)
	ev2 := event.New()
	err = ev2.UnmarshalBinary(bytes.NewBuffer(binaryData))
	mustPass(assert.NoError(t, err))
	// Same guard as above: ev2.Tags is dereferenced right after.
	mustPass(assert.NotNil(t, ev2.Tags))
	assert.Equal(t, 3, ev2.Tags.Len(), "Should have 3 tags after binary round-trip")

	// Verify all tags still have key "p"
	pTagCount2 := 0
	for _, tag := range *ev2.Tags {
		if tag != nil && tag.Len() >= 2 {
			key := tag.Key()
			if len(key) == 1 && key[0] == 'p' {
				pTagCount2++
				t.Logf("Found p tag after round-trip with value length: %d bytes", len(tag.Value()))
			}
		}
	}
	assert.Equal(t, 3, pTagCount2, "Should have 3 p tags after binary round-trip")

	// 4. Marshal back to JSON to verify tags are still there
	jsonData2, err := json.Marshal(ev2)
	mustPass(assert.NoError(t, err))
	t.Logf("JSON after round-trip: %s", string(jsonData2))

	// Parse the JSON and count p tags
	var jsonMap map[string]interface{}
	err = json.Unmarshal(jsonData2, &jsonMap)
	mustPass(assert.NoError(t, err))

	tags, ok := jsonMap["tags"].([]interface{})
	assert.True(t, ok, "tags should be an array")
	assert.Equal(t, 3, len(tags), "Should have 3 tags in final JSON")

	for i, tag := range tags {
		tagArray, ok := tag.([]interface{})
		// Skip the key check when the shape is wrong; indexing tagArray[0]
		// on a nil or empty slice would panic.
		if !assert.True(t, ok, "tag should be an array") {
			continue
		}
		if !assert.GreaterOrEqual(t, len(tagArray), 2, "tag should have at least 2 elements") {
			continue
		}
		assert.Equal(t, "p", tagArray[0], "tag %d should have key 'p'", i)
		t.Logf("Tag %d: %v", i, tagArray)
	}
}

View File

@@ -134,6 +134,31 @@ func (d *D) SaveEvent(c context.Context, ev *event.E) (
return return
} }
// Validate kind 3 (follow list) events have at least one p tag
// This prevents storing malformed follow lists that may come from buggy relays
if ev.Kind == 3 {
hasPTag := false
tagCount := 0
if ev.Tags != nil {
tagCount = ev.Tags.Len()
for _, tag := range *ev.Tags {
if tag != nil && tag.Len() >= 2 {
key := tag.Key()
if len(key) == 1 && key[0] == 'p' {
hasPTag = true
break
}
}
}
}
if !hasPTag {
log.W.F("SaveEvent: rejecting kind 3 event without p tags from pubkey %x (total tags: %d, event ID: %x)",
ev.Pubkey, tagCount, ev.ID)
err = errors.New("blocked: kind 3 follow list events must have at least one p tag")
return
}
}
// check if the event already exists // check if the event already exists
var ser *types.Uint40 var ser *types.Uint40
if ser, err = d.GetSerialById(ev.ID); err == nil && ser != nil { if ser, err = d.GetSerialById(ev.ID); err == nil && ser != nil {

View File

@@ -1 +1 @@
v0.31.0 v0.31.1