diff --git a/CLAUDE.md b/CLAUDE.md index 4ac8920..49df4d2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -479,6 +479,36 @@ ORLY has received several significant performance improvements in recent updates - Serial-based event fetching for efficiency - Filter analysis in `get-indexes-from-filter.go` selects optimal strategy +## Git Commit Message Format + +When asked to "make a commit comment", generate a commit message following this standard format: + +**Structure:** +- **First line**: 72 characters maximum, imperative mood summary +- **Second line**: Empty line +- **Body**: Bullet points describing each change in detail +- **Optional**: "Files modified:" section listing affected files + +**Example:** +``` +Fix directory spider tag loss: size limits and validation + +- Increase WebSocket message size limit from 500KB to 10MB to prevent + truncation of large kind 3 follow list events (8000+ follows) +- Add validation in SaveEvent to reject kind 3 events without p tags + before storage, preventing malformed events from buggy relays +- Implement CleanupKind3WithoutPTags() to remove existing malformed + kind 3 events at startup +- Add enhanced logging showing tag count and event ID when rejecting + invalid kind 3 events for better observability + +Files modified: +- app/handle-websocket.go: Increase DefaultMaxMessageSize to 10MB +- pkg/database/save-event.go: Add kind 3 validation with logging +- pkg/database/cleanup-kind3.go: New cleanup function +- app/main.go: Invoke cleanup at startup +``` + ## Release Process 1. Update version in `pkg/version/version` file (e.g., v1.2.3) diff --git a/app/handle-websocket.go b/app/handle-websocket.go index 8da4772..3723292 100644 --- a/app/handle-websocket.go +++ b/app/handle-websocket.go @@ -21,7 +21,10 @@ const ( DefaultPongWait = 60 * time.Second DefaultPingWait = DefaultPongWait / 2 DefaultWriteTimeout = 3 * time.Second - DefaultMaxMessageSize = 512000 // Match khatru's MaxMessageSize + // DefaultMaxMessageSize is the maximum message size for WebSocket connections + // Increased from 512KB to 10MB to support large kind 3 follow lists (10k+ follows) + // and other large events without truncation + DefaultMaxMessageSize = 10 * 1024 * 1024 // 10MB // ClientMessageSizeLimit is the maximum message size that clients can handle // This is set to 100MB to allow large messages ClientMessageSizeLimit = 100 * 1024 * 1024 // 100MB diff --git a/app/main.go b/app/main.go index 7790493..04260e7 100644 --- a/app/main.go +++ b/app/main.go @@ -92,6 +92,13 @@ func Run( // Continue anyway - follows can be loaded when admins update their follow lists } + // Cleanup any kind 3 events that lost their p tags (only for Badger backend) + if badgerDB, ok := db.(*database.D); ok { + if err := badgerDB.CleanupKind3WithoutPTags(ctx); chk.E(err) { + log.E.F("failed to cleanup kind 3 events: %v", err) + } + } + // Initialize spider manager based on mode (only for Badger backend) if badgerDB, ok := db.(*database.D); ok && cfg.SpiderMode != "none" { if l.spiderManager, err = spider.New(ctx, badgerDB, l.publishers, cfg.SpiderMode); chk.E(err) { diff --git a/pkg/database/cleanup-kind3.go b/pkg/database/cleanup-kind3.go new file mode 100644 index 0000000..cc79206 --- /dev/null +++ b/pkg/database/cleanup-kind3.go @@ -0,0 +1,72 @@ +package database + +import ( + "context" + "time" + + "git.mleku.dev/mleku/nostr/encoders/filter" + "git.mleku.dev/mleku/nostr/encoders/kind" + "lol.mleku.dev/chk" + "lol.mleku.dev/log" +) + +// CleanupKind3WithoutPTags scans for kind 3 follow list events that have no p tags +// and deletes them. This cleanup is needed because the directory spider may have +// saved malformed events that lost their tags during serialization. +func (d *D) CleanupKind3WithoutPTags(ctx context.Context) error { + log.I.F("database: starting cleanup of kind 3 events without p tags") + + startTime := time.Now() + + // Query for all kind 3 events + f := &filter.F{ + Kinds: kind.NewS(kind.FollowList), + } + + events, err := d.QueryEvents(ctx, f) + if chk.E(err) { + return err + } + + deletedCount := 0 + + // Check each event for p tags + for _, ev := range events { + hasPTag := false + + if ev.Tags != nil && ev.Tags.Len() > 0 { + // Look for at least one p tag + for _, tag := range *ev.Tags { + if tag != nil && tag.Len() >= 2 { + key := tag.Key() + if len(key) == 1 && key[0] == 'p' { + hasPTag = true + break + } + } + } + } + + // Delete events without p tags + if !hasPTag { + log.W.F("database: deleting kind 3 event without p tags from pubkey %x", ev.Pubkey) + if err := d.DeleteEvent(ctx, ev.ID); chk.E(err) { + log.E.F("database: failed to delete kind 3 event %x: %v", ev.ID, err) + continue + } + deletedCount++ + } + } + + duration := time.Since(startTime) + + if deletedCount > 0 { + log.I.F("database: cleanup completed in %v - deleted %d kind 3 events without p tags (scanned %d total)", + duration, deletedCount, len(events)) + } else { + log.I.F("database: cleanup completed in %v - no kind 3 events needed deletion (scanned %d total)", + duration, len(events)) + } + + return nil +} diff --git a/pkg/database/cleanup-kind3_test.go b/pkg/database/cleanup-kind3_test.go new file mode 100644 index 0000000..ecb4019 --- /dev/null +++ b/pkg/database/cleanup-kind3_test.go @@ -0,0 +1,97 @@ +package database + +import ( + "bytes" + "encoding/json" + "testing" + + "git.mleku.dev/mleku/nostr/encoders/event" + "github.com/stretchr/testify/assert" +) + +// TestKind3TagRoundTrip tests that kind 3 events with p tags survive +// JSON -> binary -> JSON round trip +func TestKind3TagRoundTrip(t *testing.T) { + // Sample kind 3 event JSON with p tags + kind3JSON := `{ + "id": "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", + "pubkey": "fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321", + "created_at": 1234567890, + "kind": 3, + "tags": [ + ["p", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"], + ["p", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"], + ["p", "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"] + ], + "content": "", + "sig": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + }` + + // 1. Unmarshal from JSON (simulates receiving from WebSocket) + ev1 := event.New() + err := json.Unmarshal([]byte(kind3JSON), ev1) + assert.NoError(t, err) + assert.NotNil(t, ev1.Tags) + assert.Equal(t, 3, ev1.Tags.Len(), "Should have 3 tags") + + // Verify all tags have key "p" + pTagCount := 0 + for _, tag := range *ev1.Tags { + if tag != nil && tag.Len() >= 2 { + key := tag.Key() + if len(key) == 1 && key[0] == 'p' { + pTagCount++ + t.Logf("Found p tag with value length: %d bytes", len(tag.Value())) + } + } + } + assert.Equal(t, 3, pTagCount, "Should have 3 p tags after JSON unmarshal") + + // 2. Marshal to binary (simulates database storage) + buf := new(bytes.Buffer) + ev1.MarshalBinary(buf) + binaryData := buf.Bytes() + t.Logf("Binary encoding size: %d bytes", len(binaryData)) + + // 3. Unmarshal from binary (simulates database retrieval) + ev2 := event.New() + err = ev2.UnmarshalBinary(bytes.NewBuffer(binaryData)) + assert.NoError(t, err) + assert.NotNil(t, ev2.Tags) + assert.Equal(t, 3, ev2.Tags.Len(), "Should have 3 tags after binary round-trip") + + // Verify all tags still have key "p" + pTagCount2 := 0 + for _, tag := range *ev2.Tags { + if tag != nil && tag.Len() >= 2 { + key := tag.Key() + if len(key) == 1 && key[0] == 'p' { + pTagCount2++ + t.Logf("Found p tag after round-trip with value length: %d bytes", len(tag.Value())) + } + } + } + assert.Equal(t, 3, pTagCount2, "Should have 3 p tags after binary round-trip") + + // 4. Marshal back to JSON to verify tags are still there + jsonData2, err := json.Marshal(ev2) + assert.NoError(t, err) + t.Logf("JSON after round-trip: %s", string(jsonData2)) + + // Parse the JSON and count p tags + var jsonMap map[string]interface{} + err = json.Unmarshal(jsonData2, &jsonMap) + assert.NoError(t, err) + + tags, ok := jsonMap["tags"].([]interface{}) + assert.True(t, ok, "tags should be an array") + assert.Equal(t, 3, len(tags), "Should have 3 tags in final JSON") + + for i, tag := range tags { + tagArray, ok := tag.([]interface{}) + assert.True(t, ok, "tag should be an array") + assert.GreaterOrEqual(t, len(tagArray), 2, "tag should have at least 2 elements") + assert.Equal(t, "p", tagArray[0], "tag %d should have key 'p'", i) + t.Logf("Tag %d: %v", i, tagArray) + } +} diff --git a/pkg/database/save-event.go b/pkg/database/save-event.go index facd635..19836c2 100644 --- a/pkg/database/save-event.go +++ b/pkg/database/save-event.go @@ -134,6 +134,31 @@ func (d *D) SaveEvent(c context.Context, ev *event.E) ( return } + // Validate kind 3 (follow list) events have at least one p tag + // This prevents storing malformed follow lists that may come from buggy relays + if ev.Kind == 3 { + hasPTag := false + tagCount := 0 + if ev.Tags != nil { + tagCount = ev.Tags.Len() + for _, tag := range *ev.Tags { + if tag != nil && tag.Len() >= 2 { + key := tag.Key() + if len(key) == 1 && key[0] == 'p' { + hasPTag = true + break + } + } + } + } + if !hasPTag { + log.W.F("SaveEvent: rejecting kind 3 event without p tags from pubkey %x (total tags: %d, event ID: %x)", + ev.Pubkey, tagCount, ev.ID) + err = errors.New("blocked: kind 3 follow list events must have at least one p tag") + return + } + } + // check if the event already exists var ser *types.Uint40 if ser, err = d.GetSerialById(ev.ID); err == nil && ser != nil { diff --git a/pkg/version/version b/pkg/version/version index 4e0bea4..e4d3918 100644 --- a/pkg/version/version +++ b/pkg/version/version @@ -1 +1 @@ -v0.31.0 \ No newline at end of file +v0.31.1 \ No newline at end of file