Fix directory spider tag loss: size limits and validation
Some checks failed
Go / build-and-release (push) Has been cancelled
Some checks failed
Go / build-and-release (push) Has been cancelled
- Increase WebSocket message size limit from 500KB to 10MB to prevent truncation of large kind 3 follow list events (8000+ follows) - Add validation in SaveEvent to reject kind 3 events without p tags before storage, preventing malformed events from buggy relays - Implement CleanupKind3WithoutPTags() to remove existing malformed kind 3 events at startup - Add enhanced logging showing tag count and event ID when rejecting invalid kind 3 events for better observability - Create round-trip test proving binary tag encoding preserves p tags correctly through JSON→binary→JSON cycle - Root cause: 500KB limit was truncating large follow lists during WebSocket receive, causing tags to be lost or incomplete - Three-layer defense: prevent at gate (size), validate (save time), and cleanup (startup) Files modified: - app/handle-websocket.go: Increase DefaultMaxMessageSize to 10MB - pkg/database/save-event.go: Add kind 3 validation with logging - pkg/database/cleanup-kind3.go: New cleanup function - pkg/database/cleanup-kind3_test.go: Round-trip test - app/main.go: Invoke cleanup at startup
This commit is contained in:
30
CLAUDE.md
30
CLAUDE.md
@@ -479,6 +479,36 @@ ORLY has received several significant performance improvements in recent updates
|
||||
- Serial-based event fetching for efficiency
|
||||
- Filter analysis in `get-indexes-from-filter.go` selects optimal strategy
|
||||
|
||||
## Git Commit Message Format
|
||||
|
||||
When asked to "make a commit comment", generate a commit message following this standard format:
|
||||
|
||||
**Structure:**
|
||||
- **First line**: 72 characters maximum, imperative mood summary
|
||||
- **Second line**: Empty line
|
||||
- **Body**: Bullet points describing each change in detail
|
||||
- **Optional**: "Files modified:" section listing affected files
|
||||
|
||||
**Example:**
|
||||
```
|
||||
Fix directory spider tag loss: size limits and validation
|
||||
|
||||
- Increase WebSocket message size limit from 500KB to 10MB to prevent
|
||||
truncation of large kind 3 follow list events (8000+ follows)
|
||||
- Add validation in SaveEvent to reject kind 3 events without p tags
|
||||
before storage, preventing malformed events from buggy relays
|
||||
- Implement CleanupKind3WithoutPTags() to remove existing malformed
|
||||
kind 3 events at startup
|
||||
- Add enhanced logging showing tag count and event ID when rejecting
|
||||
invalid kind 3 events for better observability
|
||||
|
||||
Files modified:
|
||||
- app/handle-websocket.go: Increase DefaultMaxMessageSize to 10MB
|
||||
- pkg/database/save-event.go: Add kind 3 validation with logging
|
||||
- pkg/database/cleanup-kind3.go: New cleanup function
|
||||
- app/main.go: Invoke cleanup at startup
|
||||
```
|
||||
|
||||
## Release Process
|
||||
|
||||
1. Update version in `pkg/version/version` file (e.g., v1.2.3)
|
||||
|
||||
@@ -21,7 +21,10 @@ const (
|
||||
DefaultPongWait = 60 * time.Second
|
||||
DefaultPingWait = DefaultPongWait / 2
|
||||
DefaultWriteTimeout = 3 * time.Second
|
||||
DefaultMaxMessageSize = 512000 // Match khatru's MaxMessageSize
|
||||
// DefaultMaxMessageSize is the maximum message size for WebSocket connections
|
||||
// Increased from 512KB to 10MB to support large kind 3 follow lists (10k+ follows)
|
||||
// and other large events without truncation
|
||||
DefaultMaxMessageSize = 10 * 1024 * 1024 // 10MB
|
||||
// ClientMessageSizeLimit is the maximum message size that clients can handle
|
||||
// This is set to 100MB to allow large messages
|
||||
ClientMessageSizeLimit = 100 * 1024 * 1024 // 100MB
|
||||
|
||||
@@ -92,6 +92,13 @@ func Run(
|
||||
// Continue anyway - follows can be loaded when admins update their follow lists
|
||||
}
|
||||
|
||||
// Cleanup any kind 3 events that lost their p tags (only for Badger backend)
|
||||
if badgerDB, ok := db.(*database.D); ok {
|
||||
if err := badgerDB.CleanupKind3WithoutPTags(ctx); chk.E(err) {
|
||||
log.E.F("failed to cleanup kind 3 events: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize spider manager based on mode (only for Badger backend)
|
||||
if badgerDB, ok := db.(*database.D); ok && cfg.SpiderMode != "none" {
|
||||
if l.spiderManager, err = spider.New(ctx, badgerDB, l.publishers, cfg.SpiderMode); chk.E(err) {
|
||||
|
||||
72
pkg/database/cleanup-kind3.go
Normal file
72
pkg/database/cleanup-kind3.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package database
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"git.mleku.dev/mleku/nostr/encoders/filter"
|
||||
"git.mleku.dev/mleku/nostr/encoders/kind"
|
||||
"lol.mleku.dev/chk"
|
||||
"lol.mleku.dev/log"
|
||||
)
|
||||
|
||||
// CleanupKind3WithoutPTags scans for kind 3 follow list events that have no p tags
|
||||
// and deletes them. This cleanup is needed because the directory spider may have
|
||||
// saved malformed events that lost their tags during serialization.
|
||||
func (d *D) CleanupKind3WithoutPTags(ctx context.Context) error {
|
||||
log.I.F("database: starting cleanup of kind 3 events without p tags")
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
// Query for all kind 3 events
|
||||
f := &filter.F{
|
||||
Kinds: kind.NewS(kind.FollowList),
|
||||
}
|
||||
|
||||
events, err := d.QueryEvents(ctx, f)
|
||||
if chk.E(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
deletedCount := 0
|
||||
|
||||
// Check each event for p tags
|
||||
for _, ev := range events {
|
||||
hasPTag := false
|
||||
|
||||
if ev.Tags != nil && ev.Tags.Len() > 0 {
|
||||
// Look for at least one p tag
|
||||
for _, tag := range *ev.Tags {
|
||||
if tag != nil && tag.Len() >= 2 {
|
||||
key := tag.Key()
|
||||
if len(key) == 1 && key[0] == 'p' {
|
||||
hasPTag = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Delete events without p tags
|
||||
if !hasPTag {
|
||||
log.W.F("database: deleting kind 3 event without p tags from pubkey %x", ev.Pubkey)
|
||||
if err := d.DeleteEvent(ctx, ev.ID); chk.E(err) {
|
||||
log.E.F("database: failed to delete kind 3 event %x: %v", ev.ID, err)
|
||||
continue
|
||||
}
|
||||
deletedCount++
|
||||
}
|
||||
}
|
||||
|
||||
duration := time.Since(startTime)
|
||||
|
||||
if deletedCount > 0 {
|
||||
log.I.F("database: cleanup completed in %v - deleted %d kind 3 events without p tags (scanned %d total)",
|
||||
duration, deletedCount, len(events))
|
||||
} else {
|
||||
log.I.F("database: cleanup completed in %v - no kind 3 events needed deletion (scanned %d total)",
|
||||
duration, len(events))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
97
pkg/database/cleanup-kind3_test.go
Normal file
97
pkg/database/cleanup-kind3_test.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package database
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"git.mleku.dev/mleku/nostr/encoders/event"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestKind3TagRoundTrip tests that kind 3 events with p tags survive
|
||||
// JSON -> binary -> JSON round trip
|
||||
func TestKind3TagRoundTrip(t *testing.T) {
|
||||
// Sample kind 3 event JSON with p tags
|
||||
kind3JSON := `{
|
||||
"id": "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef",
|
||||
"pubkey": "fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321",
|
||||
"created_at": 1234567890,
|
||||
"kind": 3,
|
||||
"tags": [
|
||||
["p", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"],
|
||||
["p", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"],
|
||||
["p", "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"]
|
||||
],
|
||||
"content": "",
|
||||
"sig": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
|
||||
}`
|
||||
|
||||
// 1. Unmarshal from JSON (simulates receiving from WebSocket)
|
||||
ev1 := event.New()
|
||||
err := json.Unmarshal([]byte(kind3JSON), ev1)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, ev1.Tags)
|
||||
assert.Equal(t, 3, ev1.Tags.Len(), "Should have 3 tags")
|
||||
|
||||
// Verify all tags have key "p"
|
||||
pTagCount := 0
|
||||
for _, tag := range *ev1.Tags {
|
||||
if tag != nil && tag.Len() >= 2 {
|
||||
key := tag.Key()
|
||||
if len(key) == 1 && key[0] == 'p' {
|
||||
pTagCount++
|
||||
t.Logf("Found p tag with value length: %d bytes", len(tag.Value()))
|
||||
}
|
||||
}
|
||||
}
|
||||
assert.Equal(t, 3, pTagCount, "Should have 3 p tags after JSON unmarshal")
|
||||
|
||||
// 2. Marshal to binary (simulates database storage)
|
||||
buf := new(bytes.Buffer)
|
||||
ev1.MarshalBinary(buf)
|
||||
binaryData := buf.Bytes()
|
||||
t.Logf("Binary encoding size: %d bytes", len(binaryData))
|
||||
|
||||
// 3. Unmarshal from binary (simulates database retrieval)
|
||||
ev2 := event.New()
|
||||
err = ev2.UnmarshalBinary(bytes.NewBuffer(binaryData))
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, ev2.Tags)
|
||||
assert.Equal(t, 3, ev2.Tags.Len(), "Should have 3 tags after binary round-trip")
|
||||
|
||||
// Verify all tags still have key "p"
|
||||
pTagCount2 := 0
|
||||
for _, tag := range *ev2.Tags {
|
||||
if tag != nil && tag.Len() >= 2 {
|
||||
key := tag.Key()
|
||||
if len(key) == 1 && key[0] == 'p' {
|
||||
pTagCount2++
|
||||
t.Logf("Found p tag after round-trip with value length: %d bytes", len(tag.Value()))
|
||||
}
|
||||
}
|
||||
}
|
||||
assert.Equal(t, 3, pTagCount2, "Should have 3 p tags after binary round-trip")
|
||||
|
||||
// 4. Marshal back to JSON to verify tags are still there
|
||||
jsonData2, err := json.Marshal(ev2)
|
||||
assert.NoError(t, err)
|
||||
t.Logf("JSON after round-trip: %s", string(jsonData2))
|
||||
|
||||
// Parse the JSON and count p tags
|
||||
var jsonMap map[string]interface{}
|
||||
err = json.Unmarshal(jsonData2, &jsonMap)
|
||||
assert.NoError(t, err)
|
||||
|
||||
tags, ok := jsonMap["tags"].([]interface{})
|
||||
assert.True(t, ok, "tags should be an array")
|
||||
assert.Equal(t, 3, len(tags), "Should have 3 tags in final JSON")
|
||||
|
||||
for i, tag := range tags {
|
||||
tagArray, ok := tag.([]interface{})
|
||||
assert.True(t, ok, "tag should be an array")
|
||||
assert.GreaterOrEqual(t, len(tagArray), 2, "tag should have at least 2 elements")
|
||||
assert.Equal(t, "p", tagArray[0], "tag %d should have key 'p'", i)
|
||||
t.Logf("Tag %d: %v", i, tagArray)
|
||||
}
|
||||
}
|
||||
@@ -134,6 +134,31 @@ func (d *D) SaveEvent(c context.Context, ev *event.E) (
|
||||
return
|
||||
}
|
||||
|
||||
// Validate kind 3 (follow list) events have at least one p tag
|
||||
// This prevents storing malformed follow lists that may come from buggy relays
|
||||
if ev.Kind == 3 {
|
||||
hasPTag := false
|
||||
tagCount := 0
|
||||
if ev.Tags != nil {
|
||||
tagCount = ev.Tags.Len()
|
||||
for _, tag := range *ev.Tags {
|
||||
if tag != nil && tag.Len() >= 2 {
|
||||
key := tag.Key()
|
||||
if len(key) == 1 && key[0] == 'p' {
|
||||
hasPTag = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !hasPTag {
|
||||
log.W.F("SaveEvent: rejecting kind 3 event without p tags from pubkey %x (total tags: %d, event ID: %x)",
|
||||
ev.Pubkey, tagCount, ev.ID)
|
||||
err = errors.New("blocked: kind 3 follow list events must have at least one p tag")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// check if the event already exists
|
||||
var ser *types.Uint40
|
||||
if ser, err = d.GetSerialById(ev.ID); err == nil && ser != nil {
|
||||
|
||||
@@ -1 +1 @@
|
||||
v0.31.0
|
||||
v0.31.1
|
||||
Reference in New Issue
Block a user