Fix duplicate REPORTS relationships in Neo4j backend (v0.36.1)

- Change processReport() to use MERGE instead of CREATE for REPORTS
  relationships, deduplicating by (reporter, reported, report_type); a
  hedged sketch of this upsert follows the list below
- Add ON CREATE/ON MATCH clauses to preserve newest event data while
  preventing duplicate relationships
- Add getExistingReportEvent() helper to check for existing reports
- Add markReportEventSuperseded() to track superseded events
- Add v4 migration migrateDeduplicateReports() to clean up existing
  duplicate REPORTS relationships in databases
- Add comprehensive tests: TestReportDeduplication with subtests for
  deduplication, different types, and superseded event tracking; a rough
  sketch of the deduplication check follows the file list below
- Update WOT_SPEC.md with REPORTS deduplication behavior and correct
  property names (report_type, created_at, created_by_event)
- Bump version to v0.36.1
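
A hedged sketch of the MERGE-based upsert described in the first bullet. The
actual query lives in pkg/neo4j/social-event-processor.go, which is not part of
the diff shown below, so this is an approximation only: the function name
processReportMerge, the parameter keys, and the map[string]any params type are
assumptions, not the real identifiers. Only the property names (report_type,
created_at, created_by_event) are taken from the commit itself.

// Sketch only: assumes ExecuteWrite takes a Cypher string plus a parameter map.
func processReportMerge(ctx context.Context, n *N, params map[string]any) error {
	const reportsCypher = `
		MATCH (reporter:NostrUser {pubkey: $reporter_pubkey})
		MATCH (reported:NostrUser {pubkey: $reported_pubkey})
		// MERGE keys on (reporter, reported, report_type), so a repeated report of
		// the same type updates the existing relationship instead of adding one.
		MERGE (reporter)-[r:REPORTS {report_type: $report_type}]->(reported)
		ON CREATE SET r.created_at = $created_at,
		              r.created_by_event = $event_id
		ON MATCH SET r += CASE
			WHEN $created_at > coalesce(r.created_at, 0)
			THEN {created_at: $created_at, created_by_event: $event_id}
			ELSE {} END
	`
	_, err := n.ExecuteWrite(ctx, reportsCypher, params)
	return err
}

The ON MATCH branch only overwrites created_at and created_by_event when the
incoming event is newer, which is the "preserve newest event data" behavior the
second bullet refers to.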

Fixes: https://git.nostrdev.com/mleku/next.orly.dev/issues/16

Files modified:
- pkg/neo4j/social-event-processor.go: MERGE-based deduplication
- pkg/neo4j/migrations.go: v4 migration for duplicate cleanup
- pkg/neo4j/social-event-processor_test.go: Deduplication tests
- pkg/neo4j/WOT_SPEC.md: Updated REPORTS documentation
- pkg/version/version: Bump to v0.36.1
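
The test file above is not included in this diff, so the following is only a
rough sketch of the kind of check the deduplication subtest might perform after
processing two kind-1984 reports of the same type between the same two users.
The helper name assertSingleReport and the parameter map passed to ExecuteRead
are hypothetical; only the labels and property names come from the code below.

// Hypothetical helper: asserts exactly one REPORTS relationship of the given
// type exists between reporter and reported after duplicates were merged.
func assertSingleReport(ctx context.Context, t *testing.T, n *N, reporter, reported, reportType string) {
	const countCypher = `
		MATCH (:NostrUser {pubkey: $reporter})-[r:REPORTS {report_type: $type}]->(:NostrUser {pubkey: $reported})
		RETURN count(r) AS c
	`
	result, err := n.ExecuteRead(ctx, countCypher, map[string]any{
		"reporter": reporter, "reported": reported, "type": reportType,
	})
	if err != nil {
		t.Fatalf("count query failed: %v", err)
	}
	if result.Next(ctx) {
		if c, ok := result.Record().Values[0].(int64); ok && c != 1 {
			t.Fatalf("expected exactly 1 REPORTS relationship, got %d", c)
		}
	}
}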

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-16 10:13:15 +01:00
parent 96bdf5cba2
commit 9d6280eab1
5 changed files with 452 additions and 17 deletions


@@ -30,6 +30,11 @@ var migrations = []Migration{
Description: "Convert direct REFERENCES/MENTIONS relationships to Tag-based model",
Migrate: migrateToTagBasedReferences,
},
{
Version: "v4",
Description: "Deduplicate REPORTS relationships by (reporter, reported, report_type)",
Migrate: migrateDeduplicateReports,
},
}
// RunMigrations executes all pending migrations
@@ -492,3 +497,101 @@ func migrateToTagBasedReferences(ctx context.Context, n *N) error {
n.Logger.Infof("Tag-based references migration completed successfully")
return nil
}
// migrateDeduplicateReports removes duplicate REPORTS relationships
// Prior to this migration, processReport() used CREATE which allowed multiple
// REPORTS relationships with the same report_type between the same two users.
// This migration keeps only the most recent report (by created_at) for each
// (reporter, reported, report_type) combination.
func migrateDeduplicateReports(ctx context.Context, n *N) error {
// Step 1: Count duplicate REPORTS relationships
// Duplicates are defined as multiple REPORTS with the same (reporter, reported, report_type)
countDuplicatesCypher := `
MATCH (reporter:NostrUser)-[r:REPORTS]->(reported:NostrUser)
WITH reporter, reported, r.report_type AS type, collect(r) AS rels
WHERE size(rels) > 1
RETURN sum(size(rels) - 1) AS duplicate_count
`
result, err := n.ExecuteRead(ctx, countDuplicatesCypher, nil)
if err != nil {
return fmt.Errorf("failed to count duplicate REPORTS: %w", err)
}
var duplicateCount int64
if result.Next(ctx) {
if count, ok := result.Record().Values[0].(int64); ok {
duplicateCount = count
}
}
if duplicateCount == 0 {
n.Logger.Infof("no duplicate REPORTS relationships found, migration complete")
return nil
}
n.Logger.Infof("found %d duplicate REPORTS relationships to remove", duplicateCount)
// Step 2: Delete duplicate REPORTS, keeping the one with the highest created_at
// This query:
// 1. Groups REPORTS by (reporter, reported, report_type)
// 2. Finds the maximum created_at for each group
// 3. Deletes all relationships in the group except the newest one
deleteDuplicatesCypher := `
MATCH (reporter:NostrUser)-[r:REPORTS]->(reported:NostrUser)
WITH reporter, reported, r.report_type AS type,
collect(r) AS rels, max(r.created_at) AS maxCreatedAt
WHERE size(rels) > 1
UNWIND rels AS rel
WITH rel, maxCreatedAt
WHERE rel.created_at < maxCreatedAt
DELETE rel
RETURN count(*) AS deleted
`
writeResult, err := n.ExecuteWrite(ctx, deleteDuplicatesCypher, nil)
if err != nil {
return fmt.Errorf("failed to delete duplicate REPORTS: %w", err)
}
var deletedCount int64
if writeResult.Next(ctx) {
if count, ok := writeResult.Record().Values[0].(int64); ok {
deletedCount = count
}
}
n.Logger.Infof("deleted %d duplicate REPORTS relationships", deletedCount)
// Step 3: Mark superseded ProcessedSocialEvent nodes for deleted reports
// Find ProcessedSocialEvent nodes (kind 1984) whose event IDs are no longer
// referenced by any REPORTS relationship's created_by_event
markSupersededCypher := `
MATCH (evt:ProcessedSocialEvent {event_kind: 1984})
WHERE evt.superseded_by IS NULL
AND NOT EXISTS {
MATCH ()-[r:REPORTS]->()
WHERE r.created_by_event = evt.event_id
}
SET evt.superseded_by = 'migration_v4_dedupe'
RETURN count(evt) AS superseded
`
markResult, err := n.ExecuteWrite(ctx, markSupersededCypher, nil)
if err != nil {
// Non-fatal - just log warning
n.Logger.Warningf("failed to mark superseded ProcessedSocialEvent nodes: %v", err)
} else {
var supersededCount int64
if markResult.Next(ctx) {
if count, ok := markResult.Record().Values[0].(int64); ok {
supersededCount = count
}
}
if supersededCount > 0 {
n.Logger.Infof("marked %d ProcessedSocialEvent nodes as superseded", supersededCount)
}
}
n.Logger.Infof("REPORTS deduplication migration completed successfully")
return nil
}