Remove Dgraph, check hex field case, reject if any uppercase

2025-12-03 16:26:07 +00:00
parent 1851ba39fa
commit 880772cab1
31 changed files with 471 additions and 4951 deletions
--- a/pkg/neo4j/hex_utils.go
+++ b/pkg/neo4j/hex_utils.go
@@ -0,0 +1,103 @@
+// Package neo4j provides hex utilities for normalizing pubkeys and event IDs.
+//
+// The nostr library applies binary optimization to e/p tags, storing 64-character
+// hex strings as 33-byte binary (32 bytes + null terminator). This file provides
+// utilities to ensure all pubkeys and event IDs stored in Neo4j are in consistent
+// lowercase hex format.
+package neo4j
+
+import (
+	"strings"
+
+	"git.mleku.dev/mleku/nostr/encoders/hex"
+	"git.mleku.dev/mleku/nostr/encoders/tag"
+)
+
+// Tag binary encoding constants (matching the nostr library).
+const (
+	// BinaryEncodedLen is the byte length of a binary-encoded hash: HashLen raw bytes plus one null terminator.
+	BinaryEncodedLen = 33
+	// HexEncodedLen is the character length of a hex-encoded 32-byte hash (pubkey or event ID).
+	HexEncodedLen = 64
+	// HashLen is the raw byte length of a hash (pubkey/event ID); IsBinaryEncoded also uses it as the terminator's index.
+	HashLen = 32
+)
+
+// IsBinaryEncoded reports whether val has the binary-optimized shape: exactly BinaryEncodedLen bytes ending in a zero byte.
+func IsBinaryEncoded(val []byte) bool {
+	return len(val) == BinaryEncodedLen && val[HashLen] == 0
+}
+
+// NormalizePubkeyHex converts a pubkey or event ID to a lowercase hex string.
+//   - Binary-encoded values (see IsBinaryEncoded): the first HashLen bytes are hex-encoded with hex.Enc.
+//   - Anything else, of any length: the bytes are taken as text and lowercased with strings.ToLower.
+//
+// The input is not validated: non-hex text is lowercased and returned, and empty input yields "".
+//
+// This should be used for all pubkeys and event IDs before storing in Neo4j
+// to prevent duplicate nodes due to case differences.
+func NormalizePubkeyHex(val []byte) string {
+	// Binary form is checked first, so any 33-byte value ending in a zero byte takes this path.
+	if IsBinaryEncoded(val) {
+		// Encode only the hash bytes, dropping the terminator (hex.Enc presumably emits lowercase — confirm).
+		return hex.Enc(val[:HashLen])
+	}
+
+	// Full-length hex strings (may be uppercase from external sources)
+	if len(val) == HexEncodedLen {
+		return strings.ToLower(string(val))
+	}
+
+	// Every other length (possibly prefixes) gets the same treatment as the branch above.
+	return strings.ToLower(string(val))
+}
+
+// ExtractPTagValue returns the value of a p-tag as a lowercase hex string for Neo4j storage.
+// It does not check that the tag's key is "p" or that the value is a full-length hex pubkey.
+// Returns "" if t is nil, has fewer than two elements, or ValueHex() returns nothing.
+func ExtractPTagValue(t *tag.T) string {
+	if t == nil || len(t.T) < 2 {
+		return ""
+	}
+
+	// Delegate decoding to ValueHex(), which is relied on to handle both binary and hex formats
+	hexVal := t.ValueHex()
+	if len(hexVal) == 0 {
+		return ""
+	}
+
+	// Lowercase defensively: ValueHex is expected to return lowercase already (not verifiable
+	// from this file), but externally supplied hex may be uppercase.
+	return strings.ToLower(string(hexVal))
+}
+
+// ExtractETagValue returns the value of an e-tag as a lowercase hex string for Neo4j storage.
+// It does not check that the tag's key is "e" or that the value is a full-length hex event ID.
+// Returns "" if t is nil, has fewer than two elements, or ValueHex() returns nothing.
+func ExtractETagValue(t *tag.T) string {
+	if t == nil || len(t.T) < 2 {
+		return ""
+	}
+
+	// Delegate decoding to ValueHex(), which is relied on to handle both binary and hex formats
+	hexVal := t.ValueHex()
+	if len(hexVal) == 0 {
+		return ""
+	}
+
+	// Lowercase defensively in case the decoded value contains uppercase hex digits
+	return strings.ToLower(string(hexVal))
+}
+
+// IsValidHexPubkey reports whether s is exactly HexEncodedLen hex digits; both lowercase and uppercase digits are accepted.
+func IsValidHexPubkey(s string) bool {
+	if len(s) != HexEncodedLen {
+		return false
+	}
+	for _, c := range s {
+		if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
+			return false
+		}
+	}
+	return true
+}
--- a/pkg/neo4j/query-events.go
+++ b/pkg/neo4j/query-events.go
@@ -113,32 +113,49 @@ func (n *N) buildCypherQuery(f *filter.F, includeDeleteEvents bool) (string, map
 	// Tag filters - this is where Neo4j's graph capabilities shine
 	// We can efficiently traverse tag relationships
 	tagIndex := 0
-	for _, tagValues := range *f.Tags {
-		if len(tagValues.T) > 0 {
-			tagVarName := fmt.Sprintf("t%d", tagIndex)
-			tagTypeParam := fmt.Sprintf("tagType_%d", tagIndex)
-			tagValuesParam := fmt.Sprintf("tagValues_%d", tagIndex)
+	if f.Tags != nil {
+		for _, tagValues := range *f.Tags {
+			if len(tagValues.T) > 0 {
+				tagVarName := fmt.Sprintf("t%d", tagIndex)
+				tagTypeParam := fmt.Sprintf("tagType_%d", tagIndex)
+				tagValuesParam := fmt.Sprintf("tagValues_%d", tagIndex)

-			// Add tag relationship to MATCH clause
-			matchClause += fmt.Sprintf(" OPTIONAL MATCH (e)-[:TAGGED_WITH]->(%s:Tag)", tagVarName)
+				// Add tag relationship to MATCH clause
+				matchClause += fmt.Sprintf(" OPTIONAL MATCH (e)-[:TAGGED_WITH]->(%s:Tag)", tagVarName)

-			// The first element is the tag type (e.g., "e", "p", etc.)
-			tagType := string(tagValues.T[0])
+				// The first element is the tag type (e.g., "e", "p", etc.)
+				tagType := string(tagValues.T[0])

-			// Convert remaining tag values to strings (skip first element which is the type)
-			tagValueStrings := make([]string, len(tagValues.T)-1)
-			for i, tv := range tagValues.T[1:] {
-				tagValueStrings[i] = string(tv)
+				// Convert remaining tag values to strings (skip first element which is the type)
+				// For e/p tags, use NormalizePubkeyHex to handle binary encoding and uppercase hex
+				tagValueStrings := make([]string, 0, len(tagValues.T)-1)
+				for _, tv := range tagValues.T[1:] {
+					if tagType == "e" || tagType == "p" {
+						// Normalize e/p tag values to lowercase hex (handles binary encoding)
+						normalized := NormalizePubkeyHex(tv)
+						if normalized != "" {
+							tagValueStrings = append(tagValueStrings, normalized)
+						}
+					} else {
+						// For other tags, use direct string conversion
+						tagValueStrings = append(tagValueStrings, string(tv))
+					}
+				}
+
+				// Skip if no valid values after normalization
+				if len(tagValueStrings) == 0 {
+					continue
+				}
+
+				// Add WHERE conditions for this tag
+				params[tagTypeParam] = tagType
+				params[tagValuesParam] = tagValueStrings
+				whereClauses = append(whereClauses,
+					fmt.Sprintf("(%s.type = $%s AND %s.value IN $%s)",
+						tagVarName, tagTypeParam, tagVarName, tagValuesParam))
+
+				tagIndex++
 			}
-
-			// Add WHERE conditions for this tag
-			params[tagTypeParam] = tagType
-			params[tagValuesParam] = tagValueStrings
-			whereClauses = append(whereClauses,
-				fmt.Sprintf("(%s.type = $%s AND %s.value IN $%s)",
-					tagVarName, tagTypeParam, tagVarName, tagValuesParam))
-
-			tagIndex++
 		}
 	}

--- a/pkg/neo4j/save-event.go
+++ b/pkg/neo4j/save-event.go
@@ -164,10 +164,15 @@ CREATE (e)-[:AUTHORED_BY]->(a)
 			}

 			tagType := string(tagItem.T[0])
-			tagValue := string(tagItem.T[1])

 			switch tagType {
 			case "e": // Event reference - creates REFERENCES relationship
+				// Use ExtractETagValue to handle binary encoding and normalize to lowercase hex
+				tagValue := ExtractETagValue(tagItem)
+				if tagValue == "" {
+					continue // Skip invalid e-tags
+				}
+
 				// Create reference to another event (if it exists)
 				paramName := fmt.Sprintf("eTag_%d", eTagIndex)
 				params[paramName] = tagValue
@@ -201,6 +206,12 @@ FOREACH (ignoreMe IN CASE WHEN ref%d IS NOT NULL THEN [1] ELSE [] END |
 				eTagIndex++

 			case "p": // Pubkey mention - creates MENTIONS relationship
+				// Use ExtractPTagValue to handle binary encoding and normalize to lowercase hex
+				tagValue := ExtractPTagValue(tagItem)
+				if tagValue == "" {
+					continue // Skip invalid p-tags
+				}
+
 				// Create mention to another author
 				paramName := fmt.Sprintf("pTag_%d", pTagIndex)
 				params[paramName] = tagValue
@@ -214,6 +225,9 @@ CREATE (e)-[:MENTIONS]->(mentioned%d)
 				pTagIndex++

 			default: // Other tags - creates Tag nodes and TAGGED_WITH relationships
+				// For non-e/p tags, use direct string conversion (no binary encoding)
+				tagValue := string(tagItem.T[1])
+
 				// Create tag node and relationship
 				typeParam := fmt.Sprintf("tagType_%d", tagNodeIndex)
 				valueParam := fmt.Sprintf("tagValue_%d", tagNodeIndex)
--- a/pkg/neo4j/social-event-processor.go
+++ b/pkg/neo4j/social-event-processor.go
@@ -220,11 +220,12 @@ func (p *SocialEventProcessor) processReport(ctx context.Context, ev *event.E) e
 	var reportedPubkey string
 	var reportType string = "other" // default

-	for _, tag := range *ev.Tags {
-		if len(tag.T) >= 2 && string(tag.T[0]) == "p" {
-			reportedPubkey = string(tag.T[1])
-			if len(tag.T) >= 3 {
-				reportType = string(tag.T[2])
+	for _, t := range *ev.Tags {
+		if len(t.T) >= 2 && string(t.T[0]) == "p" {
+			// Use ExtractPTagValue to handle binary encoding and normalize to lowercase
+			reportedPubkey = ExtractPTagValue(t)
+			if len(t.T) >= 3 {
+				reportType = string(t.T[2])
 			}
 			break // Use first p-tag
 		}
@@ -574,14 +575,17 @@ func (p *SocialEventProcessor) BatchProcessContactLists(ctx context.Context, eve
 // Helper functions

 // extractPTags extracts unique pubkeys from p-tags
+// Uses ExtractPTagValue to properly handle binary-encoded tag values
+// and normalizes to lowercase hex for consistent Neo4j storage
 func extractPTags(ev *event.E) []string {
 	seen := make(map[string]bool)
 	var pubkeys []string

-	for _, tag := range *ev.Tags {
-		if len(tag.T) >= 2 && string(tag.T[0]) == "p" {
-			pubkey := string(tag.T[1])
-			if len(pubkey) == 64 && !seen[pubkey] { // Basic validation: 64 hex chars
+	for _, t := range *ev.Tags {
+		if len(t.T) >= 2 && string(t.T[0]) == "p" {
+			// Use ExtractPTagValue to handle binary encoding and normalize to lowercase
+			pubkey := ExtractPTagValue(t)
+			if IsValidHexPubkey(pubkey) && !seen[pubkey] {
 				seen[pubkey] = true
 				pubkeys = append(pubkeys, pubkey)
 			}