Remove Dgraph, check hex field case, reject if any uppercase
Some checks failed
Go / build-and-release (push) Has been cancelled

This commit is contained in:
2025-12-03 16:26:07 +00:00
parent 1851ba39fa
commit 880772cab1
31 changed files with 471 additions and 4951 deletions

103
pkg/neo4j/hex_utils.go Normal file
View File

@@ -0,0 +1,103 @@
// Package neo4j provides hex utilities for normalizing pubkeys and event IDs.
//
// The nostr library applies binary optimization to e/p tags, storing 64-character
// hex strings as 33-byte binary (32 bytes + null terminator). This file provides
// utilities to ensure all pubkeys and event IDs stored in Neo4j are in consistent
// lowercase hex format.
package neo4j
import (
"strings"
"git.mleku.dev/mleku/nostr/encoders/hex"
"git.mleku.dev/mleku/nostr/encoders/tag"
)
// Size constants for the tag value encodings used by the nostr library.
// Everything is derived from HashLen so the three sizes cannot drift apart.
const (
	// HashLen is the number of raw bytes in a hash (pubkey or event ID).
	HashLen = 32
	// HexEncodedLen is the number of characters in the hex encoding of a
	// HashLen-byte hash: two hex digits per byte.
	HexEncodedLen = 2 * HashLen
	// BinaryEncodedLen is the size of a binary-encoded hash: the HashLen raw
	// bytes followed by one null terminator byte.
	BinaryEncodedLen = HashLen + 1
)
// IsBinaryEncoded reports whether val has the shape of the nostr library's
// binary-optimized tag value: exactly BinaryEncodedLen bytes, with a zero byte
// at index HashLen (the terminator that follows the raw hash).
func IsBinaryEncoded(val []byte) bool {
	// The length guard comes first so the index below is always in range.
	if len(val) != BinaryEncodedLen {
		return false
	}
	return val[HashLen] == 0
}
// NormalizePubkeyHex returns a pubkey or event ID as a lowercase hex string.
//
// Two input shapes are handled:
//   - Binary-encoded values (as recognized by IsBinaryEncoded): the first
//     HashLen bytes are hex-encoded with hex.Enc.
//   - Anything else is treated as text that is already hex, whether a full
//     64-character value or a shorter prefix, and is lowercased.
//
// No hex validation is performed: input that is neither binary-encoded nor
// hex is simply lowercased and returned, and empty input yields "".
//
// This should be used for all pubkeys and event IDs before storing in Neo4j
// to prevent duplicate nodes due to case differences.
func NormalizePubkeyHex(val []byte) string {
	if IsBinaryEncoded(val) {
		// Drop the null terminator and encode only the raw hash bytes.
		// NOTE(review): this relies on hex.Enc emitting lowercase digits —
		// confirm against the nostr hex encoder.
		return hex.Enc(val[:HashLen])
	}
	// Full-length hex strings and shorter prefixes need identical treatment,
	// so there is a single path instead of a length-specific branch.
	return strings.ToLower(string(val))
}
// ExtractPTagValue extracts the pubkey from a p-tag as a lowercase hex string
// suitable for Neo4j storage.
//
// It returns "" when t is nil, has fewer than two elements, or yields an empty
// value. The tag key is not inspected; callers are expected to pass a p-tag.
func ExtractPTagValue(t *tag.T) string {
	return extractTagValueHex(t)
}

// ExtractETagValue extracts the event ID from an e-tag as a lowercase hex
// string suitable for Neo4j storage.
//
// It returns "" when t is nil, has fewer than two elements, or yields an empty
// value. The tag key is not inspected; callers are expected to pass an e-tag.
func ExtractETagValue(t *tag.T) string {
	return extractTagValueHex(t)
}

// extractTagValueHex is the shared implementation behind ExtractPTagValue and
// ExtractETagValue: it reads the tag's value via ValueHex and lowercases it.
func extractTagValueHex(t *tag.T) string {
	// A usable tag needs at least a key and a value element.
	if t == nil || len(t.T) < 2 {
		return ""
	}
	// ValueHex is relied on to return the value as hex whether the library
	// stored it binary-encoded or as text.
	// NOTE(review): confirm against the nostr tag package.
	hexVal := t.ValueHex()
	if len(hexVal) == 0 {
		return ""
	}
	// Lowercase defensively: the value may originate from external data that
	// was stored as uppercase hex text.
	return strings.ToLower(string(hexVal))
}
// IsValidHexPubkey reports whether s is exactly HexEncodedLen characters long
// and consists solely of hexadecimal digits. Both lowercase (a-f) and
// uppercase (A-F) digits are accepted.
func IsValidHexPubkey(s string) bool {
	if len(s) != HexEncodedLen {
		return false
	}
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case '0' <= c && c <= '9':
		case 'a' <= c && c <= 'f':
		case 'A' <= c && c <= 'F':
		default:
			// Anything outside the three hex ranges, including any byte of
			// a multi-byte UTF-8 sequence, disqualifies the string.
			return false
		}
	}
	return true
}

View File

@@ -113,32 +113,49 @@ func (n *N) buildCypherQuery(f *filter.F, includeDeleteEvents bool) (string, map
// Tag filters - this is where Neo4j's graph capabilities shine
// We can efficiently traverse tag relationships
tagIndex := 0
for _, tagValues := range *f.Tags {
if len(tagValues.T) > 0 {
tagVarName := fmt.Sprintf("t%d", tagIndex)
tagTypeParam := fmt.Sprintf("tagType_%d", tagIndex)
tagValuesParam := fmt.Sprintf("tagValues_%d", tagIndex)
if f.Tags != nil {
for _, tagValues := range *f.Tags {
if len(tagValues.T) > 0 {
tagVarName := fmt.Sprintf("t%d", tagIndex)
tagTypeParam := fmt.Sprintf("tagType_%d", tagIndex)
tagValuesParam := fmt.Sprintf("tagValues_%d", tagIndex)
// Add tag relationship to MATCH clause
matchClause += fmt.Sprintf(" OPTIONAL MATCH (e)-[:TAGGED_WITH]->(%s:Tag)", tagVarName)
// Add tag relationship to MATCH clause
matchClause += fmt.Sprintf(" OPTIONAL MATCH (e)-[:TAGGED_WITH]->(%s:Tag)", tagVarName)
// The first element is the tag type (e.g., "e", "p", etc.)
tagType := string(tagValues.T[0])
// The first element is the tag type (e.g., "e", "p", etc.)
tagType := string(tagValues.T[0])
// Convert remaining tag values to strings (skip first element which is the type)
tagValueStrings := make([]string, len(tagValues.T)-1)
for i, tv := range tagValues.T[1:] {
tagValueStrings[i] = string(tv)
// Convert remaining tag values to strings (skip first element which is the type)
// For e/p tags, use NormalizePubkeyHex to handle binary encoding and uppercase hex
tagValueStrings := make([]string, 0, len(tagValues.T)-1)
for _, tv := range tagValues.T[1:] {
if tagType == "e" || tagType == "p" {
// Normalize e/p tag values to lowercase hex (handles binary encoding)
normalized := NormalizePubkeyHex(tv)
if normalized != "" {
tagValueStrings = append(tagValueStrings, normalized)
}
} else {
// For other tags, use direct string conversion
tagValueStrings = append(tagValueStrings, string(tv))
}
}
// Skip if no valid values after normalization
if len(tagValueStrings) == 0 {
continue
}
// Add WHERE conditions for this tag
params[tagTypeParam] = tagType
params[tagValuesParam] = tagValueStrings
whereClauses = append(whereClauses,
fmt.Sprintf("(%s.type = $%s AND %s.value IN $%s)",
tagVarName, tagTypeParam, tagVarName, tagValuesParam))
tagIndex++
}
// Add WHERE conditions for this tag
params[tagTypeParam] = tagType
params[tagValuesParam] = tagValueStrings
whereClauses = append(whereClauses,
fmt.Sprintf("(%s.type = $%s AND %s.value IN $%s)",
tagVarName, tagTypeParam, tagVarName, tagValuesParam))
tagIndex++
}
}

View File

@@ -164,10 +164,15 @@ CREATE (e)-[:AUTHORED_BY]->(a)
}
tagType := string(tagItem.T[0])
tagValue := string(tagItem.T[1])
switch tagType {
case "e": // Event reference - creates REFERENCES relationship
// Use ExtractETagValue to handle binary encoding and normalize to lowercase hex
tagValue := ExtractETagValue(tagItem)
if tagValue == "" {
continue // Skip invalid e-tags
}
// Create reference to another event (if it exists)
paramName := fmt.Sprintf("eTag_%d", eTagIndex)
params[paramName] = tagValue
@@ -201,6 +206,12 @@ FOREACH (ignoreMe IN CASE WHEN ref%d IS NOT NULL THEN [1] ELSE [] END |
eTagIndex++
case "p": // Pubkey mention - creates MENTIONS relationship
// Use ExtractPTagValue to handle binary encoding and normalize to lowercase hex
tagValue := ExtractPTagValue(tagItem)
if tagValue == "" {
continue // Skip invalid p-tags
}
// Create mention to another author
paramName := fmt.Sprintf("pTag_%d", pTagIndex)
params[paramName] = tagValue
@@ -214,6 +225,9 @@ CREATE (e)-[:MENTIONS]->(mentioned%d)
pTagIndex++
default: // Other tags - creates Tag nodes and TAGGED_WITH relationships
// For non-e/p tags, use direct string conversion (no binary encoding)
tagValue := string(tagItem.T[1])
// Create tag node and relationship
typeParam := fmt.Sprintf("tagType_%d", tagNodeIndex)
valueParam := fmt.Sprintf("tagValue_%d", tagNodeIndex)

View File

@@ -220,11 +220,12 @@ func (p *SocialEventProcessor) processReport(ctx context.Context, ev *event.E) e
var reportedPubkey string
var reportType string = "other" // default
for _, tag := range *ev.Tags {
if len(tag.T) >= 2 && string(tag.T[0]) == "p" {
reportedPubkey = string(tag.T[1])
if len(tag.T) >= 3 {
reportType = string(tag.T[2])
for _, t := range *ev.Tags {
if len(t.T) >= 2 && string(t.T[0]) == "p" {
// Use ExtractPTagValue to handle binary encoding and normalize to lowercase
reportedPubkey = ExtractPTagValue(t)
if len(t.T) >= 3 {
reportType = string(t.T[2])
}
break // Use first p-tag
}
@@ -574,14 +575,17 @@ func (p *SocialEventProcessor) BatchProcessContactLists(ctx context.Context, eve
// Helper functions
// extractPTags extracts unique pubkeys from p-tags
// Uses ExtractPTagValue to properly handle binary-encoded tag values
// and normalizes to lowercase hex for consistent Neo4j storage
func extractPTags(ev *event.E) []string {
seen := make(map[string]bool)
var pubkeys []string
for _, tag := range *ev.Tags {
if len(tag.T) >= 2 && string(tag.T[0]) == "p" {
pubkey := string(tag.T[1])
if len(pubkey) == 64 && !seen[pubkey] { // Basic validation: 64 hex chars
for _, t := range *ev.Tags {
if len(t.T) >= 2 && string(t.T[0]) == "p" {
// Use ExtractPTagValue to handle binary encoding and normalize to lowercase
pubkey := ExtractPTagValue(t)
if IsValidHexPubkey(pubkey) && !seen[pubkey] {
seen[pubkey] = true
pubkeys = append(pubkeys, pubkey)
}