Remove Dgraph, check hex field case, reject if any uppercase
Some checks failed
Go / build-and-release (push) Has been cancelled
Some checks failed
Go / build-and-release (push) Has been cancelled
This commit is contained in:
103
pkg/neo4j/hex_utils.go
Normal file
103
pkg/neo4j/hex_utils.go
Normal file
@@ -0,0 +1,103 @@
|
||||
// Package neo4j provides hex utilities for normalizing pubkeys and event IDs.
|
||||
//
|
||||
// The nostr library applies binary optimization to e/p tags, storing 64-character
|
||||
// hex strings as 33-byte binary (32 bytes + null terminator). This file provides
|
||||
// utilities to ensure all pubkeys and event IDs stored in Neo4j are in consistent
|
||||
// lowercase hex format.
|
||||
package neo4j
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"git.mleku.dev/mleku/nostr/encoders/hex"
|
||||
"git.mleku.dev/mleku/nostr/encoders/tag"
|
||||
)
|
||||
|
||||
// Sizes of the tag value encodings handled in this file (matching the nostr
// library).
const (
	// HashLen is the number of raw bytes in a hash (pubkey or event ID).
	HashLen = 32
	// HexEncodedLen is the number of characters in a hex-encoded hash: two hex
	// digits per raw byte.
	HexEncodedLen = 2 * HashLen
	// BinaryEncodedLen is the number of bytes in a binary-encoded hash: the raw
	// hash followed by a single null terminator.
	BinaryEncodedLen = HashLen + 1
)
|
||||
|
||||
// IsBinaryEncoded checks if a value is stored in the nostr library's binary-optimized format
|
||||
func IsBinaryEncoded(val []byte) bool {
|
||||
return len(val) == BinaryEncodedLen && val[HashLen] == 0
|
||||
}
|
||||
|
||||
// NormalizePubkeyHex ensures a pubkey/event ID is in lowercase hex format.
|
||||
// It handles:
|
||||
// - Binary-encoded values (33 bytes with null terminator) -> converts to lowercase hex
|
||||
// - Uppercase hex strings -> converts to lowercase
|
||||
// - Already lowercase hex -> returns as-is
|
||||
//
|
||||
// This should be used for all pubkeys and event IDs before storing in Neo4j
|
||||
// to prevent duplicate nodes due to case differences.
|
||||
func NormalizePubkeyHex(val []byte) string {
|
||||
// Handle binary-encoded values from the nostr library
|
||||
if IsBinaryEncoded(val) {
|
||||
// Convert binary to lowercase hex
|
||||
return hex.Enc(val[:HashLen])
|
||||
}
|
||||
|
||||
// Handle hex strings (may be uppercase from external sources)
|
||||
if len(val) == HexEncodedLen {
|
||||
return strings.ToLower(string(val))
|
||||
}
|
||||
|
||||
// For other lengths (possibly prefixes), lowercase the hex
|
||||
return strings.ToLower(string(val))
|
||||
}
|
||||
|
||||
// ExtractPTagValue extracts a pubkey from a p-tag, handling binary encoding.
|
||||
// Returns lowercase hex string suitable for Neo4j storage.
|
||||
// Returns empty string if the tag doesn't have a valid value.
|
||||
func ExtractPTagValue(t *tag.T) string {
|
||||
if t == nil || len(t.T) < 2 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Use ValueHex() which properly handles both binary and hex formats
|
||||
hexVal := t.ValueHex()
|
||||
if len(hexVal) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Ensure lowercase (ValueHex returns the library's encoding which is lowercase,
|
||||
// but we normalize anyway for safety with external data)
|
||||
return strings.ToLower(string(hexVal))
|
||||
}
|
||||
|
||||
// ExtractETagValue extracts an event ID from an e-tag, handling binary encoding.
|
||||
// Returns lowercase hex string suitable for Neo4j storage.
|
||||
// Returns empty string if the tag doesn't have a valid value.
|
||||
func ExtractETagValue(t *tag.T) string {
|
||||
if t == nil || len(t.T) < 2 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Use ValueHex() which properly handles both binary and hex formats
|
||||
hexVal := t.ValueHex()
|
||||
if len(hexVal) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Ensure lowercase
|
||||
return strings.ToLower(string(hexVal))
|
||||
}
|
||||
|
||||
// IsValidHexPubkey checks if a string is a valid 64-character hex pubkey
|
||||
func IsValidHexPubkey(s string) bool {
|
||||
if len(s) != HexEncodedLen {
|
||||
return false
|
||||
}
|
||||
for _, c := range s {
|
||||
if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
@@ -113,32 +113,49 @@ func (n *N) buildCypherQuery(f *filter.F, includeDeleteEvents bool) (string, map
|
||||
// Tag filters - this is where Neo4j's graph capabilities shine
|
||||
// We can efficiently traverse tag relationships
|
||||
tagIndex := 0
|
||||
for _, tagValues := range *f.Tags {
|
||||
if len(tagValues.T) > 0 {
|
||||
tagVarName := fmt.Sprintf("t%d", tagIndex)
|
||||
tagTypeParam := fmt.Sprintf("tagType_%d", tagIndex)
|
||||
tagValuesParam := fmt.Sprintf("tagValues_%d", tagIndex)
|
||||
if f.Tags != nil {
|
||||
for _, tagValues := range *f.Tags {
|
||||
if len(tagValues.T) > 0 {
|
||||
tagVarName := fmt.Sprintf("t%d", tagIndex)
|
||||
tagTypeParam := fmt.Sprintf("tagType_%d", tagIndex)
|
||||
tagValuesParam := fmt.Sprintf("tagValues_%d", tagIndex)
|
||||
|
||||
// Add tag relationship to MATCH clause
|
||||
matchClause += fmt.Sprintf(" OPTIONAL MATCH (e)-[:TAGGED_WITH]->(%s:Tag)", tagVarName)
|
||||
// Add tag relationship to MATCH clause
|
||||
matchClause += fmt.Sprintf(" OPTIONAL MATCH (e)-[:TAGGED_WITH]->(%s:Tag)", tagVarName)
|
||||
|
||||
// The first element is the tag type (e.g., "e", "p", etc.)
|
||||
tagType := string(tagValues.T[0])
|
||||
// The first element is the tag type (e.g., "e", "p", etc.)
|
||||
tagType := string(tagValues.T[0])
|
||||
|
||||
// Convert remaining tag values to strings (skip first element which is the type)
|
||||
tagValueStrings := make([]string, len(tagValues.T)-1)
|
||||
for i, tv := range tagValues.T[1:] {
|
||||
tagValueStrings[i] = string(tv)
|
||||
// Convert remaining tag values to strings (skip first element which is the type)
|
||||
// For e/p tags, use NormalizePubkeyHex to handle binary encoding and uppercase hex
|
||||
tagValueStrings := make([]string, 0, len(tagValues.T)-1)
|
||||
for _, tv := range tagValues.T[1:] {
|
||||
if tagType == "e" || tagType == "p" {
|
||||
// Normalize e/p tag values to lowercase hex (handles binary encoding)
|
||||
normalized := NormalizePubkeyHex(tv)
|
||||
if normalized != "" {
|
||||
tagValueStrings = append(tagValueStrings, normalized)
|
||||
}
|
||||
} else {
|
||||
// For other tags, use direct string conversion
|
||||
tagValueStrings = append(tagValueStrings, string(tv))
|
||||
}
|
||||
}
|
||||
|
||||
// Skip if no valid values after normalization
|
||||
if len(tagValueStrings) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Add WHERE conditions for this tag
|
||||
params[tagTypeParam] = tagType
|
||||
params[tagValuesParam] = tagValueStrings
|
||||
whereClauses = append(whereClauses,
|
||||
fmt.Sprintf("(%s.type = $%s AND %s.value IN $%s)",
|
||||
tagVarName, tagTypeParam, tagVarName, tagValuesParam))
|
||||
|
||||
tagIndex++
|
||||
}
|
||||
|
||||
// Add WHERE conditions for this tag
|
||||
params[tagTypeParam] = tagType
|
||||
params[tagValuesParam] = tagValueStrings
|
||||
whereClauses = append(whereClauses,
|
||||
fmt.Sprintf("(%s.type = $%s AND %s.value IN $%s)",
|
||||
tagVarName, tagTypeParam, tagVarName, tagValuesParam))
|
||||
|
||||
tagIndex++
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -164,10 +164,15 @@ CREATE (e)-[:AUTHORED_BY]->(a)
|
||||
}
|
||||
|
||||
tagType := string(tagItem.T[0])
|
||||
tagValue := string(tagItem.T[1])
|
||||
|
||||
switch tagType {
|
||||
case "e": // Event reference - creates REFERENCES relationship
|
||||
// Use ExtractETagValue to handle binary encoding and normalize to lowercase hex
|
||||
tagValue := ExtractETagValue(tagItem)
|
||||
if tagValue == "" {
|
||||
continue // Skip invalid e-tags
|
||||
}
|
||||
|
||||
// Create reference to another event (if it exists)
|
||||
paramName := fmt.Sprintf("eTag_%d", eTagIndex)
|
||||
params[paramName] = tagValue
|
||||
@@ -201,6 +206,12 @@ FOREACH (ignoreMe IN CASE WHEN ref%d IS NOT NULL THEN [1] ELSE [] END |
|
||||
eTagIndex++
|
||||
|
||||
case "p": // Pubkey mention - creates MENTIONS relationship
|
||||
// Use ExtractPTagValue to handle binary encoding and normalize to lowercase hex
|
||||
tagValue := ExtractPTagValue(tagItem)
|
||||
if tagValue == "" {
|
||||
continue // Skip invalid p-tags
|
||||
}
|
||||
|
||||
// Create mention to another author
|
||||
paramName := fmt.Sprintf("pTag_%d", pTagIndex)
|
||||
params[paramName] = tagValue
|
||||
@@ -214,6 +225,9 @@ CREATE (e)-[:MENTIONS]->(mentioned%d)
|
||||
pTagIndex++
|
||||
|
||||
default: // Other tags - creates Tag nodes and TAGGED_WITH relationships
|
||||
// For non-e/p tags, use direct string conversion (no binary encoding)
|
||||
tagValue := string(tagItem.T[1])
|
||||
|
||||
// Create tag node and relationship
|
||||
typeParam := fmt.Sprintf("tagType_%d", tagNodeIndex)
|
||||
valueParam := fmt.Sprintf("tagValue_%d", tagNodeIndex)
|
||||
|
||||
@@ -220,11 +220,12 @@ func (p *SocialEventProcessor) processReport(ctx context.Context, ev *event.E) e
|
||||
var reportedPubkey string
|
||||
var reportType string = "other" // default
|
||||
|
||||
for _, tag := range *ev.Tags {
|
||||
if len(tag.T) >= 2 && string(tag.T[0]) == "p" {
|
||||
reportedPubkey = string(tag.T[1])
|
||||
if len(tag.T) >= 3 {
|
||||
reportType = string(tag.T[2])
|
||||
for _, t := range *ev.Tags {
|
||||
if len(t.T) >= 2 && string(t.T[0]) == "p" {
|
||||
// Use ExtractPTagValue to handle binary encoding and normalize to lowercase
|
||||
reportedPubkey = ExtractPTagValue(t)
|
||||
if len(t.T) >= 3 {
|
||||
reportType = string(t.T[2])
|
||||
}
|
||||
break // Use first p-tag
|
||||
}
|
||||
@@ -574,14 +575,17 @@ func (p *SocialEventProcessor) BatchProcessContactLists(ctx context.Context, eve
|
||||
// Helper functions
|
||||
|
||||
// extractPTags extracts unique pubkeys from p-tags
|
||||
// Uses ExtractPTagValue to properly handle binary-encoded tag values
|
||||
// and normalizes to lowercase hex for consistent Neo4j storage
|
||||
func extractPTags(ev *event.E) []string {
|
||||
seen := make(map[string]bool)
|
||||
var pubkeys []string
|
||||
|
||||
for _, tag := range *ev.Tags {
|
||||
if len(tag.T) >= 2 && string(tag.T[0]) == "p" {
|
||||
pubkey := string(tag.T[1])
|
||||
if len(pubkey) == 64 && !seen[pubkey] { // Basic validation: 64 hex chars
|
||||
for _, t := range *ev.Tags {
|
||||
if len(t.T) >= 2 && string(t.T[0]) == "p" {
|
||||
// Use ExtractPTagValue to handle binary encoding and normalize to lowercase
|
||||
pubkey := ExtractPTagValue(t)
|
||||
if IsValidHexPubkey(pubkey) && !seen[pubkey] {
|
||||
seen[pubkey] = true
|
||||
pubkeys = append(pubkeys, pubkey)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user