Add full-text search indexing for word tokens and update tokenization logic

- Introduced word index (`WordPrefix`) for tokenized search terms.
- Added word token extraction in event and filter processing.
- Implemented Unicode-aware, case-insensitive tokenizer with URL, mention, and hex filters.
- Extended full-text indexing to include tags and content.
2025-10-01 15:03:41 +01:00
parent 7e6adf9fba
commit 86ac7b7897
7 changed files with 253 additions and 5 deletions
--- a/pkg/database/save-event.go
+++ b/pkg/database/save-event.go
@@ -9,10 +9,12 @@ import (

 	"github.com/dgraph-io/badger/v4"
 	"lol.mleku.dev/chk"
+	"lol.mleku.dev/log"
 	"next.orly.dev/pkg/database/indexes"
 	"next.orly.dev/pkg/database/indexes/types"
 	"next.orly.dev/pkg/encoders/event"
 	"next.orly.dev/pkg/encoders/filter"
+	"next.orly.dev/pkg/encoders/hex"
 	"next.orly.dev/pkg/encoders/kind"
 	"next.orly.dev/pkg/encoders/tag"
 )
@@ -230,10 +232,10 @@ func (d *D) SaveEvent(c context.Context, ev *event.E) (kc, vc int, err error) {
 			return
 		},
 	)
-	// log.T.F(
-	// 	"total data written: %d bytes keys %d bytes values for event ID %s", kc,
-	// 	vc, hex.Enc(ev.ID),
-	// )
+	log.T.F(
+		"total data written: %d bytes keys %d bytes values for event ID %s", kc,
+		vc, hex.Enc(ev.ID),
+	)
 	// log.T.C(
 	// 	func() string {
 	// 		return fmt.Sprintf("event:\n%s\n", ev.Serialize())