Add full-text search indexing for word tokens and update tokenization logic
- Introduced word index (`WordPrefix`) for tokenized search terms.
- Added word token extraction in event and filter processing.
- Implemented Unicode-aware, case-insensitive tokenizer with URL, mention, and hex filters.
- Extended full-text indexing to include tags and content.
This commit is contained in:
@@ -9,10 +9,12 @@ import (
|
||||
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"lol.mleku.dev/chk"
|
||||
"lol.mleku.dev/log"
|
||||
"next.orly.dev/pkg/database/indexes"
|
||||
"next.orly.dev/pkg/database/indexes/types"
|
||||
"next.orly.dev/pkg/encoders/event"
|
||||
"next.orly.dev/pkg/encoders/filter"
|
||||
"next.orly.dev/pkg/encoders/hex"
|
||||
"next.orly.dev/pkg/encoders/kind"
|
||||
"next.orly.dev/pkg/encoders/tag"
|
||||
)
|
||||
@@ -230,10 +232,10 @@ func (d *D) SaveEvent(c context.Context, ev *event.E) (kc, vc int, err error) {
|
||||
return
|
||||
},
|
||||
)
|
||||
// log.T.F(
|
||||
// "total data written: %d bytes keys %d bytes values for event ID %s", kc,
|
||||
// vc, hex.Enc(ev.ID),
|
||||
// )
|
||||
log.T.F(
|
||||
"total data written: %d bytes keys %d bytes values for event ID %s", kc,
|
||||
vc, hex.Enc(ev.ID),
|
||||
)
|
||||
// log.T.C(
|
||||
// func() string {
|
||||
// return fmt.Sprintf("event:\n%s\n", ev.Serialize())
|
||||
|
||||
Reference in New Issue
Block a user