This commit is contained in:
@@ -14,7 +14,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
currentVersion uint32 = 1
|
||||
currentVersion uint32 = 2
|
||||
)
|
||||
|
||||
func (d *D) RunMigrations() {
|
||||
@@ -56,22 +56,8 @@ func (d *D) RunMigrations() {
|
||||
}
|
||||
if dbVersion == 0 {
|
||||
log.D.F("no version tag found, creating...")
|
||||
// write the version tag now
|
||||
if err = d.Update(
|
||||
func(txn *badger.Txn) (err error) {
|
||||
buf := new(bytes.Buffer)
|
||||
vv := new(types.Uint32)
|
||||
vv.Set(currentVersion)
|
||||
log.I.S(vv)
|
||||
if err = indexes.VersionEnc(vv).MarshalWrite(buf); chk.E(err) {
|
||||
return
|
||||
}
|
||||
if err = txn.Set(buf.Bytes(), nil); chk.E(err) {
|
||||
return
|
||||
}
|
||||
return
|
||||
},
|
||||
); chk.E(err) {
|
||||
// write the version tag now (ensure any old tags are removed first)
|
||||
if err = d.writeVersionTag(currentVersion); chk.E(err) {
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -79,7 +65,136 @@ func (d *D) RunMigrations() {
|
||||
log.I.F("migrating to version 1...")
|
||||
// the first migration is expiration tags
|
||||
d.UpdateExpirationTags()
|
||||
// bump to version 1
|
||||
_ = d.writeVersionTag(1)
|
||||
}
|
||||
if dbVersion < 2 {
|
||||
log.I.F("migrating to version 2...")
|
||||
// backfill word indexes
|
||||
d.UpdateWordIndexes()
|
||||
// bump to version 2
|
||||
_ = d.writeVersionTag(2)
|
||||
}
|
||||
}
|
||||
|
||||
// writeVersionTag writes a new version tag key to the database (no value)
|
||||
func (d *D) writeVersionTag(ver uint32) (err error) {
|
||||
return d.Update(
|
||||
func(txn *badger.Txn) (err error) {
|
||||
// delete any existing version keys first (there should only be one, but be safe)
|
||||
verPrf := new(bytes.Buffer)
|
||||
if _, err = indexes.VersionPrefix.Write(verPrf); chk.E(err) {
|
||||
return
|
||||
}
|
||||
it := txn.NewIterator(badger.IteratorOptions{Prefix: verPrf.Bytes()})
|
||||
defer it.Close()
|
||||
for it.Rewind(); it.Valid(); it.Next() {
|
||||
item := it.Item()
|
||||
key := item.KeyCopy(nil)
|
||||
if err = txn.Delete(key); chk.E(err) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// now write the new version key
|
||||
buf := new(bytes.Buffer)
|
||||
vv := new(types.Uint32)
|
||||
vv.Set(ver)
|
||||
if err = indexes.VersionEnc(vv).MarshalWrite(buf); chk.E(err) {
|
||||
return
|
||||
}
|
||||
return txn.Set(buf.Bytes(), nil)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
func (d *D) UpdateWordIndexes() {
|
||||
log.T.F("updating word indexes...")
|
||||
var err error
|
||||
var wordIndexes [][]byte
|
||||
// iterate all events and generate word index keys from content and tags
|
||||
if err = d.View(
|
||||
func(txn *badger.Txn) (err error) {
|
||||
prf := new(bytes.Buffer)
|
||||
if err = indexes.EventEnc(nil).MarshalWrite(prf); chk.E(err) {
|
||||
return
|
||||
}
|
||||
it := txn.NewIterator(badger.IteratorOptions{Prefix: prf.Bytes()})
|
||||
defer it.Close()
|
||||
for it.Rewind(); it.Valid(); it.Next() {
|
||||
item := it.Item()
|
||||
var val []byte
|
||||
if val, err = item.ValueCopy(nil); chk.E(err) {
|
||||
continue
|
||||
}
|
||||
// decode the event
|
||||
ev := new(event.E)
|
||||
if err = ev.UnmarshalBinary(bytes.NewBuffer(val)); chk.E(err) {
|
||||
continue
|
||||
}
|
||||
// log.I.F("updating word indexes for event: %s", ev.Serialize())
|
||||
// read serial from key
|
||||
key := item.Key()
|
||||
ser := indexes.EventVars()
|
||||
if err = indexes.EventDec(ser).UnmarshalRead(bytes.NewBuffer(key)); chk.E(err) {
|
||||
continue
|
||||
}
|
||||
// collect unique word hashes for this event
|
||||
seen := make(map[string]struct{})
|
||||
// from content
|
||||
if len(ev.Content) > 0 {
|
||||
for _, h := range TokenHashes(ev.Content) {
|
||||
seen[string(h)] = struct{}{}
|
||||
}
|
||||
}
|
||||
// from all tag fields (key and values)
|
||||
if ev.Tags != nil && ev.Tags.Len() > 0 {
|
||||
for _, t := range *ev.Tags {
|
||||
for _, field := range t.T {
|
||||
if len(field) == 0 {
|
||||
continue
|
||||
}
|
||||
for _, h := range TokenHashes(field) {
|
||||
seen[string(h)] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// build keys
|
||||
for k := range seen {
|
||||
w := new(types.Word)
|
||||
w.FromWord([]byte(k))
|
||||
buf := new(bytes.Buffer)
|
||||
if err = indexes.WordEnc(
|
||||
w, ser,
|
||||
).MarshalWrite(buf); chk.E(err) {
|
||||
continue
|
||||
}
|
||||
wordIndexes = append(wordIndexes, buf.Bytes())
|
||||
}
|
||||
}
|
||||
return
|
||||
},
|
||||
); chk.E(err) {
|
||||
return
|
||||
}
|
||||
// sort the indexes for ordered writes
|
||||
sort.Slice(
|
||||
wordIndexes, func(i, j int) bool {
|
||||
return bytes.Compare(
|
||||
wordIndexes[i], wordIndexes[j],
|
||||
) < 0
|
||||
},
|
||||
)
|
||||
// write in a batch
|
||||
batch := d.NewWriteBatch()
|
||||
for _, v := range wordIndexes {
|
||||
if err = batch.Set(v, nil); chk.E(err) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
_ = batch.Flush()
|
||||
log.T.F("finished updating word indexes...")
|
||||
}
|
||||
|
||||
func (d *D) UpdateExpirationTags() {
|
||||
|
||||
194
pkg/database/query-events-search_test.go
Normal file
194
pkg/database/query-events-search_test.go
Normal file
@@ -0,0 +1,194 @@
|
||||
package database
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"lol.mleku.dev/chk"
|
||||
"next.orly.dev/pkg/crypto/p256k"
|
||||
"next.orly.dev/pkg/encoders/event"
|
||||
"next.orly.dev/pkg/encoders/filter"
|
||||
"next.orly.dev/pkg/encoders/kind"
|
||||
"next.orly.dev/pkg/encoders/tag"
|
||||
"next.orly.dev/pkg/encoders/timestamp"
|
||||
)
|
||||
|
||||
// helper to create a fresh DB
|
||||
func newTestDB(t *testing.T) (*D, context.Context, context.CancelFunc, string) {
|
||||
t.Helper()
|
||||
tempDir, err := os.MkdirTemp("", "search-db-*")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp dir: %v", err)
|
||||
}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
db, err := New(ctx, cancel, tempDir, "error")
|
||||
if err != nil {
|
||||
cancel()
|
||||
os.RemoveAll(tempDir)
|
||||
t.Fatalf("Failed to init DB: %v", err)
|
||||
}
|
||||
return db, ctx, cancel, tempDir
|
||||
}
|
||||
|
||||
// TestQueryEventsBySearchTerms creates a small set of events with content and tags,
|
||||
// saves them, then queries using filter.Search to ensure the word index works.
|
||||
func TestQueryEventsBySearchTerms(t *testing.T) {
|
||||
db, ctx, cancel, tempDir := newTestDB(t)
|
||||
defer func() {
|
||||
// cancel context first to stop background routines cleanly
|
||||
cancel()
|
||||
db.Close()
|
||||
os.RemoveAll(tempDir)
|
||||
}()
|
||||
|
||||
// signer for all events
|
||||
sign := new(p256k.Signer)
|
||||
if err := sign.Generate(); chk.E(err) {
|
||||
t.Fatalf("signer generate: %v", err)
|
||||
}
|
||||
|
||||
now := timestamp.Now().V
|
||||
|
||||
// Events to cover tokenizer rules:
|
||||
// - regular words
|
||||
// - URLs ignored
|
||||
// - 64-char hex ignored
|
||||
// - nostr: URIs ignored
|
||||
// - #[n] mentions ignored
|
||||
// - tag fields included in search
|
||||
|
||||
// 1. Contains words: "alpha beta", plus URL and hex (ignored)
|
||||
ev1 := event.New()
|
||||
ev1.Kind = kind.TextNote.K
|
||||
ev1.Pubkey = sign.Pub()
|
||||
ev1.CreatedAt = now - 5
|
||||
ev1.Content = []byte("Alpha beta visit https://example.com deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
|
||||
ev1.Tags = tag.NewS()
|
||||
ev1.Sign(sign)
|
||||
if _, _, err := db.SaveEvent(ctx, ev1); err != nil {
|
||||
t.Fatalf("save ev1: %v", err)
|
||||
}
|
||||
|
||||
// 2. Contains overlap word "beta" and unique "gamma" and nostr: URI ignored
|
||||
ev2 := event.New()
|
||||
ev2.Kind = kind.TextNote.K
|
||||
ev2.Pubkey = sign.Pub()
|
||||
ev2.CreatedAt = now - 4
|
||||
ev2.Content = []byte("beta and GAMMA with nostr:nevent1qqqqq")
|
||||
ev2.Tags = tag.NewS()
|
||||
ev2.Sign(sign)
|
||||
if _, _, err := db.SaveEvent(ctx, ev2); err != nil {
|
||||
t.Fatalf("save ev2: %v", err)
|
||||
}
|
||||
|
||||
// 3. Contains only a URL (should not create word tokens) and mention #[1] (ignored)
|
||||
ev3 := event.New()
|
||||
ev3.Kind = kind.TextNote.K
|
||||
ev3.Pubkey = sign.Pub()
|
||||
ev3.CreatedAt = now - 3
|
||||
ev3.Content = []byte("see www.example.org #[1]")
|
||||
ev3.Tags = tag.NewS()
|
||||
ev3.Sign(sign)
|
||||
if _, _, err := db.SaveEvent(ctx, ev3); err != nil {
|
||||
t.Fatalf("save ev3: %v", err)
|
||||
}
|
||||
|
||||
// 4. No content words, but tag value has searchable words: "delta epsilon"
|
||||
ev4 := event.New()
|
||||
ev4.Kind = kind.TextNote.K
|
||||
ev4.Pubkey = sign.Pub()
|
||||
ev4.CreatedAt = now - 2
|
||||
ev4.Content = []byte("")
|
||||
ev4.Tags = tag.NewS()
|
||||
*ev4.Tags = append(*ev4.Tags, tag.NewFromAny("t", "delta epsilon"))
|
||||
ev4.Sign(sign)
|
||||
if _, _, err := db.SaveEvent(ctx, ev4); err != nil {
|
||||
t.Fatalf("save ev4: %v", err)
|
||||
}
|
||||
|
||||
// 5. Another event with both content and tag tokens for ordering checks
|
||||
ev5 := event.New()
|
||||
ev5.Kind = kind.TextNote.K
|
||||
ev5.Pubkey = sign.Pub()
|
||||
ev5.CreatedAt = now - 1
|
||||
ev5.Content = []byte("alpha DELTA mixed-case and link http://foo.bar")
|
||||
ev5.Tags = tag.NewS()
|
||||
*ev5.Tags = append(*ev5.Tags, tag.NewFromAny("t", "zeta"))
|
||||
ev5.Sign(sign)
|
||||
if _, _, err := db.SaveEvent(ctx, ev5); err != nil {
|
||||
t.Fatalf("save ev5: %v", err)
|
||||
}
|
||||
|
||||
// Small sleep to ensure created_at ordering is the only factor
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
|
||||
// Helper to run a search and return IDs
|
||||
run := func(q string) ([]*event.E, error) {
|
||||
f := &filter.F{Search: []byte(q)}
|
||||
return db.QueryEvents(ctx, f)
|
||||
}
|
||||
|
||||
// Single-term search: alpha -> should match ev1 and ev5 ordered by created_at desc (ev5 newer)
|
||||
if evs, err := run("alpha"); err != nil {
|
||||
t.Fatalf("search alpha: %v", err)
|
||||
} else {
|
||||
if len(evs) != 2 {
|
||||
t.Fatalf("alpha expected 2 results, got %d", len(evs))
|
||||
}
|
||||
if !(evs[0].CreatedAt >= evs[1].CreatedAt) {
|
||||
t.Fatalf("results not ordered by created_at desc")
|
||||
}
|
||||
}
|
||||
|
||||
// Overlap term beta -> ev1 and ev2
|
||||
if evs, err := run("beta"); err != nil {
|
||||
t.Fatalf("search beta: %v", err)
|
||||
} else if len(evs) != 2 {
|
||||
t.Fatalf("beta expected 2 results, got %d", len(evs))
|
||||
}
|
||||
|
||||
// Unique term gamma -> only ev2
|
||||
if evs, err := run("gamma"); err != nil {
|
||||
t.Fatalf("search gamma: %v", err)
|
||||
} else if len(evs) != 1 {
|
||||
t.Fatalf("gamma expected 1 result, got %d", len(evs))
|
||||
}
|
||||
|
||||
// URL terms should be ignored: example -> appears only as URL in ev1/ev3/ev5; tokenizer ignores URLs so expect 0
|
||||
if evs, err := run("example"); err != nil {
|
||||
t.Fatalf("search example: %v", err)
|
||||
} else if len(evs) != 0 {
|
||||
t.Fatalf("example expected 0 results (URL tokens ignored), got %d", len(evs))
|
||||
}
|
||||
|
||||
// Tag words searchable: delta should match ev4 and ev5 (delta in tag for ev4, in content for ev5)
|
||||
if evs, err := run("delta"); err != nil {
|
||||
t.Fatalf("search delta: %v", err)
|
||||
} else if len(evs) != 2 {
|
||||
t.Fatalf("delta expected 2 results, got %d", len(evs))
|
||||
}
|
||||
|
||||
// Very short token ignored: single-letter should yield 0
|
||||
if evs, err := run("a"); err != nil {
|
||||
t.Fatalf("search short token: %v", err)
|
||||
} else if len(evs) != 0 {
|
||||
t.Fatalf("single-letter expected 0 results, got %d", len(evs))
|
||||
}
|
||||
|
||||
// 64-char hex should be ignored
|
||||
hex64 := "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
|
||||
if evs, err := run(hex64); err != nil {
|
||||
t.Fatalf("search hex64: %v", err)
|
||||
} else if len(evs) != 0 {
|
||||
t.Fatalf("hex64 expected 0 results, got %d", len(evs))
|
||||
}
|
||||
|
||||
// nostr: scheme ignored
|
||||
if evs, err := run("nostr:nevent1qqqqq"); err != nil {
|
||||
t.Fatalf("search nostr: %v", err)
|
||||
} else if len(evs) != 0 {
|
||||
t.Fatalf("nostr: expected 0 results, got %d", len(evs))
|
||||
}
|
||||
}
|
||||
@@ -71,7 +71,12 @@ func TokenHashes(content []byte) [][]byte {
|
||||
}
|
||||
break
|
||||
}
|
||||
_ = start
|
||||
// If we didn't consume any rune for a word, advance by one rune to avoid stalling
|
||||
if i == start {
|
||||
_, size2 := utf8DecodeRuneInString(s[i:])
|
||||
i += size2
|
||||
continue
|
||||
}
|
||||
if len(runes) >= 2 {
|
||||
w := string(runes)
|
||||
// Exclude 64-char hex strings
|
||||
|
||||
@@ -1 +1 @@
|
||||
v0.8.9
|
||||
v0.9.0
|
||||
Reference in New Issue
Block a user