simplify fulltext keys to just be values

This commit is contained in:
2025-05-14 11:42:53 -01:06
parent 4d4505ffd6
commit 35012590a3
4 changed files with 34 additions and 33 deletions

View File

@@ -22,7 +22,7 @@ import (
// requested not to, so that the event can't be saved again.
func (r *T) DeleteEvent(c context.T, eid *eventid.T, noTombstone ...bool) (err error) {
var foundSerial []byte
seri := serial.New(nil)
ser := serial.New(nil)
err = r.View(func(txn *badger.Txn) (err error) {
// query event by id to ensure we don't try to save duplicates
prf := prefixes.Id.Key(id.New(eid))
@@ -34,9 +34,9 @@ func (r *T) DeleteEvent(c context.T, eid *eventid.T, noTombstone ...bool) (err e
// get the serial
k = it.Item().Key()
// copy serial out
keys.Read(k, index.Empty(), id.New(&eventid.T{}), seri)
keys.Read(k, index.Empty(), id.New(&eventid.T{}), ser)
// save into foundSerial
foundSerial = seri.Val
foundSerial = ser.Val
}
return
})
@@ -49,10 +49,11 @@ func (r *T) DeleteEvent(c context.T, eid *eventid.T, noTombstone ...bool) (err e
var indexKeys [][]byte
ev := event.New()
var evKey, evb, tombstoneKey []byte
var w, l [][]byte
// fetch the event to get its index keys
err = r.View(func(txn *badger.Txn) (err error) {
// retrieve the event record
evKey = keys.Write(index.New(prefixes.Event), seri)
evKey = keys.Write(index.New(prefixes.Event), ser)
it := txn.NewIterator(badger.IteratorOptions{})
defer it.Close()
it.Seek(evKey)
@@ -63,7 +64,9 @@ func (r *T) DeleteEvent(c context.T, eid *eventid.T, noTombstone ...bool) (err e
if _, err = r.Unmarshal(ev, evb); chk.E(err) {
return
}
indexKeys = GetIndexKeysForEvent(ev, seri)
indexKeys = GetIndexKeysForEvent(ev, ser)
indexKeys = append(indexKeys, r.GetFulltextKeys(ev, ser)...)
indexKeys = append(indexKeys, r.GetLangKeys(ev, ser)...)
// we don't make tombstones for replacements, but it is better to shift that
// logic outside of this closure.
if len(noTombstone) > 0 && !noTombstone[0] {
@@ -77,6 +80,8 @@ func (r *T) DeleteEvent(c context.T, eid *eventid.T, noTombstone ...bool) (err e
if chk.E(err) {
return
}
_, _ = w, l
err = r.Update(func(txn *badger.Txn) (err error) {
if err = txn.Delete(evKey); chk.E(err) {
}

View File

@@ -28,36 +28,12 @@ func (r *T) WriteFulltextIndex(w *Words) (err error) {
}
r.WG.Add(1)
defer r.WG.Done()
// log.I.F("making fulltext index for %d", w.ser.Uint64())
for i := range w.wordMap {
retry:
if err = r.Update(func(txn *badger.Txn) (err error) {
prf := prefixes.FulltextIndex.Key(arb.New(i))
var item2 *badger.Item
if item2, err = txn.Get(prf); err != nil {
// make a new record
if err = txn.Set(prf, w.ser.Val); chk.E(err) {
return
}
} else {
if item2.KeySize() == int64(len(prf)) {
select {
case <-r.Ctx.Done():
return
default:
}
var val2 []byte
if val2, err = item2.ValueCopy(nil); chk.E(err) {
return
}
if !bytes.Contains(val2, w.ser.Val) {
val2 = append(val2, w.ser.Val...)
if err = txn.Set(prf, val2); chk.E(err) {
return
}
}
return
}
key := prefixes.FulltextIndex.Key(arb.New(i), w.ser)
if err = txn.Set(key, nil); chk.E(err) {
return
}
return
}); chk.E(err) {
@@ -67,6 +43,15 @@ func (r *T) WriteFulltextIndex(w *Words) (err error) {
return
}
// GetFulltextKeys builds the FulltextIndex keys for an event: one key per entry in the word
// set returned by GetWordsFromContent, each composed of the word followed by ser.
//
// The word set is a map, so the order of the returned keys is unspecified. If the word set is
// empty the result is nil.
func (r *T) GetFulltextKeys(ev *event.T, ser *serial.T) (keys [][]byte) {
	for word := range r.GetWordsFromContent(ev) {
		keys = append(keys, prefixes.FulltextIndex.Key(arb.New(word), ser))
	}
	return
}
func (r *T) GetWordsFromContent(ev *event.T) (wordMap map[string]struct{}) {
wordMap = make(map[string]struct{})
if ev.Kind.IsText() {

View File

@@ -49,6 +49,15 @@ retry:
return
}
// GetLangKeys builds the LangIndex keys for an event: one key per language tag reported by
// GetLangTags, each composed of the language value followed by ser.
//
// Keys are returned in the same order as the tags. If there are no tags the result is nil.
func (r *T) GetLangKeys(ev *event.T, ser *serial.T) (keys [][]byte) {
	for _, code := range r.GetLangTags(ev) {
		keys = append(keys, prefixes.LangIndex.Key(lang.New(code), ser))
	}
	return
}
func (r *T) GetLangTags(ev *event.T) (langs []string) {
if ev.Kind.IsText() {
tgs := ev.Tags.GetAll(tag.New("l"))

View File

@@ -117,8 +117,10 @@ const (
Configuration
// FulltextIndex is an inverted index that maps a search term to the events it is found in.
// The keys are written with the word and serial to make handling them easier. Even though
// this makes the index a little bigger, it is much faster to search, which is what matters.
//
// [ 15 ][ word ] [ array of serials of events containing the word ]
// [ 15 ][ word ][ serial ]
FulltextIndex
// LangIndex is an index of events with language tags. These use ISO639-2 3-letter codes