diff --git a/addresstag/addresstag.go b/addresstag/addresstag.go index 5d42db4..54a6fae 100644 --- a/addresstag/addresstag.go +++ b/addresstag/addresstag.go @@ -1,21 +1,22 @@ package addresstag import ( + "bytes" "strconv" - "strings" "realy.lol/chk" "realy.lol/hex" ) // DecodeAddressTag unpacks the contents of an `a` tag. -func DecodeAddressTag(tagValue string) (k uint16, pkb []byte, d string) { - split := strings.Split(tagValue, ":") +func DecodeAddressTag(tagValue []byte) (k uint16, pkb []byte, d []byte) { + split := bytes.Split(tagValue, []byte(":")) if len(split) == 3 { var err error var key uint64 - if pkb, _ = hex.Dec(split[1]); len(pkb) == 32 { - if key, err = strconv.ParseUint(split[0], 10, 16); !chk.E(err) { + if pkb, _ = hex.DecAppend(pkb, split[1]); len(pkb) == 32 { + // todo: use ints package for this + if key, err = strconv.ParseUint(string(split[0]), 10, 16); !chk.E(err) { return uint16(key), pkb, split[2] } } diff --git a/ratel/create-a-tag.go b/ratel/create-a-tag.go index d3339ac..c3978a0 100644 --- a/ratel/create-a-tag.go +++ b/ratel/create-a-tag.go @@ -20,6 +20,8 @@ import ( // Create_a_Tag generates tag indexes from a tag key, tag value, created_at // timestamp and the event serial. +// +// This only covers what are essentially p tags and a tags. func Create_a_Tag(tagKey, tagValue string, CA *createdat.T, ser *serial.T) (prf index.P, elems []keys.Element, err error) { diff --git a/ratel/fulltext-query.go b/ratel/fulltext-query.go index b2996b7..8d9cd47 100644 --- a/ratel/fulltext-query.go +++ b/ratel/fulltext-query.go @@ -1 +1,270 @@ package ratel + +import ( + "bytes" + "sort" + "time" + + "github.com/dgraph-io/badger/v4" + + "realy.lol/chk" + "realy.lol/context" + "realy.lol/filter" + "realy.lol/log" + "realy.lol/ratel/keys/arb" + "realy.lol/ratel/keys/serial" + "realy.lol/ratel/prefixes" + "realy.lol/store" + "realy.lol/tag" +) + +type FulltextSequence struct { + inSequence int + distance int + sequence []int + items []*prefixes.FulltextIndexKey +} + +func (r *T) QueryFulltextEvents(c context.T, f *filter.T) (evs []store.IdTsPk, err error) { + start := time.Now() + // just use QueryEvents if there isn't actually any fulltext search field content. + if len(f.Search) == 0 { + return r.QueryForIds(c, f) + } + split := bytes.Split(f.Search, []byte{' '}) + var lang []byte + var terms [][]byte + for i := range split { + if bytes.HasPrefix(split[i], []byte("lang:")) { + lang = split[i][5:] + } else { + terms = append(terms, split[i]) + } + } + var fTags []*tag.T + if f.Tags != nil { + fTags = f.Tags.ToSliceOfTags() + } + fAut := f.Authors.ToSliceOfBytes() + fKinds := f.Kinds.K + var matches []*prefixes.FulltextIndexKey + if err = r.View(func(txn *badger.Txn) (err error) { + it := txn.NewIterator(badger.IteratorOptions{ + Prefix: prefixes.FulltextIndex.Key(), + Reverse: true, + }) + defer it.Close() + for _, v := range terms { + for it.Rewind(); it.ValidForPrefix(prefixes.FulltextIndex.Key(arb.New(v))); it.Next() { + item := it.Item() + k := item.KeyCopy(nil) + var idx *prefixes.FulltextIndexKey + if idx, err = prefixes.NewFulltextIndexKey(k); chk.E(err) { + continue + } + if f.Since != nil { + ts := idx.Timestamp() + if ts.I64() < f.Since.I64() { + // event is earlier than since + continue + } + } + if f.Until != nil { + ts := idx.Timestamp() + if ts.I64() > f.Until.I64() { + // event is later than until + continue + } + } + if len(fKinds) != 0 { + var found bool + ki := idx.Kind() + for _, kin := range fKinds { + if ki.Equal(kin) { + found = true + break + } + } + // kinds are present in filter and don't match + if !found { + continue + } + } + if len(fAut) > 0 { + var found bool + pk := idx.Pubkey() + for _, p := range fAut { + if bytes.Equal(p, pk) { + found = true + break + } + } + // pubkey is in filter and doesn't match + if !found { + continue + } + } + // get serial + ser := idx.Serial() + // check language tags + if len(lang) > 0 { + var found bool + func() { + itl := txn.NewIterator(badger.IteratorOptions{ + Prefix: prefixes.LangIndex.Key(), + }) + defer itl.Close() + for itl.Rewind(); itl.Valid(); itl.Next() { + s := serial.FromKey(itl.Item().KeyCopy(nil)) + if s.Uint64() == ser.Uint64() { + found = true + return + } + } + }() + // the event does not have an associated language tag + if !found { + continue + } + } + // now we can check tags, they can't be squished into a fulltext index, and + // require a second table iteration + if len(fTags) > 0 { + var found bool + for _, ft := range fTags { + if len(ft.Key()) == 2 && ft.Key()[0] == '#' { + var tp []byte + if tp, err = GetTagKeyPrefix(ft.Key()[0], ft.Value()); chk.E(err) { + continue + } + if len(tp) == 0 { + // the tag did not generate an index + continue + } + func() { + itt := txn.NewIterator(badger.IteratorOptions{ + Prefix: tp, + }) + defer itt.Close() + for itt.Rewind(); itt.Valid(); itt.Next() { + s := serial.FromKey(itt.Item().KeyCopy(nil)) + if s.Uint64() == ser.Uint64() { + found = true + return + } + } + }() + // the event does not have any of the required tags + if !found { + continue + } + } + } + if !found { + continue + } + } + // if we got to here, we have a match + matches = append(matches, idx) + } + } + return + }); chk.E(err) { + return + } + if len(matches) == 0 { + // didn't find any (?) + return + } + // next we need to group and sort the results + groups := make(map[uint64]FulltextSequence) + for _, v := range matches { + if _, ok := groups[v.Serial().Uint64()]; !ok { + groups[v.Serial().Uint64()] = FulltextSequence{items: []*prefixes.FulltextIndexKey{v}} + } else { + g := groups[v.Serial().Uint64()] + g.items = append(g.items, v) + } + } + // now we need to convert the map to a slice so we can sort it + var groupS []FulltextSequence + for _, g := range groups { + groupS = append(groupS, g) + } + // first, sort the groups by the number of elements in descending order + sort.Slice(groupS, func(i, j int) (e bool) { + return len(groupS[i].items) > len(groupS[j].items) + }) + // get the distance of the groups + for _, g := range groupS { + g.distance = int(g.items[len(g.items)-1].Sequence().Val - g.items[0].Sequence().Val) + } + // get the sequence as relates to the search terms + for _, g := range groupS { + for i := range g.items { + if i > 0 { + for k := range terms { + if bytes.Equal(g.items[i].Word(), terms[k]) { + g.sequence = append(g.sequence, i) + } + } + } + } + } + // count the number of elements of the sequence that are in ascending order + for _, g := range groupS { + for i := range g.sequence { + if i > 0 { + if g.sequence[i-1] < g.sequence[i] { + g.inSequence++ + } + } + } + } + // find the boundaries of each length segment of the group + var groupedCounts []int + var lastCount int + lastCount = len(groupS[0].items) + for i, g := range groupS { + if len(g.items) < lastCount { + groupedCounts = append(groupedCounts, i) + lastCount = len(g.items) + } + } + // break the groupS into segments of the same length + var segments [][]FulltextSequence + lastCount = 0 + for i := range groupedCounts { + segments = append(segments, groupS[lastCount:groupedCounts[i]]) + } + // sort the segments by distance and number in sequence + for _, s := range segments { + sort.Slice(s, func(i, j int) bool { + return (s[i].distance < s[j].distance) && s[i].inSequence > s[i].inSequence + }) + } + // flatten the segments back into a list + var list []FulltextSequence + for _, seg := range segments { + for _, bit := range seg { + list = append(list, bit) + } + } + // convert into store.IdTsPk + for _, bit := range list { + for _, el := range bit.items { + evs = append(evs, store.IdTsPk{ + Ts: el.Timestamp().I64(), + Id: el.EventId().Bytes(), + Pub: el.Pubkey(), + }) + } + } + if f.Limit != nil { + evs = evs[:*f.Limit] + } else { + evs = evs[:r.MaxLimit] + } + log.I.F("performed search for '%s' in %v", f.Search, time.Now().Sub(start)) + return +} diff --git a/ratel/getindexkeysforevent.go b/ratel/getindexkeysforevent.go index 670393e..fbbf5f4 100644 --- a/ratel/getindexkeysforevent.go +++ b/ratel/getindexkeysforevent.go @@ -6,7 +6,7 @@ import ( "realy.lol/chk" "realy.lol/event" "realy.lol/eventid" - "realy.lol/log" + "realy.lol/hex" "realy.lol/ratel/keys" "realy.lol/ratel/keys/createdat" "realy.lol/ratel/keys/fullid" @@ -59,15 +59,19 @@ func GetIndexKeysForEvent(ev *event.T, ser *serial.T) (keyz [][]byte) { } // ~ by tag value + date for i, t := range ev.Tags.ToSliceOfTags() { - // there is no value field + tsb := t.ToSliceOfBytes() + if t.Len() < 2 { + continue + } + tk, tv := tsb[0], tsb[1] if t.Len() < 2 || - // the tag is not a-zA-Z probably (this would permit arbitrary other - // single byte chars) - len(t.ToSliceOfBytes()[0]) != 1 || + // the tag is not a-zA-Z probably (this would permit arbitrary other single byte + // chars) + len(tk) != 1 || // the second field is zero length - len(t.ToSliceOfBytes()[1]) == 0 || + len(tv) == 0 || // the second field is more than 100 characters long - len(t.ToSliceOfBytes()[1]) > 100 { + len(tv) > 100 { // any of the above is true then the tag is not indexable continue } @@ -82,17 +86,27 @@ func GetIndexKeysForEvent(ev *event.T, ser *serial.T) (keyz [][]byte) { // duplicate continue } - // get key prefix (with full length) and offset where to write the last - // parts + // create tags for e (event references) but we don't care about the optional third value + // as it can't be searched for anyway (it's for clients to render threads) + if bytes.Equal(tk, []byte("e")) { + if len(tv) != 64 { + continue + } + var ei []byte + if ei, err = hex.DecAppend(ei, tv); chk.E(err) { + continue + } + keyz = append(keyz, prefixes.TagEventId.Key(id.New(eventid.NewWith(ei)), ser)) + continue + } + // get key prefix (with full length) and offset where to write the last parts. prf, elems := index.P(0), []keys.Element(nil) - if prf, elems, err = Create_a_Tag(string(t.ToSliceOfBytes()[0]), - string(t.ToSliceOfBytes()[1]), CA, - ser); chk.E(err) { - log.I.F("%v", t.ToStringSlice()) + if prf, elems, err = Create_a_Tag(string(tsb[0]), + string(tv), CA, ser); chk.E(err) { + // log.I.F("%v", t.ToStringSlice()) return } k := prf.Key(elems...) - // log.T.ToSliceOfBytes("tag '%s': %s key %0x", t.ToSliceOfBytes()[0], t.ToSliceOfBytes()[1:], k) keyz = append(keyz, k) } { // ~ by date only diff --git a/ratel/gettagkeyprefix.go b/ratel/gettagkeyprefix.go index 932d447..b62dabd 100644 --- a/ratel/gettagkeyprefix.go +++ b/ratel/gettagkeyprefix.go @@ -3,9 +3,11 @@ package ratel import ( eventstore "realy.lol/addresstag" "realy.lol/chk" + "realy.lol/eventid" "realy.lol/hex" "realy.lol/ratel/keys" "realy.lol/ratel/keys/arb" + "realy.lol/ratel/keys/id" "realy.lol/ratel/keys/kinder" "realy.lol/ratel/keys/pubkey" "realy.lol/ratel/prefixes" @@ -14,7 +16,7 @@ import ( // GetTagKeyPrefix returns tag index prefixes based on the initial field of a // tag. // -// There is 3 types of index tag keys: +// There is 4 types of index tag keys: // // - TagAddr: [ 8 ][ 2b Kind ][ 8b Pubkey ][ address/URL ][ 8b Serial ] // @@ -22,9 +24,17 @@ import ( // // - Tag: [ 6 ][ address/URL ][ 8b Serial ] // +// - TagEventId [ 8 bytes eventid.T prefix ][ 8 bytes Serial ] +// // This function produces the initial bytes without the index. -func GetTagKeyPrefix(tagValue string) (key []byte, err error) { - if k, pkb, d := eventstore.DecodeAddressTag(tagValue); len(pkb) == 32 { +func GetTagKeyPrefix(prf byte, tagValue []byte) (key []byte, err error) { + if prf == 'e' { + var eid []byte + if eid, err = hex.DecAppend(eid, tagValue); chk.E(err) { + return + } + key = prefixes.TagEventId.Key(id.New(eventid.NewWith(eid))) + } else if k, pkb, d := eventstore.DecodeAddressTag(tagValue); len(pkb) == 32 { // store value in the new special "a" tag index var pk *pubkey.T if pk, err = pubkey.NewFromBytes(pkb); chk.E(err) { @@ -35,7 +45,7 @@ func GetTagKeyPrefix(tagValue string) (key []byte, err error) { els = append(els, arb.New(d)) } key = prefixes.TagAddr.Key(els...) - } else if pkb, _ := hex.Dec(tagValue); len(pkb) == 32 { + } else if pkb, _ := hex.DecAppend(nil, tagValue); len(pkb) == 32 { // store value as bytes var pkk *pubkey.T if pkk, err = pubkey.NewFromBytes(pkb); chk.E(err) { diff --git a/ratel/prefixes/fulltext.go b/ratel/prefixes/fulltext.go index 5cef503..c66377d 100644 --- a/ratel/prefixes/fulltext.go +++ b/ratel/prefixes/fulltext.go @@ -33,7 +33,7 @@ type FulltextIndexKey struct { pubkey []byte timestamp *timestamp.T kind *kind.T - sequence uint32 + sequence *integer.T serial *serial.T } @@ -51,42 +51,62 @@ func (f *FulltextIndexKey) Segment(start, end int) []byte { } func (f *FulltextIndexKey) Word() (v []byte) { + if f.word != nil { + return f.word + } v = f.key[index.Len:f.endOfWord] f.word = v return } func (f *FulltextIndexKey) EventId() (v *eventid.T) { + if f.eventid != nil { + return f.eventid + } v = eventid.NewWith(f.Segment(StartOfEventId, StartOfPubkey)) f.eventid = v return } func (f *FulltextIndexKey) Pubkey() (v []byte) { + if f.pubkey != nil { + return f.pubkey + } v = f.Segment(StartOfPubkey, StartOfTimestamp) f.pubkey = v return } func (f *FulltextIndexKey) Timestamp() (v *timestamp.T) { + if f.timestamp != nil { + return f.timestamp + } v = timestamp.FromBytes(f.Segment(StartOfTimestamp, StartOfKind)) f.timestamp = v return } func (f *FulltextIndexKey) Kind() (v *kind.T) { + if f.kind != nil { + return f.kind + } v = kind.NewFromBytes(f.Segment(StartOfKind, StartOfSequence)) f.kind = v return } func (f *FulltextIndexKey) Sequence() (v *integer.T) { - v = integer.NewFrom(f.Segment(StartOfSequence, StartOfSerial)) - f.sequence = v.Val + if f.sequence != nil { + return f.sequence + } + f.sequence = integer.NewFrom(f.Segment(StartOfSequence, StartOfSerial)) return } func (f *FulltextIndexKey) Serial() (v *serial.T) { + if f.serial != nil { + return f.serial + } v = serial.New(f.Segment(StartOfSerial, len(f.key))) f.serial = v return diff --git a/ratel/prefixes/prefixes.go b/ratel/prefixes/prefixes.go index f96de3b..87883fe 100644 --- a/ratel/prefixes/prefixes.go +++ b/ratel/prefixes/prefixes.go @@ -132,6 +132,14 @@ const ( // // [ 16 ][ ISO639-2 code ][ serial ] LangIndex + + // TagEventId is a tag that is used to search for events containing e tag references to + // other events. This will greatly accelerate searches for threaded discussion and enable + // the creation of composite documents with a directed acyclic graph structure, such as git + // commits. + // + // [ 17 ][ 8 bytes eventid.T prefix ][ 8 bytes Serial ] + TagEventId ) // FilterPrefixes is a slice of the prefixes used by filter index to enable a loop diff --git a/ratel/preparequeries.go b/ratel/preparequeries.go index 28ebe16..15de2e0 100644 --- a/ratel/preparequeries.go +++ b/ratel/preparequeries.go @@ -126,15 +126,19 @@ func PrepareQueries(f *filter.T) ( ext = &filter.T{Kinds: f.Kinds} i := 0 for _, values := range f.Tags.ToSliceOfTags() { - for _, value := range values.ToSliceOfBytes()[1:] { - // get key prefix (with full length) and offset where to write the last parts - var prf []byte - if prf, err = GetTagKeyPrefix(string(value)); chk.E(err) { - continue + tsb := values.ToSliceOfBytes() + // indexable tags can only have 1 character in the key field. + if len(tsb[0]) == 1 { + for _, value := range tsb[1:] { + // get key prefix (with full length) and offset where to write the last parts + var prf []byte + if prf, err = GetTagKeyPrefix(tsb[0][0], value); chk.E(err) { + continue + } + // remove the last part to get just the prefix we want here + qs[i] = query{index: i, queryFilter: f, searchPrefix: prf} + i++ } - // remove the last part to get just the prefix we want here - qs[i] = query{index: i, queryFilter: f, searchPrefix: prf} - i++ } } // log.T.S("tags", qs) diff --git a/realy/acceptevent.go b/realy/acceptevent.go index 5b214b0..a7e0eee 100644 --- a/realy/acceptevent.go +++ b/realy/acceptevent.go @@ -44,6 +44,9 @@ func (s *Server) acceptEvent(c context.T, evt *event.T, authedPubkey []byte, } // check the mute list, and reject events authored by muted pubkeys, even if // they come from a pubkey that is on the follow list. + // + // note that some clients hide this info in the kind 10000 mute list, unfortunately. + // such as jumble. use old nostrudel or similar which still gives public readable info. for pk := range s.muted { if bytes.Equal(evt.Pubkey, []byte(pk)) { notice = "rejecting event with pubkey " + hex.Enc(evt.Pubkey) +