"Refactor filtering and indexing logic for event queries"

The filtering logic has been streamlined, adding enhanced query support for filters involving tags, authors, kinds, and their combinations. Introduced new utility methods for deduplication, intersection, and sorting to improve efficiency in handling event serials. Adjusted indexing structures and encoding for better modularity and readability.
2025-06-15 11:18:18 +01:00
parent d5ae20ba94
commit 321a1b47bb
7 changed files with 417 additions and 72 deletions
--- a/database/filter.go
+++ b/database/filter.go
@@ -1,9 +1,13 @@
 package database

 import (
+	"bytes"
 	"math"
+	"sort"

 	"x.realy.lol/chk"
+	"x.realy.lol/database/indexes"
+	"x.realy.lol/database/indexes/types/pubhash"
 	"x.realy.lol/database/indexes/types/varint"
 	"x.realy.lol/filter"
 	"x.realy.lol/hex"
@@ -51,7 +55,10 @@ func ToBitfield(f *filter.F) (b Bitfield) {
 	return
 }

-func (d *D) Filter(f filter.F) (evSerials []*varint.V, err error) {
+// Filter runs a nip-01 type query on a provided filter and returns the database serial keys of
+// the matching events, excluding a list of authors also provided from the result.
+func (d *D) Filter(f filter.F, exclude []*pubhash.T) (evSerials varint.S, err error) {
+	var evs varint.S
 	bf := ToBitfield(&f)
 	// first, if there is Ids these override everything else
 	if bf&hasIds != 0 {
@@ -66,7 +73,7 @@ func (d *D) Filter(f filter.F) (evSerials []*varint.V, err error) {
 				// just going to ignore it i guess
 				continue
 			}
-			evSerials = append(evSerials, ev)
+			evs = append(evs, ev)
 		}
 		return
 	}
@@ -81,44 +88,77 @@ func (d *D) Filter(f filter.F) (evSerials []*varint.V, err error) {
 	}
 	// next, check for filters that only have since and/or until
 	if bf&hasSince != 0 || bf&hasUntil != 0 {
-		if evSerials, err = d.GetEventSerialsByCreatedAtRange(since, until); chk.E(err) {
+		if evs, err = d.GetEventSerialsByCreatedAtRange(since, until); chk.E(err) {
 			return
 		}
-		return
+		goto done
 	}
 	// next, kinds
 	if bf&hasKinds == hasKinds && ^hasKinds&bf == 0 {
-		if evSerials, err = d.GetEventSerialsByKindsCreatedAtRange(f.Kinds, since, until); chk.E(err) {
+		if evs, err = d.GetEventSerialsByKindsCreatedAtRange(f.Kinds, since, until); chk.E(err) {
 			return
 		}
-		return
+		goto done
 	}
 	// next authors
 	if bf&hasAuthors == hasAuthors && ^hasAuthors&bf == 0 {
-		if evSerials, err = d.GetEventSerialsByAuthorsCreatedAtRange(f.Authors, since, until); chk.E(err) {
+		if evs, err = d.GetEventSerialsByAuthorsCreatedAtRange(f.Authors, since, until); chk.E(err) {
 			return
 		}
-		return
+		goto done
 	}
 	// next authors/kinds
-	ak := hasAuthors + hasKinds
-	if bf&(ak) == ak && ^ak&bf == 0 {
-		if evSerials, err = d.GetEventSerialsByKindsAuthorsCreatedAtRange(f.Kinds, f.Authors, since, until); chk.E(err) {
+
+	if ak := hasAuthors + hasKinds; bf&(ak) == ak && ^ak&bf == 0 {
+		if evs, err = d.GetEventSerialsByKindsAuthorsCreatedAtRange(f.Kinds, f.Authors, since, until); chk.E(err) {
 			return
 		}
-		return
+		goto done
+	}
+	// if there is tags, assemble them into an array of tags with the
+	if bf&hasTags != 0 && bf&^hasTags == 0 {
+		if evs, err = d.GetEventSerialsByTagsCreatedAtRange(f.Tags); chk.E(err) {
+
+		}
 	}
 	// next authors/tags
-	at := hasAuthors + hasTags
-	if bf&(at) == at && ^at&bf == 0 {
-		if evSerials, err = d.GetEventSerialsByKindsAuthorsCreatedAtRange(f.Kinds, f.Authors, since, until); chk.E(err) {
+	if at := hasAuthors + hasTags; bf&(at) == at && ^at&bf == 0 {
+		if evs, err = d.GetEventSerialsByAuthorsTagsCreatedAtRange(f.Tags, f.Authors, since, until); chk.E(err) {
 			return
 		}
-		return
+		goto done
 	}
-
 	// next kinds/tags
-
-	// next
+	if kt := hasKinds + hasTags; bf&(kt) == kt && ^kt&bf == 0 {
+		if evs, err = d.GetEventSerialsByKindsTagsCreatedAtRange(f.Tags, f.Kinds, since, until); chk.E(err) {
+			return
+		}
+		goto done
+	}
+	// next kinds/authors/tags
+	if kat := hasAuthors + hasTags; bf&(kat) == kat && ^kat&bf == 0 {
+		if evs, err = d.GetEventSerialsByKindsAuthorsTagsCreatedAtRange(f.Tags, f.Kinds, f.Authors, since, until); chk.E(err) {
+			return
+		}
+		goto done
+	}
+done:
+	// scan the FullIndex for these serials, and sort them by descending created_at
+	var index []indexes.FullIndex
+	if index, err = d.GetFullIndexesFromSerials(evs); chk.E(err) {
+		return
+	}
+	// sort by reverse chronological order
+	sort.Slice(index, func(i, j int) bool {
+		return index[i].CreatedAt.ToTimestamp() > index[j].CreatedAt.ToTimestamp()
+	})
+	for _, item := range index {
+		for _, x := range exclude {
+			if bytes.Equal(item.Pubkey.Bytes(), x.Bytes()) {
+				continue
+			}
+		}
+		evSerials = append(evSerials, item.Ser)
+	}
 	return
 }
--- a/database/find.go
+++ b/database/find.go
@@ -2,7 +2,6 @@ package database

 import (
 	"bytes"
-	"math"

 	"github.com/dgraph-io/badger/v4"

@@ -14,6 +13,9 @@ import (
 	"x.realy.lol/database/indexes/types/varint"
 	"x.realy.lol/errorf"
 	"x.realy.lol/event"
+	"x.realy.lol/filter"
+	"x.realy.lol/log"
+	"x.realy.lol/tags"
 	"x.realy.lol/timestamp"
 )

@@ -115,13 +117,12 @@ func (d *D) GetEventById(evId []byte) (ev *event.E, err error) {

 // GetEventSerialsByCreatedAtRange returns the serials of events with the given since/until
 // range in reverse chronological order (starting at until, going back to since).
-func (d *D) GetEventSerialsByCreatedAtRange(since, until timestamp.Timestamp) (sers []*varint.V, err error) {
+func (d *D) GetEventSerialsByCreatedAtRange(since, until timestamp.Timestamp) (sers varint.S, err error) {
 	// get the start (end) max possible index prefix
-	startCreatedAt, startSer := indexes.CreatedAtVars()
+	startCreatedAt, _ := indexes.CreatedAtVars()
 	startCreatedAt.FromInt(until.ToInt())
-	startSer.FromUint64(math.MaxUint64)
 	prf := new(bytes.Buffer)
-	if err = indexes.CreatedAtEnc(startCreatedAt, startSer).MarshalWrite(prf); chk.E(err) {
+	if err = indexes.CreatedAtEnc(startCreatedAt, nil).MarshalWrite(prf); chk.E(err) {
 		return
 	}
 	if err = d.View(func(txn *badger.Txn) (err error) {
@@ -149,16 +150,64 @@ func (d *D) GetEventSerialsByCreatedAtRange(since, until timestamp.Timestamp) (s
 	return
 }

-func (d *D) GetEventSerialsByKindsCreatedAtRange(kinds []int, since, until timestamp.Timestamp) (sers []*varint.V, err error) {
+func (d *D) GetEventSerialsByKindsCreatedAtRange(kinds []int, since, until timestamp.Timestamp) (sers varint.S, err error) {
 	// get the start (end) max possible index prefix, one for each kind in the list
 	var searchIdxs [][]byte
-	for _, k := range kinds {
-		kind, startCreatedAt, startSer := indexes.KindCreatedAtVars()
-		kind.Set(k)
+	kind, startCreatedAt, _ := indexes.KindCreatedAtVars()
 	startCreatedAt.FromInt(until.ToInt())
-		startSer.FromUint64(math.MaxUint64)
+	for _, k := range kinds {
+		kind.Set(k)
 		prf := new(bytes.Buffer)
-		if err = indexes.KindCreatedAtEnc(kind, startCreatedAt, startSer).MarshalWrite(prf); chk.E(err) {
+		if err = indexes.KindCreatedAtEnc(kind, startCreatedAt, nil).MarshalWrite(prf); chk.E(err) {
+			return
+		}
+		searchIdxs = append(searchIdxs, prf.Bytes())
+	}
+	for _, idx := range searchIdxs {
+		if err = d.View(func(txn *badger.Txn) (err error) {
+			it := txn.NewIterator(badger.IteratorOptions{Reverse: true, Prefix: idx})
+			defer it.Close()
+			key := make([]byte, 10)
+			for it.Rewind(); it.Valid(); it.Next() {
+				item := it.Item()
+				key = item.KeyCopy(key)
+				ki, ca, ser := indexes.KindCreatedAtVars()
+				buf := bytes.NewBuffer(key)
+				if err = indexes.KindCreatedAtDec(ki, ca, ser).UnmarshalRead(buf); chk.E(err) {
+					// skip it then
+					continue
+				}
+				if ca.ToTimestamp() < since {
+					break
+				}
+				sers = append(sers, ser)
+			}
+			return
+		}); chk.E(err) {
+			return
+		}
+	}
+	return
+}
+
+func (d *D) GetEventSerialsByAuthorsCreatedAtRange(pubkeys []string, since, until timestamp.Timestamp) (sers varint.S, err error) {
+	// get the start (end) max possible index prefix, one for each kind in the list
+	var searchIdxs [][]byte
+	var pkDecodeErrs int
+	pubkey, startCreatedAt, _ := indexes.PubkeyCreatedAtVars()
+	startCreatedAt.FromInt(until.ToInt())
+	for _, p := range pubkeys {
+		if err = pubkey.FromPubkeyHex(p); chk.E(err) {
+			// gracefully ignore wrong keys
+			pkDecodeErrs++
+			continue
+		}
+		if pkDecodeErrs == len(pubkeys) {
+			err = errorf.E("all pubkeys in authors field of filter failed to decode")
+			return
+		}
+		prf := new(bytes.Buffer)
+		if err = indexes.PubkeyCreatedAtEnc(pubkey, startCreatedAt, nil).MarshalWrite(prf); chk.E(err) {
 			return
 		}
 		searchIdxs = append(searchIdxs, prf.Bytes())
@@ -190,12 +239,14 @@ func (d *D) GetEventSerialsByKindsCreatedAtRange(kinds []int, since, until times
 	return
 }

-func (d *D) GetEventSerialsByAuthorsCreatedAtRange(pubkeys []string, since, until timestamp.Timestamp) (sers []*varint.V, err error) {
+func (d *D) GetEventSerialsByKindsAuthorsCreatedAtRange(kinds []int, pubkeys []string, since, until timestamp.Timestamp) (sers varint.S, err error) {
 	// get the start (end) max possible index prefix, one for each kind in the list
 	var searchIdxs [][]byte
 	var pkDecodeErrs int
+	kind, pubkey, startCreatedAt, _ := indexes.KindPubkeyCreatedAtVars()
+	startCreatedAt.FromInt(until.ToInt())
+	for _, k := range kinds {
 		for _, p := range pubkeys {
-		pubkey, startCreatedAt, startSer := indexes.PubkeyCreatedAtVars()
 			if err = pubkey.FromPubkeyHex(p); chk.E(err) {
 				// gracefully ignore wrong keys
 				pkDecodeErrs++
@@ -205,14 +256,14 @@ func (d *D) GetEventSerialsByAuthorsCreatedAtRange(pubkeys []string, since, unti
 				err = errorf.E("all pubkeys in authors field of filter failed to decode")
 				return
 			}
-		startCreatedAt.FromInt(until.ToInt())
-		startSer.FromUint64(math.MaxUint64)
+			kind.Set(k)
 			prf := new(bytes.Buffer)
-		if err = indexes.PubkeyCreatedAtEnc(pubkey, startCreatedAt, startSer).MarshalWrite(prf); chk.E(err) {
+			if err = indexes.KindPubkeyCreatedAtEnc(kind, pubkey, startCreatedAt, nil).MarshalWrite(prf); chk.E(err) {
 				return
 			}
 			searchIdxs = append(searchIdxs, prf.Bytes())
 		}
+	}
 	for _, idx := range searchIdxs {
 		if err = d.View(func(txn *badger.Txn) (err error) {
 			it := txn.NewIterator(badger.IteratorOptions{Reverse: true, Prefix: idx})
@@ -221,9 +272,9 @@ func (d *D) GetEventSerialsByAuthorsCreatedAtRange(pubkeys []string, since, unti
 			for it.Rewind(); it.Valid(); it.Next() {
 				item := it.Item()
 				key = item.KeyCopy(key)
-				kind, ca, ser := indexes.KindCreatedAtVars()
+				ki, ca, ser := indexes.KindCreatedAtVars()
 				buf := bytes.NewBuffer(key)
-				if err = indexes.KindCreatedAtDec(kind, ca, ser).UnmarshalRead(buf); chk.E(err) {
+				if err = indexes.KindCreatedAtDec(ki, ca, ser).UnmarshalRead(buf); chk.E(err) {
 					// skip it then
 					continue
 				}
@@ -240,54 +291,224 @@ func (d *D) GetEventSerialsByAuthorsCreatedAtRange(pubkeys []string, since, unti
 	return
 }

-func (d *D) GetEventSerialsByKindsAuthorsCreatedAtRange(kinds []int, pubkeys []string, since, until timestamp.Timestamp) (sers []*varint.V, err error) {
-	// get the start (end) max possible index prefix, one for each kind in the list
+// GetEventSerialsByTagsCreatedAtRange searches for events that match the tags in a filter and
+// returns the list of serials that were found.
+func (d *D) GetEventSerialsByTagsCreatedAtRange(t filter.TagMap) (sers varint.S, err error) {
+	if len(t) < 1 {
+		err = errorf.E("no tags provided")
+		return
+	}
 	var searchIdxs [][]byte
-	var pkDecodeErrs int
-	for _, k := range kinds {
-		for _, p := range pubkeys {
-			kind, pubkey, startCreatedAt, startSer := indexes.KindPubkeyCreatedAtVars()
-			if err = pubkey.FromPubkeyHex(p); chk.E(err) {
-				// gracefully ignore wrong keys
-				pkDecodeErrs++
+	for tk, tv := range t {
+		// the key of each element of the map must be `#X` where X is a-zA-Z
+		if len(tk) != 2 {
 			continue
 		}
-			if pkDecodeErrs == len(pubkeys) {
-				err = errorf.E("all pubkeys in authors field of filter failed to decode")
-				return
+		if tk[0] != '#' {
+			log.E.F("invalid tag map key '%s'", tk)
 		}
-			startCreatedAt.FromInt(until.ToInt())
-			startSer.FromUint64(math.MaxUint64)
-			kind.Set(k)
-			prf := new(bytes.Buffer)
-			if err = indexes.KindPubkeyCreatedAtEnc(kind, pubkey, startCreatedAt, startSer).MarshalWrite(prf); chk.E(err) {
-				return
-			}
-			searchIdxs = append(searchIdxs, prf.Bytes())
-		}
-	}
-	for _, idx := range searchIdxs {
-		if err = d.View(func(txn *badger.Txn) (err error) {
-			it := txn.NewIterator(badger.IteratorOptions{Reverse: true, Prefix: idx})
-			defer it.Close()
-			key := make([]byte, 10)
-			for it.Rewind(); it.Valid(); it.Next() {
-				item := it.Item()
-				key = item.KeyCopy(key)
-				kind, ca, ser := indexes.KindCreatedAtVars()
-				buf := bytes.NewBuffer(key)
-				if err = indexes.KindCreatedAtDec(kind, ca, ser).UnmarshalRead(buf); chk.E(err) {
-					// skip it then
+		switch tk[1] {
+		case 'a':
+			// not sure if this is a thing. maybe a prefix search?
+			for _, ta := range tv {
+				var atag tags.Tag_a
+				if atag, err = tags.Decode_a_Tag(ta); chk.E(err) {
+					err = nil
 					continue
 				}
-				if ca.ToTimestamp() < since {
-					break
+				if atag.Kind == 0 {
+					err = nil
+					continue
 				}
-				sers = append(sers, ser)
+				ki, pk, ident, _ := indexes.TagAVars()
+				ki.Set(atag.Kind)
+				if atag.Pubkey == nil {
+					err = nil
+					continue
+				}
+				if err = pk.FromPubkey(atag.Pubkey); chk.E(err) {
+					err = nil
+					continue
+				}
+				if len(atag.Ident) < 1 {
+				}
+				if err = ident.FromIdent([]byte(atag.Ident)); chk.E(err) {
+					err = nil
+				}
+				buf := new(bytes.Buffer)
+				if err = indexes.TagAEnc(ki, pk, ident, nil).MarshalWrite(buf); chk.E(err) {
+					err = nil
+					continue
+				}
+				searchIdxs = append(searchIdxs, buf.Bytes())
+			}
+		case 'd':
+			// d tags are identifiers used to mark replaceable events to create a namespace,
+			// that the references can be used to replace them, or referred to using 'a' tags.
+			for _, td := range tv {
+				ident, _ := indexes.TagIdentifierVars()
+				if err = ident.FromIdent([]byte(td)); chk.E(err) {
+					err = nil
+					continue
+				}
+				buf := new(bytes.Buffer)
+				if err = indexes.TagIdentifierEnc(ident, nil).MarshalWrite(buf); chk.E(err) {
+					err = nil
+					continue
+				}
+				searchIdxs = append(searchIdxs, buf.Bytes())
+			}
+		case 'e':
+			// e tags refer to events. they can have a third field such as 'root' and 'reply'
+			// but this third field isn't indexed.
+			for _, te := range tv {
+				evt, _ := indexes.TagEventVars()
+				if err = evt.FromIdHex(te); chk.E(err) {
+					err = nil
+					continue
+				}
+				buf := new(bytes.Buffer)
+				if err = indexes.TagEventEnc(evt, nil).MarshalWrite(buf); chk.E(err) {
+					err = nil
+					continue
+				}
+				searchIdxs = append(searchIdxs, buf.Bytes())
+			}
+		case 'p':
+			// p tags are references to author pubkeys of events. usually a 64 character hex
+			// string but sometimes is a hashtag in follow events.
+			for _, te := range tv {
+				pk, _ := indexes.TagPubkeyVars()
+				if err = pk.FromPubkeyHex(te); chk.E(err) {
+					err = nil
+					continue
+				}
+				buf := new(bytes.Buffer)
+				if err = indexes.TagPubkeyEnc(pk, nil).MarshalWrite(buf); chk.E(err) {
+					err = nil
+					continue
+				}
+				searchIdxs = append(searchIdxs, buf.Bytes())
+			}
+		case 't':
+			// t tags are hashtags, arbitrary strings that can be used to assist search for
+			// topics.
+			for _, tt := range tv {
+				ht, _ := indexes.TagHashtagVars()
+				if err = ht.FromIdent([]byte(tt)); chk.E(err) {
+					err = nil
+					continue
+				}
+				buf := new(bytes.Buffer)
+				if err = indexes.TagHashtagEnc(ht, nil).MarshalWrite(buf); chk.E(err) {
+					err = nil
+					continue
+				}
+				searchIdxs = append(searchIdxs, buf.Bytes())
+			}
+		default:
+			// everything else is arbitrary strings, that may have application specific
+			// semantics.
+			for _, tl := range tv {
+				l, val, _ := indexes.TagLetterVars()
+				l.Set(tk[1])
+				if err = val.FromIdent([]byte(tl)); chk.E(err) {
+					err = nil
+					continue
+				}
+				buf := new(bytes.Buffer)
+				if err = indexes.TagLetterEnc(l, val, nil).MarshalWrite(buf); chk.E(err) {
+					err = nil
+					continue
+				}
+				searchIdxs = append(searchIdxs, buf.Bytes())
 			}
-			return
-		}); chk.E(err) {
-			return
+		}
+	}
+	return
+}
+
+// GetEventSerialsByAuthorsTagsCreatedAtRange first performs
+func (d *D) GetEventSerialsByAuthorsTagsCreatedAtRange(t filter.TagMap, pubkeys []string, since, until timestamp.Timestamp) (sers varint.S, err error) {
+	var acSers, tagSers varint.S
+	if acSers, err = d.GetEventSerialsByAuthorsCreatedAtRange(pubkeys, since, until); chk.E(err) {
+		return
+	}
+	// now we have the most limited set of serials that are included by the pubkeys, we can then
+	// construct the tags searches for all of these serials to filter out the events that don't
+	// have both author AND one of the tags.
+	if tagSers, err = d.GetEventSerialsByTagsCreatedAtRange(t); chk.E(err) {
+		return
+	}
+	// remove the serials that are not present in both lists.
+	sers = varint.Intersect(acSers, tagSers)
+	return
+}
+
+// GetEventSerialsByKindsTagsCreatedAtRange first performs
+func (d *D) GetEventSerialsByKindsTagsCreatedAtRange(t filter.TagMap, kinds []int, since, until timestamp.Timestamp) (sers varint.S, err error) {
+	var acSers, tagSers varint.S
+	if acSers, err = d.GetEventSerialsByKindsCreatedAtRange(kinds, since, until); chk.E(err) {
+		return
+	}
+	// now we have the most limited set of serials that are included by the pubkeys, we can then
+	// construct the tags searches for all of these serials to filter out the events that don't
+	// have both author AND one of the tags.
+	if tagSers, err = d.GetEventSerialsByTagsCreatedAtRange(t); chk.E(err) {
+		return
+	}
+	// remove the serials that are not present in both lists.
+	sers = varint.Intersect(acSers, tagSers)
+	return
+}
+
+// GetEventSerialsByKindsAuthorsTagsCreatedAtRange first performs
+func (d *D) GetEventSerialsByKindsAuthorsTagsCreatedAtRange(t filter.TagMap, kinds []int, pubkeys []string, since, until timestamp.Timestamp) (sers varint.S, err error) {
+	var acSers, tagSers varint.S
+	if acSers, err = d.GetEventSerialsByKindsAuthorsCreatedAtRange(kinds, pubkeys, since, until); chk.E(err) {
+		return
+	}
+	// now we have the most limited set of serials that are included by the pubkeys, we can then
+	// construct the tags searches for all of these serials to filter out the events that don't
+	// have both author AND one of the tags.
+	if tagSers, err = d.GetEventSerialsByTagsCreatedAtRange(t); chk.E(err) {
+		return
+	}
+	// remove the serials that are not present in both lists.
+	sers = varint.Intersect(acSers, tagSers)
+	return
+}
+
+func (d *D) GetFullIndexesFromSerials(sers varint.S) (index []indexes.FullIndex, err error) {
+	for _, ser := range sers {
+		if err = d.View(func(txn *badger.Txn) (err error) {
+			buf := new(bytes.Buffer)
+			if err = indexes.FullIndexEnc(ser, nil, nil, nil, nil).MarshalWrite(buf); chk.E(err) {
+				return
+			}
+			prf := buf.Bytes()
+			it := txn.NewIterator(badger.IteratorOptions{Prefix: prf})
+			defer it.Close()
+			for it.Seek(prf); it.Valid(); {
+				item := it.Item()
+				key := item.KeyCopy(nil)
+				kBuf := bytes.NewBuffer(key)
+				s, t, p, k, c := indexes.FullIndexVars()
+				if err = indexes.FullIndexDec(s, t, p, k, c).UnmarshalRead(kBuf); chk.E(err) {
+					return
+				}
+				index = append(index, indexes.FullIndex{
+					Ser:       s,
+					Id:        t,
+					Pubkey:    p,
+					Kind:      k,
+					CreatedAt: c,
+				})
+				return
+			}
+			return
+		}); chk.E(err) {
+			// just skip then.
 		}
 	}
 	return
--- a/database/indexes/indexes.go
+++ b/database/indexes/indexes.go
@@ -32,6 +32,11 @@ func New(encoders ...codec.I) (i *T) { return &T{encoders} }

 func (t *T) MarshalWrite(w io.Writer) (err error) {
 	for _, e := range t.Encs {
+		if e == nil {
+			// allow a field to be empty, as is needed for search indexes to create search
+			// prefixes.
+			continue
+		}
 		if err = e.MarshalWrite(w); chk.E(err) {
 			return
 		}
@@ -74,6 +79,14 @@ func IdDec(id *idhash.T, ser *varint.V) (enc *T) {
 	return New(prefix.New(), id, ser)
 }

+type FullIndex struct {
+	Ser       *varint.V
+	Id        *fullid.T
+	Pubkey    *pubhash.T
+	Kind      *kindidx.T
+	CreatedAt *timestamp.T
+}
+
 func FullIndexVars() (ser *varint.V, t *fullid.T, p *pubhash.T, ki *kindidx.T,
 	ca *timestamp.T) {
 	ser = varint.New()
--- a/database/indexes/prefixes/prefixes.go
+++ b/database/indexes/prefixes/prefixes.go
@@ -90,6 +90,13 @@ const (
 	// [ prefix ][ 2 bytes kind number ][ 8 bytes hash of pubkey ][ 8 bytes hash of label ][ serial]
 	TagA

+	// TagIdentifier is a `d` tag identifier that creates an arbitrary label that can be used
+	// to refer to an event. This is used for parameterized replaceable events to identify them
+	// with `a` tags for reference.
+	//
+	// [ prefix ][ 8 byte hash of identifier ][ 8 serial ]
+	TagIdentifier
+
 	// TagEvent is a reference to an event.
 	//
 	// [ prefix ][ 8 bytes truncated hash of event Id ][ 8 serial ]
@@ -106,13 +113,6 @@ const (
 	// [ prefix ][ 8 bytes hash of hashtag ][ 8 serial ]
 	TagHashtag

-	// TagIdentifier is a `d` tag identifier that creates an arbitrary label that can be used
-	// to refer to an event. This is used for parameterized replaceable events to identify them
-	// with `a` tags for reference.
-	//
-	// [ prefix ][ 8 byte hash of identifier ][ 8 serial ]
-	TagIdentifier
-
 	// TagLetter covers all other types of single letter mandatory indexed tags, including
 	// such as `d` for identifiers and things like `m` for mimetype and other kinds of
 	// references, the actual letter is the second byte. The value is a truncated 8 byte hash.
--- a/database/indexes/types/varint/varint.go
+++ b/database/indexes/types/varint/varint.go
@@ -10,6 +10,8 @@ import (

 type V struct{ val uint64 }

+type S []*V
+
 func New() (s *V) { return &V{} }

 func (vi *V) FromUint64(ser uint64) {
@@ -46,3 +48,47 @@ func (vi *V) UnmarshalRead(r io.Reader) (err error) {
 	vi.val, err = varint.Decode(r)
 	return
 }
+
+// DeduplicateInOrder removes duplicates from a slice of V.
+func DeduplicateInOrder(s S) (v S) {
+	// for larger slices, this uses a lot less memory, at the cost of slower execution.
+	if len(s) > 10000 {
+	skip:
+		for i, sa := range s {
+			for j, sb := range s {
+				if i != j && sa.val == sb.val {
+					continue skip
+				}
+			}
+			v = append(v, sa)
+		}
+	} else {
+		// for small slices, this is faster but uses more memory.
+		seen := map[uint64]*V{}
+		for _, val := range s {
+			if _, ok := seen[val.val]; !ok {
+				v = append(v, val)
+				seen[val.val] = val
+			}
+		}
+	}
+	return
+}
+
+// Intersect deduplicates and performs a set intersection on two slices.
+func Intersect(a, b []*V) (sers []*V) {
+	// first deduplicate to eliminate unnecessary iterations
+	a = DeduplicateInOrder(a)
+	b = DeduplicateInOrder(b)
+	for _, as := range a {
+		for _, bs := range b {
+			if as.val == bs.val {
+				// if the match is found, add to the result and move to the next candidate from
+				// the "a" serial list.
+				sers = append(sers, as)
+				break
+			}
+		}
+	}
+	return
+}
--- a/helpers/helpers.go
+++ b/helpers/helpers.go
@@ -1,6 +1,7 @@
 package helpers

 import (
+	"slices"
 	"strconv"
 	"strings"
 	"sync"
@@ -123,3 +124,15 @@ func Hash(in []byte) (out []byte) {
 	h := sha256.Sum256(in)
 	return h[:]
 }
+
+// RemoveDuplicates removes repeated items in any slice of comparable items. This would not be
+// appropriate for pointers unless they were assembled from the same source where a pointer is
+// equal to a unique reference to the content.
+func RemoveDuplicates[T comparable](s []T) []T {
+	alreadySeen := make(map[T]struct{}, len(s))
+	return slices.DeleteFunc(s, func(val T) bool {
+		_, duplicate := alreadySeen[val]
+		alreadySeen[val] = struct{}{}
+		return duplicate
+	})
+}
--- a/tags/tags.go
+++ b/tags/tags.go
@@ -8,6 +8,7 @@ import (
 	"strings"

 	"x.realy.lol/chk"
+	"x.realy.lol/ec/schnorr"
 	"x.realy.lol/helpers"
 	"x.realy.lol/hex"
 	"x.realy.lol/ints"
@@ -246,33 +247,44 @@ func (tags Tags) Get_a_Tags() (atags []Tag_a) {
 	if len(a) > 0 {
 		for _, v := range a {
 			if v[0] == "a" && len(v) > 1 {
-				// try to split it
-				parts := strings.Split(v[1], ":")
-				// there must be a kind first
-				ki := ints.New(0)
-				if _, err = ki.Unmarshal([]byte(parts[0])); chk.E(err) {
+				var atag Tag_a
+				if atag, err = Decode_a_Tag(v[1]); chk.E(err) {
 					continue
 				}
-				atag := Tag_a{
-					Kind: int(ki.Uint16()),
-				}
-				if len(parts) < 2 {
-					continue
-				}
-				// next must be a pubkey
-				var pk []byte
-				if pk, err = hex.Dec(parts[1]); err != nil {
-					continue
-				}
-				atag.Pubkey = pk
-				// there possibly can be nothing after this
-				if len(parts) >= 3 {
-					// third part is the identifier (d tag)
-					atag.Ident = parts[2]
-				}
 				atags = append(atags, atag)
 			}
 		}
 	}
 	return
 }
+
+func Decode_a_Tag(a string) (ta Tag_a, err error) {
+	// try to split it
+	parts := strings.Split(a, ":")
+	// there must be a kind first
+	ki := ints.New(0)
+	if _, err = ki.Unmarshal([]byte(parts[0])); chk.E(err) {
+		return
+	}
+	ta = Tag_a{
+		Kind: int(ki.Uint16()),
+	}
+	if len(parts) < 2 {
+		return
+	}
+	// next must be a pubkey
+	if len(parts[1]) != 2*schnorr.PubKeyBytesLen {
+		return
+	}
+	var pk []byte
+	if pk, err = hex.Dec(parts[1]); err != nil {
+		return
+	}
+	ta.Pubkey = pk
+	// there possibly can be nothing after this
+	if len(parts) >= 3 {
+		// third part is the identifier (d tag)
+		ta.Ident = parts[2]
+	}
+	return
+}