Files
realy/filter/filter.go
2025-04-23 21:16:38 -01:06

580 lines
14 KiB
Go

// Package filter is a codec for nostr filters (queries) and includes tools for
// matching them to events, a canonical format scheme to enable compactly
// identifying subscription filters, and a simplified filter that leaves out the
// IDs and Search fields for use in the HTTP API.
package filter
import (
"bytes"
"encoding/binary"
"sort"
"lukechampine.com/frand"
"realy.mleku.dev/chk"
"realy.mleku.dev/ec/schnorr"
"realy.mleku.dev/ec/secp256k1"
"realy.mleku.dev/errorf"
"realy.mleku.dev/event"
"realy.mleku.dev/hex"
"realy.mleku.dev/ints"
"realy.mleku.dev/kind"
"realy.mleku.dev/kinds"
"realy.mleku.dev/realy/pointers"
"realy.mleku.dev/sha256"
"realy.mleku.dev/tag"
"realy.mleku.dev/tags"
"realy.mleku.dev/text"
"realy.mleku.dev/timestamp"
)
// T is the primary query form for requesting events from a nostr relay.
//
// The ordering of fields of filters is not specified as in the protocol there
// is no requirement to generate a hash for fast recognition of identical
// filters. However, for internal use in a relay, by applying a consistent sort
// order, this library will produce an identical JSON from the same *set* of
// fields no matter what order they were provided.
//
// This is to facilitate the deduplication of filters, so that the same
// effective query is not run more than once for identical filters.
//
// Tags is encoded by the Marshal method of this type as "#x" object keys and
// is therefore excluded from struct-tag driven JSON encoding with `json:"-"`.
// (A tag of the form "-,omitempty" does not exclude the field; encoding/json
// treats it as a field named "-".)
type T struct {
	IDs     *tag.T       `json:"ids,omitempty"`
	Kinds   *kinds.T     `json:"kinds,omitempty"`
	Authors *tag.T       `json:"authors,omitempty"`
	Tags    *tags.T      `json:"-"`
	Since   *timestamp.T `json:"since,omitempty"`
	Until   *timestamp.T `json:"until,omitempty"`
	Search  []byte       `json:"search,omitempty"`
	Limit   *uint        `json:"limit,omitempty"`
}
// New returns a filter whose IDs, Kinds and Authors are pre-allocated with a
// capacity of 10 and whose Tags is an empty tags list, so that typical use
// needs no further allocation.
//
// Since, Until, Search and Limit are left at their zero values (nil).
func New() (f *T) {
	f = new(T)
	f.IDs = tag.NewWithCap(10)
	f.Kinds = kinds.NewWithCap(10)
	f.Authors = tag.NewWithCap(10)
	f.Tags = tags.New()
	return
}
// Clone creates a new filter carrying the same elements as f, except that the
// Limit field is always set to 1: the subscription management code uses Limit
// as a reference counter, and a fresh clone implicitly means one reference.
//
// IDs, Kinds, Authors, Since and Until are copied by value into new structs;
// Tags is copied with its own Clone method; Search is copied into a new byte
// slice. Fields that are nil in f stay nil in the clone instead of causing a
// nil pointer dereference.
//
// NOTE(review): the by-value struct copies presumably still share any slices
// held inside tag.T / kinds.T with the original — confirm before mutating a
// clone in place.
func (f *T) Clone() (clone *T) {
	lim := new(uint)
	*lim = 1
	clone = &T{Limit: lim}
	if f.IDs != nil {
		ids := *f.IDs
		clone.IDs = &ids
	}
	if f.Kinds != nil {
		ks := *f.Kinds
		clone.Kinds = &ks
	}
	if f.Authors != nil {
		authors := *f.Authors
		clone.Authors = &authors
	}
	if f.Tags != nil {
		clone.Tags = f.Tags.Clone()
	}
	if f.Since != nil {
		since := *f.Since
		clone.Since = &since
	}
	if f.Until != nil {
		until := *f.Until
		clone.Until = &until
	}
	// Copy into the local buffer. The destination must not be the package
	// level Search variable, which holds the "search" JSON key.
	search := make([]byte, len(f.Search))
	copy(search, f.Search)
	clone.Search = search
	return
}
// JSON object keys of the filter fields. Marshal writes them with
// text.JSONKey and Unmarshal dispatches on their first byte and length. They
// are mutable byte slices, so they must never be used as a copy destination.
var (
// IDs is the JSON object key for IDs.
IDs = []byte("ids")
// Kinds is the JSON object key for Kinds.
Kinds = []byte("kinds")
// Authors is the JSON object key for Authors.
Authors = []byte("authors")
// Since is the JSON object key for Since.
Since = []byte("since")
// Until is the JSON object key for Until.
Until = []byte("until")
// Limit is the JSON object key for Limit.
Limit = []byte("limit")
// Search is the JSON object key for Search.
Search = []byte("search")
)
// Marshal appends the filter to dst as minified JSON and returns the extended
// slice. The filter is sorted first (see Sort) and the members are always
// written in the order ids, kinds, authors, tags, since, until, search, limit,
// so that a hash over the output can identify the same filter.
//
// Members that are nil or empty are omitted. Since and Until are also omitted
// when their value is zero.
//
// Tag entries are written as `"#x":[...]` members. An entry is skipped when it
// is nil, when its key is not exactly '#' followed by one ASCII letter, or
// when it has no values. Values of "#e" and "#p" are hex encoded, all other
// values are written as they are.
func (f *T) Marshal(dst []byte) (b []byte) {
	// sort the fields so they come out the same
	f.Sort()
	// written records whether a member has already been emitted; sep puts a
	// comma in front of every member except the first one.
	var written bool
	sep := func() {
		if written {
			dst = append(dst, ',')
		}
		written = true
	}
	// opening brace
	dst = append(dst, '{')
	if f.IDs != nil && f.IDs.Len() > 0 {
		sep()
		dst = text.JSONKey(dst, IDs)
		dst = text.MarshalHexArray(dst, f.IDs.ToSliceOfBytes())
	}
	if f.Kinds != nil && f.Kinds.Len() > 0 {
		sep()
		dst = text.JSONKey(dst, Kinds)
		dst = f.Kinds.Marshal(dst)
	}
	if f.Authors != nil && f.Authors.Len() > 0 {
		sep()
		dst = text.JSONKey(dst, Authors)
		dst = text.MarshalHexArray(dst, f.Authors.ToSliceOfBytes())
	}
	if f.Tags != nil && f.Tags.Len() > 0 {
		// tags are stored as tags with the initial element the "#a" and the rest the list in
		// each element of the tags list. eg:
		//
		// [["#p","<pubkey1>","<pubkey3"],["#t","hashtag","stuff"]]
		//
		for _, tg := range f.Tags.ToSliceOfTags() {
			if tg == nil {
				// nothing here
				continue
			}
			if tg.Len() < 1 || len(tg.Key()) != 2 {
				// if there is no values, skip; the "key" field must be 2 characters long,
				continue
			}
			fields := tg.ToSliceOfBytes()
			tKey := fields[0]
			if len(tKey) != 2 {
				continue
			}
			isAlpha := (tKey[1] >= 'a' && tKey[1] <= 'z') ||
				(tKey[1] >= 'A' && tKey[1] <= 'Z')
			if tKey[0] != '#' || !isAlpha {
				// first "key" field must begin with '#' and second be alpha
				continue
			}
			values := fields[1:]
			if len(values) == 0 {
				continue
			}
			sep()
			// append the key
			dst = append(dst, '"', tKey[0], tKey[1], '"', ':')
			dst = append(dst, '[')
			for i, value := range values {
				dst = append(dst, '"')
				if tKey[1] == 'e' || tKey[1] == 'p' {
					// event and pubkey tags are binary 32 bytes
					dst = hex.EncAppend(dst, value)
				} else {
					// NOTE(review): the value is written without JSON escaping, unlike
					// Search below — confirm values can never contain '"' or '\'.
					dst = append(dst, value...)
				}
				dst = append(dst, '"')
				if i < len(values)-1 {
					dst = append(dst, ',')
				}
			}
			dst = append(dst, ']')
		}
	}
	if f.Since != nil && f.Since.U64() > 0 {
		sep()
		dst = text.JSONKey(dst, Since)
		dst = f.Since.Marshal(dst)
	}
	if f.Until != nil && f.Until.U64() > 0 {
		sep()
		dst = text.JSONKey(dst, Until)
		dst = f.Until.Marshal(dst)
	}
	if len(f.Search) > 0 {
		sep()
		dst = text.JSONKey(dst, Search)
		dst = text.AppendQuote(dst, f.Search, text.NostrEscape)
	}
	if pointers.Present(f.Limit) {
		sep()
		dst = text.JSONKey(dst, Limit)
		dst = ints.New(*f.Limit).Marshal(dst)
	}
	// closing brace
	dst = append(dst, '}')
	b = dst
	return
}
// Serialize returns the filter as raw minified JSON in a newly allocated
// slice; it is Marshal called with a nil destination.
func (f *T) Serialize() (b []byte) {
	b = f.Marshal(nil)
	return
}
// states of the unmarshaler
const (
// beforeOpen: scanning for the opening '{'.
beforeOpen = iota
// openParen: inside the object, scanning for the '"' that opens a key.
openParen
// inKey: collecting key bytes until the closing '"'.
inKey
// inKV: key is complete, scanning for the ':' separator.
inKV
// inVal: positioned on a value; the collected key selects the decoder.
inVal
// betweenKV: a value was decoded; expecting '}', ',' or the '"' of the next key.
betweenKV
// afterClose: the closing '}' was seen while in betweenKV.
afterClose
)
// Unmarshal a filter from raw (minified) JSON bytes into the runtime format.
//
// The input is scanned byte by byte with a small state machine (see the state
// constants). On success r holds the bytes following the closing '}' of the
// filter object. A key is selected by its first byte and must be at least as
// long as the expected key; keys starting with '#' must be exactly two bytes
// and are stored in Tags, with "#e" and "#p" values decoded from hex.
//
// An error is returned for an unknown key, for a value that fails to decode,
// and for input that ends before the object is complete; empty or truncated
// input no longer panics. Input that ends directly after a decoded value is
// still accepted without error.
//
// todo: this may tolerate whitespace, not certain currently.
func (f *T) Unmarshal(b []byte) (r []byte, err error) {
	r = b[:]
	var key []byte
	var state int
	for ; len(r) > 0; r = r[1:] {
		switch state {
		case beforeOpen:
			if r[0] == '{' {
				state = openParen
			}
		case openParen:
			if r[0] == '"' {
				state = inKey
			}
		case inKey:
			if r[0] == '"' {
				state = inKV
			} else {
				key = append(key, r[0])
			}
		case inKV:
			if r[0] == ':' {
				state = inVal
			}
		case inVal:
			if len(key) < 1 {
				err = errorf.E("filter key zero length: '%s'\n'%s", b, r)
				return
			}
			switch key[0] {
			case '#':
				// tags start with # and have 1 letter
				l := len(key)
				if l != 2 {
					err = errorf.E("filter tag keys can only be # and one alpha character: '%s'\n%s",
						key, b)
					return
				}
				// key is reused for the next member, so the tag gets its own copy
				k := make([]byte, len(key))
				copy(k, key)
				switch key[1] {
				case 'e', 'p':
					// the tags must all be 64 character hexadecimal
					var ff [][]byte
					if ff, r, err = text.UnmarshalHexArray(r,
						sha256.Size); chk.E(err) {
						return
					}
					ff = append([][]byte{k}, ff...)
					f.Tags = f.Tags.AppendTags(tag.New(ff...))
				default:
					// other types of tags can be anything
					var ff [][]byte
					if ff, r, err = text.UnmarshalStringArray(r); chk.E(err) {
						return
					}
					ff = append([][]byte{k}, ff...)
					f.Tags = f.Tags.AppendTags(tag.New(ff...))
				}
				state = betweenKV
			case IDs[0]:
				if len(key) < len(IDs) {
					goto invalid
				}
				var ff [][]byte
				if ff, r, err = text.UnmarshalHexArray(r, sha256.Size); chk.E(err) {
					return
				}
				f.IDs = tag.New(ff...)
				state = betweenKV
			case Kinds[0]:
				if len(key) < len(Kinds) {
					goto invalid
				}
				f.Kinds = kinds.NewWithCap(0)
				if r, err = f.Kinds.Unmarshal(r); chk.E(err) {
					return
				}
				state = betweenKV
			case Authors[0]:
				if len(key) < len(Authors) {
					goto invalid
				}
				var ff [][]byte
				if ff, r, err = text.UnmarshalHexArray(r, schnorr.PubKeyBytesLen); chk.E(err) {
					return
				}
				f.Authors = tag.New(ff...)
				state = betweenKV
			case Until[0]:
				if len(key) < len(Until) {
					goto invalid
				}
				u := ints.New(0)
				if r, err = u.Unmarshal(r); chk.E(err) {
					return
				}
				f.Until = timestamp.FromUnix(int64(u.N))
				state = betweenKV
			case Limit[0]:
				if len(key) < len(Limit) {
					goto invalid
				}
				l := ints.New(0)
				if r, err = l.Unmarshal(r); chk.E(err) {
					return
				}
				u := uint(l.N)
				f.Limit = &u
				state = betweenKV
			case Search[0]:
				// "search" and "since" share the first byte; the shorter of the
				// two bounds the length check, the second byte tells them apart.
				if len(key) < len(Since) {
					goto invalid
				}
				switch key[1] {
				case Search[1]:
					if len(key) < len(Search) {
						goto invalid
					}
					var txt []byte
					if txt, r, err = text.UnmarshalQuoted(r); chk.E(err) {
						return
					}
					f.Search = txt
					state = betweenKV
				case Since[1]:
					if len(key) < len(Since) {
						goto invalid
					}
					s := ints.New(0)
					if r, err = s.Unmarshal(r); chk.E(err) {
						return
					}
					f.Since = timestamp.FromUnix(int64(s.N))
					state = betweenKV
				default:
					goto invalid
				}
			default:
				goto invalid
			}
			key = key[:0]
		case betweenKV:
			switch r[0] {
			case '}':
				state = afterClose
			case ',':
				state = openParen
			case '"':
				state = inKey
			}
		}
		// a value decoder may have consumed the rest of the input
		if len(r) == 0 {
			return
		}
		if r[0] == '}' {
			r = r[1:]
			return
		}
	}
	if state == betweenKV {
		// the input ended directly after a decoded value; this has always been
		// accepted without error and stays that way
		return
	}
	err = errorf.E("filter ended before closing brace: '%s'", b)
	return
invalid:
	err = errorf.E("invalid key,\n'%s'\n'%s'", string(b), string(r))
	return
}
// Matches checks a filter against an event and determines if the event matches the filter.
//
// A nil event never matches. Each of IDs, Kinds, Authors and Tags is only
// applied when it is non-empty. Since and Until are only applied when they are
// set and non-zero, and both bounds are inclusive. Search and Limit are not
// considered.
func (f *T) Matches(ev *event.T) bool {
	if ev == nil {
		return false
	}
	if f.IDs.Len() > 0 && !f.IDs.Contains(ev.Id) {
		return false
	}
	if f.Kinds.Len() > 0 && !f.Kinds.Contains(ev.Kind) {
		return false
	}
	if f.Authors.Len() > 0 && !f.Authors.Contains(ev.Pubkey) {
		return false
	}
	if f.Tags.Len() > 0 && !ev.Tags.Intersects(f.Tags) {
		return false
	}
	// Since and Until are nil on filters from New and on decoded filters that
	// did not carry them, so they are guarded the same way Marshal guards them.
	if f.Since != nil && f.Since.Int() != 0 && ev.CreatedAt.I64() < f.Since.I64() {
		return false
	}
	if f.Until != nil && f.Until.Int() != 0 && ev.CreatedAt.I64() > f.Until.I64() {
		return false
	}
	return true
}
// Fingerprint returns the first 8 bytes of the sha256 hash of the filter's
// canonical JSON (as produced by Marshal), read as a little endian uint64.
//
// The Limit field is left out of the hash: it is set to nil while the JSON is
// generated and put back afterwards, so filters that differ only in Limit have
// the same fingerprint. The returned error is currently always nil.
func (f *T) Fingerprint() (fp uint64, err error) {
	saved := f.Limit
	f.Limit = nil
	digest := sha256.Sum256(f.Marshal(nil))
	f.Limit = saved
	fp = binary.LittleEndian.Uint64(digest[:])
	return
}
// Sort orders IDs, Kinds, Authors and Tags in place, skipping any that are
// nil, so that filters with the same set of content marshal, and therefore
// fingerprint, identically.
func (f *T) Sort() {
	if ids := f.IDs; ids != nil {
		sort.Sort(ids)
	}
	if ks := f.Kinds; ks != nil {
		sort.Sort(ks)
	}
	if authors := f.Authors; authors != nil {
		sort.Sort(authors)
	}
	if tgs := f.Tags; tgs != nil {
		sort.Sort(tgs)
	}
}
// arePointerValuesEqual reports whether a and b are both nil or both point to
// equal values. One nil and one non-nil pointer are never equal.
func arePointerValuesEqual[V comparable](a *V, b *V) bool {
	switch {
	case a == nil || b == nil:
		// equal only when both are nil
		return a == b
	default:
		return *a == *b
	}
}
// Equal checks a filter against another filter to see if they are the same filter.
//
// Both filters are sorted in place first (see Sort), so the result does not
// depend on the order in which either side's elements were supplied. Kinds,
// IDs, Authors, Tags, Since, Until and Search are compared; Limit is not. A
// nil b is never equal.
func (f *T) Equal(b *T) bool {
	if b == nil {
		return false
	}
	// sort the fields of both sides so they come out the same
	f.Sort()
	b.Sort()
	return f.Kinds.Equals(b.Kinds) &&
		f.IDs.Equal(b.IDs) &&
		f.Authors.Equal(b.Authors) &&
		// cheap length check before the element-wise comparison at the end
		f.Tags.Len() == b.Tags.Len() &&
		arePointerValuesEqual(f.Since, b.Since) &&
		arePointerValuesEqual(f.Until, b.Until) &&
		bytes.Equal(f.Search, b.Search) &&
		f.Tags.Equal(b.Tags)
}
// GenFilter is a testing tool to create random arbitrary filters for tests.
//
// The filter gets up to 15 random IDs, kinds and authors each. For every
// letter 'a' to 'z' it gets a "#<letter>" tag with up to 5 values: random
// 32 byte values for "#e" and "#p", and the hex text of 1 to 31 random bytes
// for all other letters. Since is set up to 9999 seconds before now, Until to
// now, and Search to a fixed string. Limit is left unset.
//
// An error is returned only when generating a secret key for an author fails.
func GenFilter() (f *T, err error) {
	f = New()
	for range frand.Intn(16) {
		id := make([]byte, sha256.Size)
		frand.Read(id)
		f.IDs = f.IDs.Append(id)
	}
	for range frand.Intn(16) {
		f.Kinds.K = append(f.Kinds.K, kind.New(frand.Intn(65535)))
	}
	for range frand.Intn(16) {
		var sk *secp256k1.SecretKey
		if sk, err = secp256k1.GenerateSecretKey(); chk.E(err) {
			return
		}
		pk := sk.PubKey()
		f.Authors = f.Authors.Append(schnorr.SerializePubKey(pk))
	}
	for b := 'a'; b <= 'z'; b++ {
		l := frand.Intn(6)
		// the first element is the "#x" key, followed by l values
		idb := make([][]byte, 0, l+1)
		idb = append(idb, []byte{'#', byte(b)})
		binaryValues := b == 'e' || b == 'p'
		for range l {
			if binaryValues {
				// event and pubkey tags carry raw 32 byte values
				id := make([]byte, sha256.Size)
				frand.Read(id)
				idb = append(idb, id)
				continue
			}
			bb := make([]byte, frand.Intn(31)+1)
			frand.Read(bb)
			id := make([]byte, 0, len(bb)*2)
			id = hex.EncAppend(id, bb)
			idb = append(idb, id)
		}
		f.Tags = f.Tags.AppendTags(tag.FromBytesSlice(idb...))
	}
	tn := int(timestamp.Now().I64())
	f.Since = timestamp.FromUnix(int64(tn - frand.Intn(10000)))
	f.Until = timestamp.Now()
	f.Search = []byte("token search text")
	return
}