update fulltext to moar fields, fix auth bug

This commit is contained in:
2025-05-14 20:25:55 -01:06
parent 35012590a3
commit 4e4d0b5997
16 changed files with 318 additions and 114 deletions

View File

@@ -12,9 +12,11 @@ import (
"realy.lol/sha256"
)
const Len = sha256.Size
// T is the SHA256 hash in hexadecimal of the canonical form of an event as
// produced by the output of T.ToCanonical().Bytes().
type T [sha256.Size]byte
type T [Len]byte
// New creates a new eventid.T. This is actually more wordy than simply creating a &T{} via
// slice literal.
@@ -35,9 +37,9 @@ func (ei *T) Set(b []byte) (err error) {
err = errorf.E("event id is nil")
return
}
if len(b) != sha256.Size {
if len(b) != Len {
err = errorf.E("Id bytes incorrect size, got %d require %d",
len(b), sha256.Size)
len(b), Len)
return
}
copy(ei[:], b)
@@ -90,7 +92,7 @@ func (ei *T) Equal(ei2 *T) (eq bool) {
// Marshal renders the eventid.T into JSON.
func (ei *T) Marshal(dst []byte) (b []byte) {
b = dst
b = make([]byte, 0, 2*sha256.Size+2)
b = make([]byte, 0, 2*Len+2)
b = append(b, '"')
hex.EncAppend(b, ei[:])
b = append(b, '"')
@@ -100,10 +102,10 @@ func (ei *T) Marshal(dst []byte) (b []byte) {
// Unmarshal decodes a JSON encoded eventid.T.
func (ei *T) Unmarshal(b []byte) (rem []byte, err error) {
// trim off the quotes.
b = b[1 : 2*sha256.Size+1]
if len(b) != 2*sha256.Size {
b = b[1 : 2*Len+1]
if len(b) != 2*Len {
err = errorf.E("event Id hex incorrect size, got %d require %d",
len(b), 2*sha256.Size)
len(b), 2*Len)
log.E.Ln(string(b))
return
}
@@ -118,12 +120,12 @@ func (ei *T) Unmarshal(b []byte) (rem []byte, err error) {
// NewFromString inspects a string and ensures it is a valid, 64 character long
// hexadecimal string, returns the string coerced to the type.
func NewFromString(s string) (ei *T, err error) {
if len(s) != 2*sha256.Size {
if len(s) != 2*Len {
return nil, errorf.E("event Id hex wrong size, got %d require %d",
len(s), 2*sha256.Size)
len(s), 2*Len)
}
ei = &T{}
b := make([]byte, 0, sha256.Size)
b := make([]byte, 0, Len)
b, err = hex.DecAppend(b, []byte(s))
copy(ei[:], b)
return
@@ -131,7 +133,7 @@ func NewFromString(s string) (ei *T, err error) {
// Gen creates a fake pseudorandom generated event Id for tests.
func Gen() (ei *T) {
b := frand.Bytes(sha256.Size)
b := frand.Bytes(Len)
ei = &T{}
copy(ei[:], b)
return

View File

@@ -4,6 +4,7 @@
package kind
import (
"encoding/binary"
"sync"
"golang.org/x/exp/constraints"
@@ -12,6 +13,8 @@ import (
"realy.lol/ints"
)
const Len = 2
// T - which will be externally referenced as kind.T is the event type in the
// nostr protocol, the use of the capital T signifying type, consistent with Go
// idiom, the Go standard library, and much, conformant, existing code.
@@ -23,6 +26,10 @@ type T struct {
// will be truncated.
func New[V constraints.Integer](k V) (ki *T) { return &T{uint16(k)} }
// NewFromBytes decodes a kind.T from the first Len bytes of b, read as a
// little-endian unsigned 16 bit integer.
//
// It returns nil when b holds fewer than Len bytes, rather than panicking on the
// out-of-range read.
func NewFromBytes(b []byte) (ki *T) {
	if len(b) < Len {
		return nil
	}
	return New(binary.LittleEndian.Uint16(b))
}
// ToInt returns the value of the kind.T as an int.
func (k *T) ToInt() int {
if k == nil {

1
ratel/fulltext-query.go Normal file
View File

@@ -0,0 +1 @@
package ratel

View File

@@ -11,15 +11,22 @@ import (
"realy.lol/chk"
"realy.lol/event"
"realy.lol/eventid"
"realy.lol/hex"
"realy.lol/ratel/keys/arb"
"realy.lol/ratel/keys/createdat"
"realy.lol/ratel/keys/fullid"
"realy.lol/ratel/keys/integer"
"realy.lol/ratel/keys/kinder"
"realy.lol/ratel/keys/pubkey"
"realy.lol/ratel/keys/serial"
"realy.lol/ratel/prefixes"
)
type Words struct {
ser *serial.T
wordMap map[string]struct{}
ev *event.T
wordMap map[string]int
}
func (r *T) WriteFulltextIndex(w *Words) (err error) {
@@ -28,10 +35,26 @@ func (r *T) WriteFulltextIndex(w *Words) (err error) {
}
r.WG.Add(1)
defer r.WG.Done()
for i := range w.wordMap {
for word, pos := range w.wordMap {
retry:
if err = r.Update(func(txn *badger.Txn) (err error) {
key := prefixes.FulltextIndex.Key(arb.New(i), w.ser)
var eid *eventid.T
if eid, err = eventid.NewFromBytes(w.ev.Id); chk.E(err) {
return
}
var pk *pubkey.T
if pk, err = pubkey.New(w.ev.Pubkey); chk.E(err) {
return
}
key := prefixes.FulltextIndex.Key(
arb.New(word),
fullid.New(eid),
pk,
createdat.New(w.ev.CreatedAt),
kinder.New(w.ev.Kind.ToU16()),
integer.New(pos),
w.ser,
)
if err = txn.Set(key, nil); chk.E(err) {
return
}
@@ -52,17 +75,18 @@ func (r *T) GetFulltextKeys(ev *event.T, ser *serial.T) (keys [][]byte) {
return
}
func (r *T) GetWordsFromContent(ev *event.T) (wordMap map[string]struct{}) {
wordMap = make(map[string]struct{})
func (r *T) GetWordsFromContent(ev *event.T) (wordMap map[string]int) {
wordMap = make(map[string]int)
if ev.Kind.IsText() {
content := ev.Content
seg := words.NewSegmenter(content)
var counter int
for seg.Next() {
w := seg.Bytes()
w = bytes.ToLower(w)
var ru rune
ru, _ = utf8.DecodeRune(w)
// ignore the most common things that aren't words\
// ignore the most common things that aren't words
if !unicode.IsSpace(ru) &&
!unicode.IsPunct(ru) &&
!unicode.IsSymbol(ru) &&
@@ -80,8 +104,8 @@ func (r *T) GetWordsFromContent(ev *event.T) (wordMap map[string]struct{}) {
continue
}
}
wordMap[string(w)] = struct{}{}
wordMap[string(w)] = counter
counter++
}
}
content = content[:0]
@@ -92,27 +116,35 @@ func (r *T) GetWordsFromContent(ev *event.T) (wordMap map[string]struct{}) {
func IsEntity(w []byte) (is bool) {
var b []byte
b = []byte("nostr:")
if bytes.Contains(w, b) && len(b) < len(w) {
if bytes.Contains(w, b) && len(b)+10 < len(w) {
return true
}
b = []byte("npub")
if bytes.Contains(w, b) && len(b) < len(w) {
if bytes.Contains(w, b) && len(b)+5 < len(w) {
return true
}
b = []byte("nsec")
if bytes.Contains(w, b) && len(b) < len(w) {
if bytes.Contains(w, b) && len(b)+5 < len(w) {
return true
}
b = []byte("nevent")
if bytes.Contains(w, b) && len(b) < len(w) {
if bytes.Contains(w, b) && len(b)+5 < len(w) {
return true
}
b = []byte("naddr")
if bytes.Contains(w, b) && len(b) < len(w) {
if bytes.Contains(w, b) && len(b)+5 < len(w) {
return true
}
b = []byte("note")
if bytes.Contains(w, b) && len(b)+20 < len(w) {
return true
}
b = []byte("lnurl")
if bytes.Contains(w, b) && len(b)+20 < len(w) {
return true
}
b = []byte("cashu")
if bytes.Contains(w, b) && len(b) < len(w) {
if bytes.Contains(w, b) && len(b)+20 < len(w) {
return true
}
return

View File

@@ -29,7 +29,7 @@ func (c *T) Read(buf io.Reader) (el keys.Element) {
if n, err := buf.Read(b); chk.E(err) || n != Len {
return nil
}
c.Val = timestamp.FromUnix(int64(binary.BigEndian.Uint64(b)))
c.Val = timestamp.FromUnix(int64(binary.LittleEndian.Uint64(b)))
return c
}

View File

@@ -0,0 +1,55 @@
package integer
import (
"bytes"
"encoding/binary"
"io"
"golang.org/x/exp/constraints"
"realy.lol/chk"
"realy.lol/ratel/keys"
)
// Len is the number of bytes an integer.T occupies when written by Write and the number of
// bytes consumed by Read.
const Len = 4
// T is a 32-bit integer number value.
type T struct {
// Val is the value carried by the element; Write and Read encode it as a little-endian
// uint32.
Val uint32
}
// Compile-time check that *T implements keys.Element.
var _ keys.Element = &T{}
// New creates an integer.T. With no arguments the value is zero; otherwise the first
// argument is converted to uint32 and stored, and any further arguments are ignored.
func New[V constraints.Integer](val ...V) (m *T) {
	m = new(T)
	if len(val) > 0 {
		m.Val = uint32(val[0])
	}
	return
}
// NewFrom creates an integer.T and populates it by reading from b via Read. The result of
// Read is not inspected, so the returned value is always non-nil.
func NewFrom(b []byte) (s *T) {
	s = new(T)
	s.Read(bytes.NewBuffer(b))
	return s
}
// Write encodes Val as Len little-endian bytes and writes them to buf. The result of the
// underlying write is not checked.
func (s *T) Write(buf io.Writer) {
	var encoded [Len]byte
	binary.LittleEndian.PutUint32(encoded[:], s.Val)
	buf.Write(encoded[:])
}
// Read consumes Len bytes from buf, decodes them as a little-endian uint32 into Val and
// returns the receiver. It returns nil when Len bytes cannot be obtained from buf.
//
// io.ReadFull is used rather than a single buf.Read call because an io.Reader is allowed to
// return fewer bytes than requested without reporting an error; ReadFull keeps reading until
// the field is complete or the reader actually fails.
func (s *T) Read(buf io.Reader) (el keys.Element) {
	v := make([]byte, Len)
	if _, err := io.ReadFull(buf, v); chk.E(err) {
		return nil
	}
	s.Val = binary.LittleEndian.Uint32(v)
	return s
}
// Len reports the encoded size of the element in bytes, which is the package constant Len.
func (s *T) Len() int {
	return Len
}

View File

@@ -0,0 +1,27 @@
package integer_test
import (
"bytes"
"testing"
"lukechampine.com/frand"
"realy.lol/ratel/keys/integer"
)
func TestT(t *testing.T) {
for range 50 {
number := frand.Bytes(integer.Len)
v := integer.NewFrom(number)
// log.I.S(number)
buf := new(bytes.Buffer)
v.Write(buf)
buf2 := bytes.NewBuffer(buf.Bytes())
v2 := &integer.T{} // or can use New(nil)
el := v2.Read(buf2).(*integer.T)
if el.Val != v.Val {
t.Fatalf("expected %x got %x", v.Val, el.Val)
}
// log.I.S(el, v, v2)
}
}

View File

@@ -0,0 +1,93 @@
package prefixes
import (
"realy.lol/ec/schnorr"
"realy.lol/errorf"
"realy.lol/eventid"
"realy.lol/kind"
"realy.lol/ratel/keys/index"
"realy.lol/ratel/keys/integer"
"realy.lol/ratel/keys/serial"
"realy.lol/timestamp"
)
// Layout of a fulltext index key:
//
// [ 15 ][ word ][ 32 bytes eventid.T ][ 32 bytes pubkey ][ 8 bytes timestamp.T ][ 2 bytes kind ][ 4 bytes sequence number of word in text ][ 8 bytes Serial ]
//
// The word has variable length, so StartOfWord is an offset from the beginning of the key
// while every other StartOf* constant is an offset measured back from the END of the key.
const (
	// StartOfWord is the offset from the start of the key at which the word begins.
	StartOfWord = index.Len

	// StartOfEventId is the combined length of all fields that follow the word.
	StartOfEventId = eventid.Len + schnorr.PubKeyBytesLen +
		timestamp.Len + kind.Len + integer.Len + serial.Len
	// StartOfPubkey is where the pubkey begins, counted from the end of the key.
	StartOfPubkey = StartOfEventId - eventid.Len
	// StartOfTimestamp is where the timestamp begins, counted from the end of the key.
	StartOfTimestamp = StartOfPubkey - schnorr.PubKeyBytesLen
	// StartOfKind is where the kind begins, counted from the end of the key.
	StartOfKind = StartOfTimestamp - timestamp.Len
	// StartOfSequence is where the word sequence number begins, counted from the end of
	// the key.
	StartOfSequence = StartOfKind - kind.Len
	// StartOfSerial is where the serial begins, counted from the end of the key.
	StartOfSerial = StartOfSequence - integer.Len

	// Len is the length of a key whose word is empty: the index prefix plus all of the
	// fields that follow the word.
	Len = StartOfWord + StartOfEventId
)
// FulltextIndexKey wraps the raw bytes of a fulltext index key and exposes accessors that
// decode its individual fields. Each accessor also stores the value it decoded in the
// matching field below.
type FulltextIndexKey struct {
// key is the raw key as passed to NewFulltextIndexKey.
key []byte
// endOfWord is the index in key at which the word ends, computed as
// len(key) - StartOfEventId.
endOfWord int
// word is set by Word.
word []byte
// eventid is set by EventId.
eventid *eventid.T
// pubkey is set by Pubkey.
pubkey []byte
// timestamp is set by Timestamp.
timestamp *timestamp.T
// kind is set by Kind.
kind *kind.T
// sequence is set by Sequence.
sequence uint32
// serial is set by Serial.
serial *serial.T
}
// NewFulltextIndexKey wraps key for field access. It returns an error, and a nil idx, when
// key is shorter than Len. No field is decoded here; only the end position of the word is
// computed.
func NewFulltextIndexKey(key []byte) (idx *FulltextIndexKey, err error) {
	if keyLen := len(key); keyLen >= Len {
		idx = &FulltextIndexKey{
			key:       key,
			endOfWord: keyLen - StartOfEventId,
		}
		return
	}
	err = errorf.E("fulltext index key is too short, got %d, minimum is %d", len(key), Len)
	return
}
// Segment returns the part of the key lying between two offsets that are both measured
// back from the end of the key: start is the distance from the end to the first byte, end
// is the distance from the end to the byte just past the segment.
func (f *FulltextIndexKey) Segment(start, end int) []byte {
	total := len(f.key)
	from, to := total-start, total-end
	return f.key[from:to]
}
// Word returns the bytes of the key between the index prefix and endOfWord, and stores the
// same slice in the word field.
func (f *FulltextIndexKey) Word() (v []byte) {
	f.word = f.key[index.Len:f.endOfWord]
	return f.word
}
// EventId builds an eventid.T from the event id segment of the key, stores it in the
// eventid field and returns it.
func (f *FulltextIndexKey) EventId() (v *eventid.T) {
	raw := f.Segment(StartOfEventId, StartOfPubkey)
	f.eventid = eventid.NewWith(raw)
	return f.eventid
}
// Pubkey returns the pubkey segment of the key and stores the same slice in the pubkey
// field. The returned slice is a sub-slice of the key, not a copy.
func (f *FulltextIndexKey) Pubkey() (v []byte) {
	f.pubkey = f.Segment(StartOfPubkey, StartOfTimestamp)
	return f.pubkey
}
// Timestamp decodes the timestamp segment of the key with timestamp.FromBytes, stores the
// result in the timestamp field and returns it.
func (f *FulltextIndexKey) Timestamp() (v *timestamp.T) {
	raw := f.Segment(StartOfTimestamp, StartOfKind)
	f.timestamp = timestamp.FromBytes(raw)
	return f.timestamp
}
// Kind decodes the kind segment of the key with kind.NewFromBytes, stores the result in the
// kind field and returns it.
func (f *FulltextIndexKey) Kind() (v *kind.T) {
	raw := f.Segment(StartOfKind, StartOfSequence)
	f.kind = kind.NewFromBytes(raw)
	return f.kind
}
// Sequence decodes the word sequence number segment of the key with integer.NewFrom and
// returns it. Only the numeric value is stored, in the sequence field.
func (f *FulltextIndexKey) Sequence() (v *integer.T) {
	raw := f.Segment(StartOfSequence, StartOfSerial)
	v = integer.NewFrom(raw)
	f.sequence = v.Val
	return v
}
// Serial builds a serial.T from the serial segment of the key, stores it in the serial
// field and returns it.
func (f *FulltextIndexKey) Serial() (v *serial.T) {
	// Segment takes both bounds as offsets measured back from the end of the key. The
	// serial is the last field, so it ends at offset 0. Passing len(f.key) as the end
	// offset would ask for f.key[len-StartOfSerial:0], which panics.
	v = serial.New(f.Segment(StartOfSerial, 0))
	f.serial = v
	return
}

View File

@@ -1,21 +0,0 @@
package prefixes
import (
"bytes"
"testing"
"realy.lol/ratel/keys/index"
)
func TestT(t *testing.T) {
v := Version.Key()
// v := New(n)
// buf := new(bytes.Buffer)
// v.Write(buf)
buf2 := bytes.NewBuffer(v)
v2 := index.New(0)
el := v2.Read(buf2).(*index.T)
if el.Val[0] != v[0] {
t.Fatalf("expected %d got %d", v[0], el.Val)
}
}

View File

@@ -107,7 +107,7 @@ const (
// from its created_at field. The serial acts as a "first seen" ordering, then
// you also have the (claimed) chronological ordering.
//
// [ 13 ][ 8 bytes Serial ][ 32 bytes eventid.T ][ 32 bytes pubkey ][ 8 bytes timestamp.T ]
// [ 13 ][ 8 bytes Serial ][ 32 bytes eventid.T ][ 32 bytes pubkey ][ 8 bytes timestamp.T ]
FullIndex
// Configuration is a free-form minified JSON object that contains a collection of
@@ -120,7 +120,10 @@ const (
// The keys are written with the word and serial to make handling them easier, even though
// it's a little bigger, it's much faster to search, which is what matters.
//
// [ 15 ][ word ][ serial ]
// The index contains pubkey, event ID, timestamp and kind, to enable filtering out results
// by all of these as expected from a regular filter search.
//
// [ 15 ][ word ][ 32 bytes eventid.T ][ 32 bytes pubkey ][ 8 bytes timestamp.T ][ 2 bytes kind ][ 4 bytes sequence number of word in text ][ 8 bytes Serial ]
FulltextIndex
// LangIndex is an index of events with language tags. These use ISO639-2 3-letter codes
@@ -160,6 +163,8 @@ var AllPrefixes = [][]byte{
{PubkeyIndex.B()},
{FullIndex.B()},
{Configuration.B()},
{FulltextIndex.B()},
{LangIndex.B()},
}
// KeySizes are the byte size of keys of each type of key prefix. int(P) or call the P.I() method

View File

@@ -217,6 +217,7 @@ func (r *T) ProcessFoundEvent(item *badger.Item, delEvs [][]byte) (dEvs [][]byte
func (r *T) UpdateAccessed(accessed map[string]struct{}) {
var err error
now := timestamp.Now()
retry:
if err = r.Update(func(txn *badger.Txn) (err error) {
for ser := range accessed {
seri := serial.New([]byte(ser))
@@ -231,7 +232,7 @@ func (r *T) UpdateAccessed(accessed map[string]struct{}) {
}
}
return nil
}); chk.E(err) {
return
}); err != nil {
goto retry
}
}

View File

@@ -146,6 +146,7 @@ func (r *T) GenerateFulltextIndex(ev *event.T, ser *serial.T) (err error) {
w = &Words{
ser: ser,
wordMap: ww,
ev: ev,
}
// log.I.F("indexing words: %v", w.wordMap)
if err = r.WriteFulltextIndex(w); chk.E(err) {

View File

@@ -3,15 +3,12 @@ package realy
import (
"bytes"
"realy.lol/chk"
"realy.lol/context"
"realy.lol/ec/schnorr"
"realy.lol/event"
"realy.lol/hex"
"realy.lol/kind"
"realy.lol/log"
"realy.lol/tag"
"realy.lol/tag/atag"
)
func (s *Server) acceptEvent(c context.T, evt *event.T, authedPubkey []byte,
@@ -52,66 +49,67 @@ func (s *Server) acceptEvent(c context.T, evt *event.T, authedPubkey []byte,
return false, notice, nil
}
}
for _, o := range s.owners {
log.T.F("%0x,%0x", o, evt.Pubkey)
if bytes.Equal(o, evt.Pubkey) {
// prevent owners from deleting their own mute/follow lists in case of bad
// client implementation
if evt.Kind.Equal(kind.Deletion) {
// check all a tags present are not follow/mute lists of the owners
aTags := evt.Tags.GetAll(tag.New("a"))
for _, at := range aTags.ToSliceOfTags() {
a := &atag.T{}
var rem []byte
var err error
if rem, err = a.Unmarshal(at.Value()); chk.E(err) {
continue
}
if len(rem) > 0 {
log.I.S("remainder", evt, rem)
}
if a.Kind.Equal(kind.Deletion) {
// we don't delete delete events, period
return false, "delete event kind may not be deleted", nil
}
// if the kind is not parameterised replaceable, the tag is invalid and the
// delete event will not be saved.
if !a.Kind.IsParameterizedReplaceable() {
return false, "delete tags with a tags containing " +
"non-parameterized-replaceable events cannot be processed", nil
}
for _, own := range s.owners {
// don't allow owners to delete their mute or follow lists because
// they should not want to, can simply replace it, and malicious
// clients may do this specifically to attack the owner's realy (s)
if bytes.Equal(own, a.PubKey) ||
a.Kind.Equal(kind.MuteList) ||
a.Kind.Equal(kind.FollowList) {
notice = "owners may not delete their own " +
"mute or follow lists, they can be replaced"
log.I.F("%s %s", remote, notice)
return false, notice, nil
}
}
}
return
}
log.W.Ln("event is from owner")
// for _, o := range s.owners {
// log.T.F("%0x,%0x", o, evt.Pubkey)
// if bytes.Equal(o, evt.Pubkey) {
// // prevent owners from deleting their own mute/follow lists in case of bad
// // client implementation
// if evt.Kind.Equal(kind.Deletion) {
// // check all a tags present are not follow/mute lists of the owners
// aTags := evt.Tags.GetAll(tag.New("a"))
// for _, at := range aTags.ToSliceOfTags() {
// a := &atag.T{}
// var rem []byte
// var err error
// if rem, err = a.Unmarshal(at.Value()); chk.E(err) {
// continue
// }
// if len(rem) > 0 {
// log.I.S("remainder", evt, rem)
// }
// if a.Kind.Equal(kind.Deletion) {
// // we don't delete delete events, period
// return false, "delete event kind may not be deleted", nil
// }
// // if the kind is not parameterised replaceable, the tag is invalid and the
// // delete event will not be saved.
// if !a.Kind.IsParameterizedReplaceable() {
// return false, "delete tags with a tags containing " +
// "non-parameterized-replaceable events cannot be processed", nil
// }
// for _, own := range s.owners {
// // don't allow owners to delete their mute or follow lists because
// // they should not want to, can simply replace it, and malicious
// // clients may do this specifically to attack the owner's realy (s)
// if bytes.Equal(own, a.PubKey) ||
// a.Kind.Equal(kind.MuteList) ||
// a.Kind.Equal(kind.FollowList) {
// notice = "owners may not delete their own " +
// "mute or follow lists, they can be replaced"
// log.I.F("%s %s", remote, notice)
// return false, notice, nil
// }
// }
// }
// return
// }
// // log.W.Ln("event is from owner")
// // accept = true
// return
// }
// }
// check the authed pubkey is in the follow list
for pk := range s.followed {
// allow all events from follows of owners
if bytes.Equal(authedPubkey, []byte(pk)) {
log.I.F("accepting event %0x because %0x on owner follow list",
evt.Id, []byte(pk))
// log.I.S([]byte(pk))
accept = true
return
}
// for all else, check the authed pubkey is in the follow list
for pk := range s.followed {
// allow all events from follows of owners
if bytes.Equal(authedPubkey, []byte(pk)) {
log.I.F("accepting event %0x because %0x on owner follow list",
evt.Id, []byte(pk))
accept = true
return
}
}
log.E.F("did not find pubkey in followed list %0x", authedPubkey)
}
log.E.F("did not find pubkey in followed list %0x", authedPubkey)
}
// if auth is enabled and there is no moderators we just check that the pubkey
// has been loaded via the auth function.

View File

@@ -33,7 +33,7 @@ func (s *Server) UpdateConfiguration() (err error) {
continue
}
dst := make([]byte, len(src)/2)
if _, err = hex.DecBytes(dst, []byte(src)); chk.E(err) {
if _, err = hex.DecBytes(dst, []byte(src)); err != nil {
if dst, err = bech32encoding.NpubToBytes([]byte(src)); chk.E(err) {
continue
}
@@ -54,7 +54,7 @@ func (s *Server) UpdateConfiguration() (err error) {
continue
}
dst := make([]byte, len(src)/2)
if _, err = hex.DecBytes(dst, []byte(src)); chk.E(err) {
if _, err = hex.DecBytes(dst, []byte(src)); err != nil {
if dst, err = bech32encoding.NpubToBytes([]byte(src)); chk.E(err) {
continue
}

View File

@@ -129,6 +129,7 @@ func (s *Server) CheckOwnerLists(c context.T) {
if p, err = hex.Dec(string(t.Value())); chk.E(err) {
continue
}
// log.I.F("muted %0x", p)
s.muted[string(p)] = struct{}{}
}
}

View File

@@ -14,6 +14,8 @@ import (
"realy.lol/ints"
)
const Len = 8
// T is a convenience type for UNIX 64 bit timestamps of 1 second
// precision.
type T struct{ V int64 }
@@ -65,7 +67,7 @@ func (t *T) Int() int {
// Bytes returns a timestamp as an 8 byte thing.
func (t *T) Bytes() (b []byte) {
b = make([]byte, 8)
binary.BigEndian.PutUint64(b, uint64(t.V))
binary.LittleEndian.PutUint64(b, uint64(t.V))
return
}
@@ -78,7 +80,7 @@ func FromUnix(t int64) *T { return &T{t} }
func (t *T) FromInt(i int) { *t = T{int64(i)} }
// FromBytes converts from a string of raw bytes.
func FromBytes(b []byte) *T { return &T{int64(binary.BigEndian.Uint64(b))} }
func FromBytes(b []byte) *T { return &T{int64(binary.LittleEndian.Uint64(b))} }
// FromVarint decodes a varint and returns the remainder of the bytes and the encoded
// timestamp.T.