make lmdb and badger backends use a binary encoding instead of nson.

Author: fiatjaf
Date: 2023-11-02 15:35:55 -03:00
Parent: 16258ae338
Commit: ddb950d8b5
10 changed files with 28 additions and 28 deletions
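
For context, the change is at the (de)serialization boundary: the raw event store previously held NSON strings and now holds the compact encoding from github.com/nbd-wtf/go-nostr/binary. The standalone sketch below (not part of the commit) round-trips an event through both encoders; the string-based nson.Marshal/nson.Unmarshal signatures are assumed from the nson package and do not appear in this diff.

package main

import (
	"fmt"

	"github.com/nbd-wtf/go-nostr"
	nostr_binary "github.com/nbd-wtf/go-nostr/binary"
	"github.com/nbd-wtf/go-nostr/nson"
)

func main() {
	// build a throwaway signed event so both encoders have ID, PubKey and Sig to work with
	sk := nostr.GeneratePrivateKey()
	evt := &nostr.Event{Kind: 1, Content: "hello", CreatedAt: nostr.Now(), Tags: nostr.Tags{}}
	if err := evt.Sign(sk); err != nil {
		panic(err)
	}

	// before this commit: events were stored as NSON strings
	// (assumed API: nson.Marshal returns a string, nson.Unmarshal parses one)
	s, err := nson.Marshal(evt)
	if err != nil {
		panic(err)
	}
	var fromNson nostr.Event
	if err := nson.Unmarshal(s, &fromNson); err != nil {
		panic(err)
	}

	// after this commit: events are stored with the binary encoding used throughout the diff
	bin, err := nostr_binary.Marshal(evt)
	if err != nil {
		panic(err)
	}
	var fromBinary nostr.Event
	if err := nostr_binary.Unmarshal(bin, &fromBinary); err != nil {
		panic(err)
	}

	fmt.Printf("nson: %d bytes, binary: %d bytes\n", len(s), len(bin))
}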

lmdb/count.go (new file, 91 lines)

@@ -0,0 +1,91 @@
package lmdb
import (
"bytes"
"context"
"encoding/binary"
"github.com/bmatsuo/lmdb-go/lmdb"
"github.com/nbd-wtf/go-nostr"
nostr_binary "github.com/nbd-wtf/go-nostr/binary"
)
func (b *LMDBBackend) CountEvents(ctx context.Context, filter nostr.Filter) (int64, error) {
var count int64 = 0
dbi, queries, extraFilter, since, prefixLen, err := b.prepareQueries(filter)
if err != nil {
return 0, err
}
err = b.lmdbEnv.View(func(txn *lmdb.Txn) error {
// actually iterate
for _, q := range queries {
cursor, err := txn.OpenCursor(dbi)
if err != nil {
continue
}
defer cursor.Close()
var k []byte
var idx []byte
var iterr error
if _, _, errsr := cursor.Get(q.startingPoint, nil, lmdb.SetRange); errsr != nil {
if operr, ok := errsr.(*lmdb.OpError); !ok || operr.Errno != lmdb.NotFound {
// in this case it's really an error
panic(errsr)
} else {
// we're at the end and we just want notes before this,
// so we just need to set the cursor to the last key; this is not a real error
k, idx, iterr = cursor.Get(nil, nil, lmdb.Last)
}
} else {
// move one back as the first step
k, idx, iterr = cursor.Get(nil, nil, lmdb.Prev)
}
for {
// we already have k, idx and iterr from the cursor setup, so check and use these
if iterr != nil || !bytes.Equal(q.prefix, k[0:prefixLen]) {
break
}
if !q.skipTimestamp {
createdAt := binary.BigEndian.Uint32(k[prefixLen:])
if createdAt < since {
break
}
}
// fetch actual event
val, err := txn.Get(b.rawEventStore, idx)
if err != nil {
panic(err)
}
if extraFilter == nil {
count++
} else {
evt := &nostr.Event{}
if err := nostr_binary.Unmarshal(val, evt); err != nil {
return err
}
// check if this matches the other filters that were not part of the index
if extraFilter.Matches(evt) {
count++
}
}
// move one back (we'll look at k, idx and iterr in the next iteration)
k, idx, iterr = cursor.Get(nil, nil, lmdb.Prev)
}
}
return nil
})
return count, err
}

lmdb/delete.go (new file, 50 lines)

@@ -0,0 +1,50 @@
package lmdb
import (
"context"
"encoding/hex"
"github.com/bmatsuo/lmdb-go/lmdb"
"github.com/nbd-wtf/go-nostr"
nostr_binary "github.com/nbd-wtf/go-nostr/binary"
)
func (b *LMDBBackend) DeleteEvent(ctx context.Context, evt *nostr.Event) error {
err := b.lmdbEnv.Update(func(txn *lmdb.Txn) error {
id, _ := hex.DecodeString(evt.ID)
idx, err := txn.Get(b.indexId, id)
if operr, ok := err.(*lmdb.OpError); ok && operr.Errno == lmdb.NotFound {
// we already do not have this
return nil
}
if err != nil {
return err
}
// fetch the event
val, err := txn.Get(b.rawEventStore, idx)
if err != nil {
return err
}
evt := &nostr.Event{}
if err := nostr_binary.Unmarshal(val, evt); err != nil {
return err
}
// calculate all index keys we have for this event and delete them
for _, k := range b.getIndexKeysForEvent(evt) {
if err := txn.Del(k.dbi, k.key, nil); err != nil {
return err
}
}
// delete the raw event
return txn.Del(b.rawEventStore, idx, nil)
})
return err
}

lmdb/lib.go (new file, 211 lines)

@@ -0,0 +1,211 @@
package lmdb
import (
"encoding/binary"
"encoding/hex"
"sync/atomic"
"github.com/bmatsuo/lmdb-go/lmdb"
"github.com/fiatjaf/eventstore"
"github.com/nbd-wtf/go-nostr"
)
const (
maxuint16 = 65535
maxuint32 = 4294967295
)
var _ eventstore.Store = (*LMDBBackend)(nil)
type LMDBBackend struct {
Path string
MaxLimit int
lmdbEnv *lmdb.Env
rawEventStore lmdb.DBI
indexCreatedAt lmdb.DBI
indexId lmdb.DBI
indexKind lmdb.DBI
indexPubkey lmdb.DBI
indexPubkeyKind lmdb.DBI
indexTag lmdb.DBI
lastId atomic.Uint32
}
func (b *LMDBBackend) Init() error {
if b.MaxLimit == 0 {
b.MaxLimit = 500
}
// open lmdb
env, err := lmdb.NewEnv()
if err != nil {
return err
}
env.SetMaxDBs(7)
env.SetMaxReaders(500)
env.SetMapSize(1 << 38) // ~273GB
err = env.Open(b.Path, lmdb.NoTLS, 0644)
if err != nil {
return err
}
b.lmdbEnv = env
// open each db
if err := b.lmdbEnv.Update(func(txn *lmdb.Txn) error {
if dbi, err := txn.OpenDBI("raw", lmdb.Create); err != nil {
return err
} else {
b.rawEventStore = dbi
return nil
}
}); err != nil {
return err
}
if err := b.lmdbEnv.Update(func(txn *lmdb.Txn) error {
if dbi, err := txn.OpenDBI("created_at", lmdb.Create); err != nil {
return err
} else {
b.indexCreatedAt = dbi
}
if dbi, err := txn.OpenDBI("id", lmdb.Create); err != nil {
return err
} else {
b.indexId = dbi
}
if dbi, err := txn.OpenDBI("kind", lmdb.Create); err != nil {
return err
} else {
b.indexKind = dbi
}
if dbi, err := txn.OpenDBI("pubkey", lmdb.Create); err != nil {
return err
} else {
b.indexPubkey = dbi
}
if dbi, err := txn.OpenDBI("pubkeyKind", lmdb.Create); err != nil {
return err
} else {
b.indexPubkeyKind = dbi
}
if dbi, err := txn.OpenDBI("tag", lmdb.Create); err != nil {
return err
} else {
b.indexTag = dbi
}
return nil
}); err != nil {
return err
}
// get lastId
if err := b.lmdbEnv.View(func(txn *lmdb.Txn) error {
txn.RawRead = true
cursor, err := txn.OpenCursor(b.rawEventStore)
if err != nil {
return err
}
defer cursor.Close()
k, _, err := cursor.Get(nil, nil, lmdb.Last)
if operr, ok := err.(*lmdb.OpError); ok && operr.Errno == lmdb.NotFound {
// nothing found, so we're at zero
return nil
}
if err != nil {
return err
}
b.lastId.Store(binary.BigEndian.Uint32(k))
return nil
}); err != nil {
return err
}
return nil
}
func (b *LMDBBackend) Close() {
b.lmdbEnv.Close()
}
func (b *LMDBBackend) Serial() []byte {
v := b.lastId.Add(1)
vb := make([]byte, 4)
binary.BigEndian.PutUint32(vb, v)
return vb
}
type key struct {
dbi lmdb.DBI
key []byte
}
func (b *LMDBBackend) getIndexKeysForEvent(evt *nostr.Event) []key {
keys := make([]key, 0, 18)
// indexes
{
// ~ by id
k, _ := hex.DecodeString(evt.ID)
keys = append(keys, key{dbi: b.indexId, key: k})
}
{
// ~ by pubkey+date
pubkey, _ := hex.DecodeString(evt.PubKey)
k := make([]byte, 32+4)
copy(k[:], pubkey)
binary.BigEndian.PutUint32(k[32:], uint32(evt.CreatedAt))
keys = append(keys, key{dbi: b.indexPubkey, key: k})
}
{
// ~ by kind+date
k := make([]byte, 2+4)
binary.BigEndian.PutUint16(k[:], uint16(evt.Kind))
binary.BigEndian.PutUint32(k[2:], uint32(evt.CreatedAt))
keys = append(keys, key{dbi: b.indexKind, key: k})
}
{
// ~ by pubkey+kind+date
pubkey, _ := hex.DecodeString(evt.PubKey)
k := make([]byte, 32+2+4)
copy(k[:], pubkey)
binary.BigEndian.PutUint16(k[32:], uint16(evt.Kind))
binary.BigEndian.PutUint32(k[32+2:], uint32(evt.CreatedAt))
keys = append(keys, key{dbi: b.indexPubkeyKind, key: k})
}
// ~ by tagvalue+date
for _, tag := range evt.Tags {
if len(tag) < 2 || len(tag[0]) != 1 || len(tag[1]) == 0 || len(tag[1]) > 100 {
continue
}
var v []byte
if vb, _ := hex.DecodeString(tag[1]); len(vb) == 32 {
// store value as bytes
v = vb
} else {
v = []byte(tag[1])
}
k := make([]byte, len(v)+4)
copy(k[:], v)
binary.BigEndian.PutUint32(k[len(v):], uint32(evt.CreatedAt))
keys = append(keys, key{dbi: b.indexTag, key: k})
}
{
// ~ by date only
k := make([]byte, 4)
binary.BigEndian.PutUint32(k[:], uint32(evt.CreatedAt))
keys = append(keys, key{dbi: b.indexCreatedAt, key: k})
}
return keys
}
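
As an aside, the index key layout written by getIndexKeysForEvent is exactly what prepareQueries (in lmdb/query.go below) seeks against. The following standalone sketch, with hypothetical helper names and a placeholder pubkey, shows the pubkeyKind layout (32-byte pubkey, 2-byte big-endian kind, 4-byte big-endian created_at) and the matching query startingPoint (prefix plus until):

package main

import (
	"encoding/binary"
	"encoding/hex"
	"fmt"
)

// buildPubkeyKindKey mirrors the pubkeyKind index layout written above:
// 32-byte pubkey ++ 2-byte big-endian kind ++ 4-byte big-endian created_at.
func buildPubkeyKindKey(pubkeyHex string, kind uint16, createdAt uint32) ([]byte, error) {
	pubkey, err := hex.DecodeString(pubkeyHex)
	if err != nil || len(pubkey) != 32 {
		return nil, fmt.Errorf("invalid pubkey '%s'", pubkeyHex)
	}
	k := make([]byte, 32+2+4)
	copy(k, pubkey)
	binary.BigEndian.PutUint16(k[32:], kind)
	binary.BigEndian.PutUint32(k[32+2:], createdAt)
	return k, nil
}

// buildStartingPoint mirrors what prepareQueries does for the same index:
// the query prefix (pubkey ++ kind) with `until` appended; SetRange seeks
// there and the cursor then walks backwards (lmdb.Prev) until the key stops
// matching the prefix or its timestamp drops below `since`.
func buildStartingPoint(pubkeyHex string, kind uint16, until uint32) ([]byte, error) {
	pubkey, err := hex.DecodeString(pubkeyHex)
	if err != nil || len(pubkey) != 32 {
		return nil, fmt.Errorf("invalid pubkey '%s'", pubkeyHex)
	}
	prefix := make([]byte, 32+2)
	copy(prefix, pubkey)
	binary.BigEndian.PutUint16(prefix[32:], kind)
	return binary.BigEndian.AppendUint32(prefix, until), nil
}

func main() {
	pk := "3bf0c63fcb93463407af97a5e5ee64fa883d107ef9e558472c4eb9aaaefa459d"
	key, _ := buildPubkeyKindKey(pk, 1, 1698940555)
	start, _ := buildStartingPoint(pk, 1, 4294967295)
	fmt.Printf("index key:      %x\n", key)
	fmt.Printf("starting point: %x\n", start)
}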

lmdb/query.go (new file, 321 lines)

@@ -0,0 +1,321 @@
package lmdb
import (
"bytes"
"container/heap"
"context"
"encoding/binary"
"encoding/hex"
"fmt"
"github.com/bmatsuo/lmdb-go/lmdb"
"github.com/nbd-wtf/go-nostr"
nostr_binary "github.com/nbd-wtf/go-nostr/binary"
)
type query struct {
i int
prefix []byte
startingPoint []byte
results chan *nostr.Event
skipTimestamp bool
}
type queryEvent struct {
*nostr.Event
query int
}
func (b *LMDBBackend) QueryEvents(ctx context.Context, filter nostr.Filter) (chan *nostr.Event, error) {
ch := make(chan *nostr.Event)
dbi, queries, extraFilter, since, prefixLen, err := b.prepareQueries(filter)
if err != nil {
return nil, err
}
go func() {
err := b.lmdbEnv.View(func(txn *lmdb.Txn) error {
// actually iterate
cursorClosers := make([]func(), len(queries))
for i, q := range queries {
go func(i int, q query) {
defer close(q.results)
cursor, err := txn.OpenCursor(dbi)
if err != nil {
return
}
cursorClosers[i] = cursor.Close
var k []byte
var idx []byte
var iterr error
if _, _, errsr := cursor.Get(q.startingPoint, nil, lmdb.SetRange); errsr != nil {
if operr, ok := errsr.(*lmdb.OpError); !ok || operr.Errno != lmdb.NotFound {
// in this case it's really an error
panic(errsr)
} else {
// we're at the end and we just want notes before this,
// so we just need to set the cursor to the last key; this is not a real error
k, idx, iterr = cursor.Get(nil, nil, lmdb.Last)
}
} else {
// move one back as the first step
k, idx, iterr = cursor.Get(nil, nil, lmdb.Prev)
}
for {
select {
case <-ctx.Done():
return
default:
}
// we already have k, idx and iterr from the cursor setup, so check and use these
if iterr != nil || !bytes.Equal(q.prefix, k[0:prefixLen]) {
return
}
if !q.skipTimestamp {
createdAt := binary.BigEndian.Uint32(k[prefixLen:])
if createdAt < since {
break
}
}
// fetch actual event
val, err := txn.Get(b.rawEventStore, idx)
if err != nil {
panic(err)
}
evt := &nostr.Event{}
if err := nostr_binary.Unmarshal(val, evt); err != nil {
panic(err)
}
// check if this matches the other filters that were not part of the index
if extraFilter == nil || extraFilter.Matches(evt) {
q.results <- evt
}
// move one back (we'll look at k, idx and iterr in the next iteration)
k, idx, iterr = cursor.Get(nil, nil, lmdb.Prev)
}
}(i, q)
}
// max number of events we'll return
limit := b.MaxLimit
if filter.Limit > 0 && filter.Limit < limit {
limit = filter.Limit
}
// receive results and ensure we always return only the most recent ones
emittedEvents := 0
// first pass
emitQueue := make(priorityQueue, 0, len(queries)+limit)
for _, q := range queries {
evt, ok := <-q.results
if ok {
emitQueue = append(emitQueue, &queryEvent{Event: evt, query: q.i})
}
}
// now it's a good time to schedule this
defer func() {
close(ch)
for _, cclose := range cursorClosers {
cclose()
}
}()
// queue may be empty here if we have literally nothing
if len(emitQueue) == 0 {
return nil
}
heap.Init(&emitQueue)
// iterate until we've emitted all events required
for {
// emit latest event in queue
latest := emitQueue[0]
ch <- latest.Event
// stop when reaching limit
emittedEvents++
if emittedEvents >= limit {
break
}
// fetch a new one from query results and replace the previous one with it
if evt, ok := <-queries[latest.query].results; ok {
emitQueue[0].Event = evt
heap.Fix(&emitQueue, 0)
} else {
// if this query has no more events we just remove this and proceed normally
heap.Remove(&emitQueue, 0)
// check if the list is empty and end
if len(emitQueue) == 0 {
break
}
}
}
return nil
})
if err != nil {
panic(err)
}
}()
return ch, nil
}
type priorityQueue []*queryEvent
func (pq priorityQueue) Len() int { return len(pq) }
func (pq priorityQueue) Less(i, j int) bool {
return pq[i].CreatedAt > pq[j].CreatedAt
}
func (pq priorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
}
func (pq *priorityQueue) Push(x any) {
item := x.(*queryEvent)
*pq = append(*pq, item)
}
func (pq *priorityQueue) Pop() any {
old := *pq
n := len(old)
item := old[n-1]
old[n-1] = nil // avoid memory leak
*pq = old[0 : n-1]
return item
}
func (b *LMDBBackend) prepareQueries(filter nostr.Filter) (
dbi lmdb.DBI,
queries []query,
extraFilter *nostr.Filter,
since uint32,
prefixLen int,
err error,
) {
if len(filter.IDs) > 0 {
dbi = b.indexId
queries = make([]query, len(filter.IDs))
for i, idHex := range filter.IDs {
prefix, _ := hex.DecodeString(idHex)
if len(prefix) != 32 {
return dbi, nil, nil, 0, 0, fmt.Errorf("invalid id '%s'", idHex)
}
queries[i] = query{i: i, prefix: prefix, skipTimestamp: true}
}
} else if len(filter.Authors) > 0 {
if len(filter.Kinds) == 0 {
dbi = b.indexPubkey
queries = make([]query, len(filter.Authors))
for i, pubkeyHex := range filter.Authors {
prefix, _ := hex.DecodeString(pubkeyHex)
if len(prefix) != 32 {
return dbi, nil, nil, 0, 0, fmt.Errorf("invalid pubkey '%s'", pubkeyHex)
}
queries[i] = query{i: i, prefix: prefix}
}
} else {
dbi = b.indexPubkeyKind
queries = make([]query, len(filter.Authors)*len(filter.Kinds))
i := 0
for _, pubkeyHex := range filter.Authors {
for _, kind := range filter.Kinds {
pubkey, _ := hex.DecodeString(pubkeyHex)
if len(pubkey) != 32 {
return dbi, nil, nil, 0, 0, fmt.Errorf("invalid pubkey '%s'", pubkeyHex)
}
prefix := make([]byte, 32+2)
copy(prefix[:], pubkey)
binary.BigEndian.PutUint16(prefix[32:], uint16(kind))
queries[i] = query{i: i, prefix: prefix}
i++
}
}
}
extraFilter = &nostr.Filter{Tags: filter.Tags}
} else if len(filter.Tags) > 0 {
dbi = b.indexTag
// determine the size of the queries array by inspecting all tags sizes
size := 0
for _, values := range filter.Tags {
size += len(values)
}
queries = make([]query, size)
extraFilter = &nostr.Filter{Kinds: filter.Kinds}
i := 0
for _, values := range filter.Tags {
for _, value := range values {
bv, _ := hex.DecodeString(value)
var size int
if len(bv) == 32 {
// hex tag
size = 32
} else {
// string tag
bv = []byte(value)
size = len(bv)
}
prefix := make([]byte, size)
copy(prefix[:], bv)
queries[i] = query{i: i, prefix: prefix}
i++
}
}
} else if len(filter.Kinds) > 0 {
dbi = b.indexKind
queries = make([]query, len(filter.Kinds))
for i, kind := range filter.Kinds {
prefix := make([]byte, 2)
binary.BigEndian.PutUint16(prefix[:], uint16(kind))
queries[i] = query{i: i, prefix: prefix}
}
} else {
dbi = b.indexCreatedAt
queries = make([]query, 1)
prefix := make([]byte, 0)
queries[0] = query{i: 0, prefix: prefix}
extraFilter = nil
}
prefixLen = len(queries[0].prefix)
var until uint32 = maxuint32
if filter.Until != nil {
if fu := uint32(*filter.Until); fu < until {
until = fu + 1
}
}
for i, q := range queries {
queries[i].startingPoint = binary.BigEndian.AppendUint32(q.prefix, uint32(until))
queries[i].results = make(chan *nostr.Event, 12)
}
// this is where we'll end the iteration
if filter.Since != nil {
if fs := uint32(*filter.Since); fs > since {
since = fs
}
}
return dbi, queries, extraFilter, since, prefixLen, nil
}

lmdb/save.go (new file, 38 lines)

@@ -0,0 +1,38 @@
package lmdb
import (
"context"
"fmt"
"github.com/bmatsuo/lmdb-go/lmdb"
"github.com/nbd-wtf/go-nostr"
nostr_binary "github.com/nbd-wtf/go-nostr/binary"
)
func (b *LMDBBackend) SaveEvent(ctx context.Context, evt *nostr.Event) error {
// sanity checking
if evt.CreatedAt > maxuint32 || evt.Kind > maxuint16 {
return fmt.Errorf("event with values out of expected boundaries")
}
return b.lmdbEnv.Update(func(txn *lmdb.Txn) error {
bin, err := nostr_binary.Marshal(evt)
if err != nil {
return err
}
idx := b.Serial()
// raw event store
if err := txn.Put(b.rawEventStore, idx, bin, 0); err != nil {
return err
}
for _, k := range b.getIndexKeysForEvent(evt) {
if err := txn.Put(k.dbi, k.key, idx, 0); err != nil {
return err
}
}
return nil
})
}
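
Taken together, the backend is driven through the eventstore.Store interface it implements (see lmdb/lib.go above). A minimal usage sketch follows, assuming the package lives at github.com/fiatjaf/eventstore/lmdb and that the caller signs events before saving; the path and filter values are placeholders.

package main

import (
	"context"
	"fmt"

	"github.com/fiatjaf/eventstore/lmdb"
	"github.com/nbd-wtf/go-nostr"
)

func main() {
	// hypothetical path; Init opens the LMDB environment and the index DBIs
	db := &lmdb.LMDBBackend{Path: "/tmp/nostr-lmdb", MaxLimit: 500}
	if err := db.Init(); err != nil {
		panic(err)
	}
	defer db.Close()

	ctx := context.Background()

	// save a signed event (SaveEvent encodes it with nostr_binary.Marshal
	// and writes one raw row plus all of its index keys)
	sk := nostr.GeneratePrivateKey()
	evt := &nostr.Event{Kind: 1, Content: "hello", CreatedAt: nostr.Now(), Tags: nostr.Tags{}}
	if err := evt.Sign(sk); err != nil {
		panic(err)
	}
	if err := db.SaveEvent(ctx, evt); err != nil {
		panic(err)
	}

	// query it back; results arrive newest-first on a channel
	ch, err := db.QueryEvents(ctx, nostr.Filter{Kinds: []int{1}, Limit: 10})
	if err != nil {
		panic(err)
	}
	for stored := range ch {
		fmt.Println(stored.ID, stored.Content)
	}

	// counting walks the same indexes, decoding events only when an extra filter applies
	n, err := db.CountEvents(ctx, nostr.Filter{Kinds: []int{1}})
	if err != nil {
		panic(err)
	}
	fmt.Println("count:", n)
}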