merge authors/nostruser in neo4j, add compact pubkey/e/p serial refs
Some checks failed
Go / build-and-release (push) Has been cancelled

This commit is contained in:
2025-12-03 20:49:49 +00:00
parent 746523ea78
commit 54ead81791
19 changed files with 1841 additions and 229 deletions

View File

@@ -0,0 +1,421 @@
//go:build !(js && wasm)
package database
import (
	"bytes"
	"encoding/binary"
	"encoding/hex"
	"errors"
	"io"

	"git.mleku.dev/mleku/nostr/crypto/ec/schnorr"
	"git.mleku.dev/mleku/nostr/encoders/event"
	"git.mleku.dev/mleku/nostr/encoders/tag"
	"git.mleku.dev/mleku/nostr/encoders/varint"
	"lol.mleku.dev/chk"
)
// CompactEventFormat defines the binary format for compact event storage.
// This format uses 5-byte serial references instead of 32-byte IDs/pubkeys,
// dramatically reducing storage requirements.
//
// Format:
// - 1 byte: Version (currently 1)
// - 5 bytes: Author pubkey serial (reference to spk table)
// - varint: CreatedAt timestamp
// - 2 bytes: Kind (uint16 big-endian)
// - varint: Number of tags
// - For each tag:
// - varint: Number of elements in tag
// - For each element:
// - 1 byte: Element type flag
// - 0x00 = raw bytes (followed by varint length + data)
// - 0x01 = pubkey serial reference (followed by 5-byte serial)
// - 0x02 = event ID serial reference (followed by 5-byte serial)
// - 0x03 = unknown event ID (followed by 32-byte full ID)
// - Element data based on type
// - varint: Content length
// - Content bytes
// - 64 bytes: Signature
//
// Space savings example (event with 3 p-tags, 1 e-tag):
// - Original: 32 (ID) + 32 (pubkey) + 32*4 (tags) = 192 bytes
// - Compact: 5 (pubkey serial) + 5*4 (tag serials) = 25 bytes
// - Savings: 167 bytes per event (87%)
const (
	// CompactFormatVersion is the version byte written at the start of every
	// compact-encoded event; decoders reject any other value. Bump this when
	// the wire layout changes.
	CompactFormatVersion = 1
	// Tag element type flags: the first byte of every encoded tag element,
	// selecting how the element payload that follows is laid out.
	TagElementRaw          = 0x00 // Raw bytes (varint length + data)
	TagElementPubkeySerial = 0x01 // Pubkey serial reference (5 bytes)
	TagElementEventSerial  = 0x02 // Event ID serial reference (5 bytes)
	TagElementEventIdFull  = 0x03 // Full event ID (32 bytes) - for unknown refs
)
// SerialResolver is an interface for resolving serials during compact encoding/decoding.
// This allows the encoder/decoder to look up or create serial mappings without
// depending on a concrete database implementation.
type SerialResolver interface {
	// GetOrCreatePubkeySerial returns the serial for a pubkey, creating one if needed.
	GetOrCreatePubkeySerial(pubkey []byte) (serial uint64, err error)
	// GetPubkeyBySerial returns the full pubkey for a serial.
	GetPubkeyBySerial(serial uint64) (pubkey []byte, err error)
	// GetEventSerialById returns the serial for an event ID, or 0 if not found.
	// found distinguishes "no mapping" from a lookup error.
	GetEventSerialById(eventId []byte) (serial uint64, found bool, err error)
	// GetEventIdBySerial returns the full event ID for a serial.
	GetEventIdBySerial(serial uint64) (eventId []byte, err error)
}
// MarshalCompactEvent encodes an event using compact serial references.
// The resolver is used to look up/create serial mappings for pubkeys; the
// event's own ID is not embedded (it is stored separately, keyed by serial).
//
// Layout: version byte, 5-byte author pubkey serial, varint created_at,
// 2-byte big-endian kind, varint tag count + tags, varint content length +
// content, 64-byte signature.
func MarshalCompactEvent(ev *event.E, resolver SerialResolver) (data []byte, err error) {
	out := new(bytes.Buffer)
	// Version byte identifies the compact format.
	out.WriteByte(CompactFormatVersion)
	// Author pubkey serial (5 bytes) replaces the 32-byte pubkey.
	var serial uint64
	if serial, err = resolver.GetOrCreatePubkeySerial(ev.Pubkey); chk.E(err) {
		return nil, err
	}
	writeUint40(out, serial)
	// CreatedAt (varint)
	varint.Encode(out, uint64(ev.CreatedAt))
	// Kind (2 bytes big-endian)
	binary.Write(out, binary.BigEndian, ev.Kind)
	// Tag count, then each tag; nil/empty tag lists encode as count 0.
	tagCount := 0
	if ev.Tags != nil {
		tagCount = ev.Tags.Len()
	}
	varint.Encode(out, uint64(tagCount))
	if tagCount > 0 {
		for _, t := range *ev.Tags {
			if err = encodeCompactTag(out, t, resolver); chk.E(err) {
				return nil, err
			}
		}
	}
	// Content (varint length + bytes)
	varint.Encode(out, uint64(len(ev.Content)))
	out.Write(ev.Content)
	// Signature (64 bytes)
	out.Write(ev.Sig)
	return out.Bytes(), nil
}
// encodeCompactTag encodes a single tag with serial references for e/p tags.
//
// Layout: varint element count, then one element per entry in t.T. The first
// element (the tag key) is always stored raw. For "p" tags the second element
// is replaced with a 5-byte pubkey serial when the resolver can map it; for
// "e" tags it becomes a 5-byte event serial when the event is already known,
// or a full 32-byte ID (TagElementEventIdFull) when it is not. Values may be
// 32-byte binary or 64-byte hex; anything else, and any value that fails
// serial resolution or hex decoding, falls back to raw encoding.
//
// NOTE(review): write errors from w are not surfaced here; in practice w is
// a bytes.Buffer whose writes cannot fail — confirm no other writer is used.
func encodeCompactTag(w io.Writer, t *tag.T, resolver SerialResolver) (err error) {
	// Nil/empty tags encode as a zero element count.
	if t == nil || t.Len() == 0 {
		varint.Encode(w, 0)
		return nil
	}
	varint.Encode(w, uint64(t.Len()))
	// Get tag key to determine if we should use serial references
	key := t.Key()
	isPTag := len(key) == 1 && key[0] == 'p'
	isETag := len(key) == 1 && key[0] == 'e'
	for i, elem := range t.T {
		if i == 0 {
			// First element is always the tag key - store as raw
			writeTagElement(w, TagElementRaw, elem)
			continue
		}
		if i == 1 {
			// Second element is the value - potentially a serial reference
			if isPTag && len(elem) == 32 {
				// Binary pubkey - look up serial
				serial, serErr := resolver.GetOrCreatePubkeySerial(elem)
				if serErr == nil {
					writeTagElementSerial(w, TagElementPubkeySerial, serial)
					continue
				}
				// Fall through to raw encoding on error
			} else if isPTag && len(elem) == 64 {
				// Hex pubkey - decode and look up serial
				var pubkey []byte
				if pubkey, err = hexDecode(elem); err == nil && len(pubkey) == 32 {
					serial, serErr := resolver.GetOrCreatePubkeySerial(pubkey)
					if serErr == nil {
						writeTagElementSerial(w, TagElementPubkeySerial, serial)
						continue
					}
				}
				// Fall through to raw encoding on error
			} else if isETag && len(elem) == 32 {
				// Binary event ID - look up serial if exists
				serial, found, serErr := resolver.GetEventSerialById(elem)
				if serErr == nil && found {
					writeTagElementSerial(w, TagElementEventSerial, serial)
					continue
				}
				// Event not found - store full ID
				writeTagElement(w, TagElementEventIdFull, elem)
				continue
			} else if isETag && len(elem) == 64 {
				// Hex event ID - decode and look up serial
				var eventId []byte
				if eventId, err = hexDecode(elem); err == nil && len(eventId) == 32 {
					serial, found, serErr := resolver.GetEventSerialById(eventId)
					if serErr == nil && found {
						writeTagElementSerial(w, TagElementEventSerial, serial)
						continue
					}
					// Event not found - store full ID
					writeTagElement(w, TagElementEventIdFull, eventId)
					continue
				}
				// Fall through to raw encoding on error
			}
		}
		// Default: raw encoding. A hexDecode failure above may leave err
		// non-nil here; it is deliberately discarded by the explicit
		// "return nil" below, since raw fallback is the intended behavior.
		writeTagElement(w, TagElementRaw, elem)
	}
	return nil
}
// writeTagElement writes one tag element: a 1-byte type flag followed by the
// payload. Full event IDs (TagElementEventIdFull) are written without a
// length prefix since they are always 32 bytes; every other type carries a
// varint length before the data.
func writeTagElement(w io.Writer, typeFlag byte, data []byte) {
	w.Write([]byte{typeFlag})
	switch typeFlag {
	case TagElementEventIdFull:
		// Fixed 32-byte payload — no length prefix needed.
		w.Write(data)
	default:
		// Variable-length payload — varint length, then the bytes.
		varint.Encode(w, uint64(len(data)))
		w.Write(data)
	}
}
// writeTagElementSerial writes a serial-reference tag element: the 1-byte
// type flag followed by the serial as a 5-byte big-endian integer.
func writeTagElementSerial(w io.Writer, typeFlag byte, serial uint64) {
	flag := [1]byte{typeFlag}
	w.Write(flag[:])
	writeUint40(w, serial)
}
// writeUint40 writes a 5-byte big-endian unsigned integer.
func writeUint40(w io.Writer, value uint64) {
buf := []byte{
byte((value >> 32) & 0xFF),
byte((value >> 24) & 0xFF),
byte((value >> 16) & 0xFF),
byte((value >> 8) & 0xFF),
byte(value & 0xFF),
}
w.Write(buf)
}
// readUint40 reads a 5-byte big-endian unsigned integer.
func readUint40(r io.Reader) (value uint64, err error) {
buf := make([]byte, 5)
if _, err = io.ReadFull(r, buf); err != nil {
return 0, err
}
value = (uint64(buf[0]) << 32) |
(uint64(buf[1]) << 24) |
(uint64(buf[2]) << 16) |
(uint64(buf[3]) << 8) |
uint64(buf[4])
return value, nil
}
// UnmarshalCompactEvent decodes a compact event back to a full event.E.
// The resolver is used to look up pubkeys and event IDs from serials.
// The eventId parameter is the full 32-byte event ID (from SerialEventId
// table) — the compact format does not embed the ID in the value itself.
//
// The layout mirrors MarshalCompactEvent: version byte, 5-byte author serial,
// varint created_at, 2-byte big-endian kind, tags, content, 64-byte sig.
func UnmarshalCompactEvent(data []byte, eventId []byte, resolver SerialResolver) (ev *event.E, err error) {
	r := bytes.NewReader(data)
	ev = new(event.E)
	// Version byte — must match CompactFormatVersion exactly.
	version, err := r.ReadByte()
	if err != nil {
		return nil, err
	}
	if version != CompactFormatVersion {
		return nil, errors.New("unsupported compact event format version")
	}
	// Set the event ID (passed separately from SerialEventId lookup).
	// copy() tolerates a short eventId, leaving trailing zero bytes.
	ev.ID = make([]byte, 32)
	copy(ev.ID, eventId)
	// Author pubkey serial (5 bytes) -> full pubkey
	authorSerial, err := readUint40(r)
	if err != nil {
		return nil, err
	}
	if ev.Pubkey, err = resolver.GetPubkeyBySerial(authorSerial); chk.E(err) {
		return nil, err
	}
	// CreatedAt (varint)
	var ca uint64
	if ca, err = varint.Decode(r); chk.E(err) {
		return nil, err
	}
	ev.CreatedAt = int64(ca)
	// Kind (2 bytes big-endian)
	if err = binary.Read(r, binary.BigEndian, &ev.Kind); chk.E(err) {
		return nil, err
	}
	// Tags: count, then each tag. ev.Tags stays nil when the count is 0.
	var nTags uint64
	if nTags, err = varint.Decode(r); chk.E(err) {
		return nil, err
	}
	if nTags > 0 {
		ev.Tags = tag.NewSWithCap(int(nTags))
		for i := uint64(0); i < nTags; i++ {
			var t *tag.T
			if t, err = decodeCompactTag(r, resolver); chk.E(err) {
				return nil, err
			}
			*ev.Tags = append(*ev.Tags, t)
		}
	}
	// Content (varint length + bytes).
	// NOTE(review): contentLen comes from stored data; a corrupt record could
	// request a very large allocation — consider bounding against len(data).
	var contentLen uint64
	if contentLen, err = varint.Decode(r); chk.E(err) {
		return nil, err
	}
	ev.Content = make([]byte, contentLen)
	if _, err = io.ReadFull(r, ev.Content); chk.E(err) {
		return nil, err
	}
	// Signature (64 bytes)
	ev.Sig = make([]byte, schnorr.SignatureSize)
	if _, err = io.ReadFull(r, ev.Sig); chk.E(err) {
		return nil, err
	}
	return ev, nil
}
// decodeCompactTag decodes a single tag from compact format: a varint element
// count followed by that many tag elements. Serial references inside the tag
// are resolved back to full values via the resolver.
func decodeCompactTag(r io.Reader, resolver SerialResolver) (t *tag.T, err error) {
	var count uint64
	if count, err = varint.Decode(r); chk.E(err) {
		return nil, err
	}
	t = tag.NewWithCap(int(count))
	for remaining := count; remaining > 0; remaining-- {
		var field []byte
		if field, err = decodeTagElement(r, resolver); chk.E(err) {
			return nil, err
		}
		t.T = append(t.T, field)
	}
	return t, nil
}
// decodeTagElement decodes a single tag element from compact format: a 1-byte
// type flag followed by a payload whose layout depends on the flag (see the
// TagElement* constants). Serial references are resolved to full 32-byte
// values via the resolver; raw and full-ID elements are returned as stored.
func decodeTagElement(r io.Reader, resolver SerialResolver) (elem []byte, err error) {
	// Read type flag
	typeBuf := make([]byte, 1)
	if _, err = io.ReadFull(r, typeBuf); err != nil {
		return nil, err
	}
	typeFlag := typeBuf[0]
	switch typeFlag {
	case TagElementRaw:
		// Raw bytes: varint length + data
		var length uint64
		if length, err = varint.Decode(r); chk.E(err) {
			return nil, err
		}
		elem = make([]byte, length)
		if _, err = io.ReadFull(r, elem); err != nil {
			return nil, err
		}
		return elem, nil
	case TagElementPubkeySerial:
		// Pubkey serial: 5 bytes -> lookup full pubkey -> return as 32-byte binary
		serial, err := readUint40(r)
		if err != nil {
			return nil, err
		}
		pubkey, err := resolver.GetPubkeyBySerial(serial)
		if err != nil {
			return nil, err
		}
		// Return as 32-byte binary (nostr library optimized format).
		// NOTE(review): the resolver result is returned unvalidated —
		// presumably always 32 bytes; confirm against the resolver impl.
		return pubkey, nil
	case TagElementEventSerial:
		// Event serial: 5 bytes -> lookup full event ID -> return as 32-byte binary
		serial, err := readUint40(r)
		if err != nil {
			return nil, err
		}
		eventId, err := resolver.GetEventIdBySerial(serial)
		if err != nil {
			return nil, err
		}
		// Return as 32-byte binary
		return eventId, nil
	case TagElementEventIdFull:
		// Full event ID: 32 bytes (for unknown/forward references)
		elem = make([]byte, 32)
		if _, err = io.ReadFull(r, elem); err != nil {
			return nil, err
		}
		return elem, nil
	default:
		return nil, errors.New("unknown tag element type flag")
	}
}
// hexDecode decodes hex bytes to binary using the standard library decoder
// (encoding/hex), replacing the previous hand-rolled implementation. Both
// upper- and lower-case hex digits are accepted.
//
// Returns an error for odd-length input or any non-hex character; on error
// the returned slice is nil.
func hexDecode(src []byte) (dst []byte, err error) {
	dst = make([]byte, hex.DecodedLen(len(src)))
	if _, err = hex.Decode(dst, src); err != nil {
		return nil, err
	}
	return dst, nil
}
// unhex maps one ASCII hex digit to its 4-bit value, returning 0xFF when the
// byte is not a valid hexadecimal character (the caller's error sentinel).
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	if c >= 'a' && c <= 'f' {
		return c - 'a' + 10
	}
	if c >= 'A' && c <= 'F' {
		return c - 'A' + 10
	}
	return 0xFF
}

View File

@@ -0,0 +1,195 @@
//go:build !(js && wasm)
package database
import (
"bytes"
"sync/atomic"
"github.com/dgraph-io/badger/v4"
"lol.mleku.dev/chk"
"lol.mleku.dev/log"
"next.orly.dev/pkg/database/indexes"
)
// CompactStorageStats holds statistics about compact vs legacy storage,
// as computed by (*D).CompactStorageStats from a full database scan.
type CompactStorageStats struct {
	// Event counts
	CompactEvents int64 // Number of events in compact format (cmp prefix)
	LegacyEvents  int64 // Number of events in legacy format (evt/sev prefixes)
	TotalEvents   int64 // Total events
	// Storage sizes (key + value bytes; sev entries count key bytes only,
	// since their event data is inlined in the key)
	CompactBytes int64 // Total bytes used by compact format
	LegacyBytes  int64 // Total bytes used by legacy format (would be used without compact)
	// Savings — estimated from an assumed compression ratio, not measured
	BytesSaved     int64   // Bytes saved by using compact format
	PercentSaved   float64 // Percentage of space saved
	AverageCompact float64 // Average compact event size
	AverageLegacy  float64 // Average legacy event size (estimated)
	// Serial mappings
	SerialEventIdEntries int64 // Number of sei (serial -> event ID) mappings
	SerialEventIdBytes   int64 // Bytes used by sei mappings
}
// CompactStorageStats calculates storage statistics for compact event storage.
// This scans the database to provide accurate metrics on space savings.
//
// Four prefix scans run inside one read transaction: cmp (compact events),
// evt (legacy events), sev (legacy inline small events), and sei
// (serial -> event ID mappings). Counts and sizes are exact; the savings
// figures are estimates derived from an assumed 60% compression ratio.
func (d *D) CompactStorageStats() (stats CompactStorageStats, err error) {
	if err = d.View(func(txn *badger.Txn) error {
		// Count compact events (cmp prefix)
		cmpPrf := new(bytes.Buffer)
		if err = indexes.CompactEventEnc(nil).MarshalWrite(cmpPrf); chk.E(err) {
			return err
		}
		it := txn.NewIterator(badger.IteratorOptions{Prefix: cmpPrf.Bytes()})
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			stats.CompactEvents++
			stats.CompactBytes += int64(len(item.Key())) + int64(item.ValueSize())
		}
		it.Close()
		// Count legacy evt entries
		evtPrf := new(bytes.Buffer)
		if err = indexes.EventEnc(nil).MarshalWrite(evtPrf); chk.E(err) {
			return err
		}
		it = txn.NewIterator(badger.IteratorOptions{Prefix: evtPrf.Bytes()})
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			stats.LegacyEvents++
			stats.LegacyBytes += int64(len(item.Key())) + int64(item.ValueSize())
		}
		it.Close()
		// Count legacy sev entries
		sevPrf := new(bytes.Buffer)
		if err = indexes.SmallEventEnc(nil).MarshalWrite(sevPrf); chk.E(err) {
			return err
		}
		it = txn.NewIterator(badger.IteratorOptions{Prefix: sevPrf.Bytes()})
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			stats.LegacyEvents++
			stats.LegacyBytes += int64(len(item.Key())) // sev stores data in key
		}
		it.Close()
		// Count SerialEventId mappings (sei prefix)
		seiPrf := new(bytes.Buffer)
		if err = indexes.SerialEventIdEnc(nil).MarshalWrite(seiPrf); chk.E(err) {
			return err
		}
		it = txn.NewIterator(badger.IteratorOptions{Prefix: seiPrf.Bytes()})
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			stats.SerialEventIdEntries++
			stats.SerialEventIdBytes += int64(len(item.Key())) + int64(item.ValueSize())
		}
		it.Close()
		return nil
	}); chk.E(err) {
		return
	}
	stats.TotalEvents = stats.CompactEvents + stats.LegacyEvents
	// Calculate averages (guard against divide-by-zero on empty tables)
	if stats.CompactEvents > 0 {
		stats.AverageCompact = float64(stats.CompactBytes) / float64(stats.CompactEvents)
	}
	if stats.LegacyEvents > 0 {
		stats.AverageLegacy = float64(stats.LegacyBytes) / float64(stats.LegacyEvents)
	}
	// Estimate savings: compare compact size to what legacy size would be.
	// For events that are in compact format, estimate legacy size based on typical ratios.
	// A typical event has:
	// - 32 bytes event ID (saved in compact: stored separately in sei)
	// - 32 bytes pubkey (saved: replaced by 5-byte serial)
	// - For e-tags: 32 bytes each (saved: replaced by 5-byte serial when known)
	// - For p-tags: 32 bytes each (saved: replaced by 5-byte serial)
	// Conservative estimate: compact format is ~60% of legacy size for typical events.
	if stats.CompactEvents > 0 && stats.AverageCompact > 0 {
		// Estimate what the legacy size would have been
		estimatedLegacyForCompact := float64(stats.CompactBytes) / 0.60 // 60% compression ratio
		// sei mapping overhead is subtracted since it only exists for compact storage
		stats.BytesSaved = int64(estimatedLegacyForCompact) - stats.CompactBytes - stats.SerialEventIdBytes
		if stats.BytesSaved < 0 {
			stats.BytesSaved = 0
		}
		totalWithoutCompact := estimatedLegacyForCompact + float64(stats.LegacyBytes)
		totalWithCompact := float64(stats.CompactBytes + stats.LegacyBytes + stats.SerialEventIdBytes)
		if totalWithoutCompact > 0 {
			stats.PercentSaved = (1.0 - totalWithCompact/totalWithoutCompact) * 100.0
		}
	}
	return stats, nil
}
// compactSaveCounter tracks cumulative bytes saved by compact format over the
// process lifetime. Updated via TrackCompactSaving, read via
// GetCumulativeCompactSavings, reset via ResetCompactSavingsCounter.
var compactSaveCounter atomic.Int64
// LogCompactSavings logs the storage savings achieved by compact format.
// Call this periodically or after significant operations.
//
// This triggers a full database scan via CompactStorageStats, so it is not
// free; failures are logged at warn level and otherwise ignored, and nothing
// is logged when the database holds no events.
func (d *D) LogCompactSavings() {
	stats, err := d.CompactStorageStats()
	if err != nil {
		log.W.F("failed to get compact storage stats: %v", err)
		return
	}
	if stats.TotalEvents == 0 {
		return
	}
	log.I.F("📊 Compact storage stats: %d compact events, %d legacy events",
		stats.CompactEvents, stats.LegacyEvents)
	log.I.F("   Compact size: %.2f MB, Legacy size: %.2f MB",
		float64(stats.CompactBytes)/(1024.0*1024.0),
		float64(stats.LegacyBytes)/(1024.0*1024.0))
	log.I.F("   Serial mappings (sei): %d entries, %.2f KB",
		stats.SerialEventIdEntries,
		float64(stats.SerialEventIdBytes)/1024.0)
	if stats.CompactEvents > 0 {
		// AverageCompact/0.60 mirrors the 60% ratio assumed by CompactStorageStats.
		log.I.F("   Average compact event: %.0f bytes, estimated legacy: %.0f bytes",
			stats.AverageCompact, stats.AverageCompact/0.60)
		log.I.F("   Estimated savings: %.2f MB (%.1f%%)",
			float64(stats.BytesSaved)/(1024.0*1024.0),
			stats.PercentSaved)
	}
	// Also log serial cache stats
	cacheStats := d.SerialCacheStats()
	log.I.F("   Serial cache: %d/%d pubkeys, %d/%d event IDs, ~%.2f MB memory",
		cacheStats.PubkeysCached, cacheStats.PubkeysMaxSize,
		cacheStats.EventIdsCached, cacheStats.EventIdsMaxSize,
		float64(cacheStats.TotalMemoryBytes)/(1024.0*1024.0))
}
// TrackCompactSaving records the byte delta between a legacy-encoded and a
// compact-encoded event in the process-wide savings counter. Call this during
// event save to track cumulative savings; non-positive deltas (compact not
// smaller than legacy) are ignored.
func TrackCompactSaving(legacySize, compactSize int) {
	if delta := legacySize - compactSize; delta > 0 {
		compactSaveCounter.Add(int64(delta))
	}
}
// GetCumulativeCompactSavings returns total bytes saved across all compact
// saves recorded via TrackCompactSaving since process start (or the last
// ResetCompactSavingsCounter call).
func GetCumulativeCompactSavings() int64 {
	return compactSaveCounter.Load()
}
// ResetCompactSavingsCounter resets the cumulative savings counter to zero.
func ResetCompactSavingsCounter() {
	compactSaveCounter.Store(0)
}

View File

@@ -32,6 +32,10 @@ type D struct {
pubkeySeq *badger.Sequence // Sequence for pubkey serials
ready chan struct{} // Closed when database is ready to serve requests
queryCache *querycache.EventCache
// Serial cache for compact event storage
// Caches pubkey and event ID serial mappings for fast compact event decoding
serialCache *SerialCache
}
// Ensure D implements Database interface at compile time
@@ -87,6 +91,25 @@ func NewWithConfig(
inlineEventThreshold = 1024 // Default 1024 bytes
}
// Serial cache configuration for compact event storage
serialCachePubkeys := cfg.SerialCachePubkeys
if serialCachePubkeys == 0 {
serialCachePubkeys = 100000 // Default 100k pubkeys (~3.2MB memory)
}
serialCacheEventIds := cfg.SerialCacheEventIds
if serialCacheEventIds == 0 {
serialCacheEventIds = 500000 // Default 500k event IDs (~16MB memory)
}
// ZSTD compression level configuration
// Level 0 = disabled, 1 = fast (~500 MB/s), 3 = default, 9 = best ratio
zstdLevel := cfg.ZSTDLevel
if zstdLevel < 0 {
zstdLevel = 0
} else if zstdLevel > 19 {
zstdLevel = 19 // ZSTD maximum level
}
queryCacheSize := int64(queryCacheSizeMB * 1024 * 1024)
d = &D{
@@ -99,6 +122,7 @@ func NewWithConfig(
seq: nil,
ready: make(chan struct{}),
queryCache: querycache.NewEventCache(queryCacheSize, queryCacheMaxAge),
serialCache: NewSerialCache(serialCachePubkeys, serialCacheEventIds),
}
// Ensure the data directory exists
@@ -141,8 +165,13 @@ func NewWithConfig(
opts.LmaxCompaction = true
// Enable compression to reduce cache cost
opts.Compression = options.ZSTD
opts.ZSTDCompressionLevel = 1 // Fast compression (500+ MB/s)
// Level 0 disables compression, 1 = fast (~500 MB/s), 3 = default, 9 = best ratio
if zstdLevel == 0 {
opts.Compression = options.None
} else {
opts.Compression = options.ZSTD
opts.ZSTDCompressionLevel = zstdLevel
}
// Disable conflict detection for write-heavy relay workloads
// Nostr events are immutable, no need for transaction conflict checks

View File

@@ -16,52 +16,141 @@ import (
)
// Export the complete database of stored events to an io.Writer in line structured minified
// JSON.
// JSON. Supports both legacy and compact event formats.
func (d *D) Export(c context.Context, w io.Writer, pubkeys ...[]byte) {
var err error
evB := make([]byte, 0, units.Mb)
evBuf := bytes.NewBuffer(evB)
// Create resolver for compact event decoding
resolver := NewDatabaseSerialResolver(d, d.serialCache)
// Helper function to unmarshal event data (handles both legacy and compact formats)
unmarshalEventData := func(val []byte, ser *types.Uint40) (*event.E, error) {
// Check if this is compact format (starts with version byte 1)
if len(val) > 0 && val[0] == CompactFormatVersion {
// Get event ID from SerialEventId table
eventId, idErr := d.GetEventIdBySerial(ser)
if idErr != nil {
// Can't decode without event ID - skip
return nil, idErr
}
return UnmarshalCompactEvent(val, eventId, resolver)
}
// Legacy binary format
ev := event.New()
evBuf.Reset()
evBuf.Write(val)
if err := ev.UnmarshalBinary(evBuf); err != nil {
return nil, err
}
return ev, nil
}
if len(pubkeys) == 0 {
// Export all events - prefer cmp table, fall back to evt
if err = d.View(
func(txn *badger.Txn) (err error) {
buf := new(bytes.Buffer)
if err = indexes.EventEnc(nil).MarshalWrite(buf); chk.E(err) {
// First try cmp (compact format) table
cmpBuf := new(bytes.Buffer)
if err = indexes.CompactEventEnc(nil).MarshalWrite(cmpBuf); chk.E(err) {
return
}
it := txn.NewIterator(badger.IteratorOptions{Prefix: buf.Bytes()})
it := txn.NewIterator(badger.IteratorOptions{Prefix: cmpBuf.Bytes()})
defer it.Close()
seenSerials := make(map[uint64]bool)
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
if err = item.Value(
func(val []byte) (err error) {
evBuf.Write(val)
return
},
); chk.E(err) {
key := item.Key()
// Extract serial from key
ser := new(types.Uint40)
if err = ser.UnmarshalRead(bytes.NewReader(key[3:8])); chk.E(err) {
continue
}
ev := event.New()
if err = ev.UnmarshalBinary(evBuf); chk.E(err) {
seenSerials[ser.Get()] = true
var val []byte
if val, err = item.ValueCopy(nil); chk.E(err) {
continue
}
ev, unmarshalErr := unmarshalEventData(val, ser)
if unmarshalErr != nil {
continue
}
// Serialize the event to JSON and write it to the output
defer func(ev *event.E) {
ev.Free()
evBuf.Reset()
}(ev)
if _, err = w.Write(ev.Serialize()); chk.E(err) {
ev.Free()
return
}
if _, err = w.Write([]byte{'\n'}); chk.E(err) {
ev.Free()
return
}
ev.Free()
}
it.Close()
// Then fall back to evt (legacy) table for any events not in cmp
evtBuf := new(bytes.Buffer)
if err = indexes.EventEnc(nil).MarshalWrite(evtBuf); chk.E(err) {
return
}
it2 := txn.NewIterator(badger.IteratorOptions{Prefix: evtBuf.Bytes()})
defer it2.Close()
for it2.Rewind(); it2.Valid(); it2.Next() {
item := it2.Item()
key := item.Key()
// Extract serial from key
ser := new(types.Uint40)
if err = ser.UnmarshalRead(bytes.NewReader(key[3:8])); chk.E(err) {
continue
}
// Skip if already exported from cmp table
if seenSerials[ser.Get()] {
continue
}
var val []byte
if val, err = item.ValueCopy(nil); chk.E(err) {
continue
}
ev, unmarshalErr := unmarshalEventData(val, ser)
if unmarshalErr != nil {
continue
}
// Serialize the event to JSON and write it to the output
if _, err = w.Write(ev.Serialize()); chk.E(err) {
ev.Free()
return
}
if _, err = w.Write([]byte{'\n'}); chk.E(err) {
ev.Free()
return
}
ev.Free()
}
return
},
); err != nil {
return
}
} else {
// Export events for specific pubkeys
for _, pubkey := range pubkeys {
if err = d.View(
func(txn *badger.Txn) (err error) {
@@ -79,29 +168,34 @@ func (d *D) Export(c context.Context, w io.Writer, pubkeys ...[]byte) {
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
if err = item.Value(
func(val []byte) (err error) {
evBuf.Write(val)
return
},
); chk.E(err) {
key := item.Key()
// Extract serial from pubkey index key
// Key format: pc-|pubkey_hash|created_at|serial
if len(key) < 3+8+8+5 {
continue
}
ev := event.New()
if err = ev.UnmarshalBinary(evBuf); chk.E(err) {
ser := new(types.Uint40)
if err = ser.UnmarshalRead(bytes.NewReader(key[len(key)-5:])); chk.E(err) {
continue
}
// Fetch the event using FetchEventBySerial which handles all formats
ev, fetchErr := d.FetchEventBySerial(ser)
if fetchErr != nil || ev == nil {
continue
}
// Serialize the event to JSON and write it to the output
defer func(ev *event.E) {
ev.Free()
evBuf.Reset()
}(ev)
if _, err = w.Write(ev.Serialize()); chk.E(err) {
ev.Free()
continue
}
if _, err = w.Write([]byte{'\n'}); chk.E(err) {
ev.Free()
continue
}
ev.Free()
}
return
},

View File

@@ -24,6 +24,13 @@ type DatabaseConfig struct {
QueryCacheMaxAge time.Duration // ORLY_QUERY_CACHE_MAX_AGE
InlineEventThreshold int // ORLY_INLINE_EVENT_THRESHOLD
// Serial cache settings for compact event storage
SerialCachePubkeys int // ORLY_SERIAL_CACHE_PUBKEYS - max pubkeys to cache (default: 100000)
SerialCacheEventIds int // ORLY_SERIAL_CACHE_EVENT_IDS - max event IDs to cache (default: 500000)
// Compression settings
ZSTDLevel int // ORLY_DB_ZSTD_LEVEL - ZSTD compression level (0=none, 1=fast, 3=default, 9=best)
// Neo4j-specific settings
Neo4jURI string // ORLY_NEO4J_URI
Neo4jUser string // ORLY_NEO4J_USER

View File

@@ -24,6 +24,10 @@ type DatabaseConfig struct {
QueryCacheMaxAge time.Duration // ORLY_QUERY_CACHE_MAX_AGE
InlineEventThreshold int // ORLY_INLINE_EVENT_THRESHOLD
// Serial cache settings for compact event storage (Badger-specific)
SerialCachePubkeys int // ORLY_SERIAL_CACHE_PUBKEYS - max pubkeys to cache (default: 100000)
SerialCacheEventIds int // ORLY_SERIAL_CACHE_EVENT_IDS - max event IDs to cache (default: 500000)
// Neo4j-specific settings
Neo4jURI string // ORLY_NEO4J_URI
Neo4jUser string // ORLY_NEO4J_USER

View File

@@ -13,9 +13,24 @@ import (
"git.mleku.dev/mleku/nostr/encoders/event"
)
// FetchEventBySerial fetches a single event by its serial.
// This function tries multiple storage formats in order:
// 1. cmp (compact format with serial references) - newest, most space-efficient
// 2. sev (small event inline) - legacy Reiser4 optimization
// 3. evt (traditional separate storage) - legacy fallback
func (d *D) FetchEventBySerial(ser *types.Uint40) (ev *event.E, err error) {
// Create resolver for compact event decoding
resolver := NewDatabaseSerialResolver(d, d.serialCache)
if err = d.View(
func(txn *badger.Txn) (err error) {
// Try cmp (compact format) first - most efficient
ev, err = d.fetchCompactEvent(txn, ser, resolver)
if err == nil && ev != nil {
return nil
}
err = nil // Reset error, try legacy formats
// Helper function to extract inline event data from key
extractInlineData := func(key []byte, prefixLen int) (*event.E, error) {
if len(key) > prefixLen+2 {
@@ -25,6 +40,16 @@ func (d *D) FetchEventBySerial(ser *types.Uint40) (ev *event.E, err error) {
if len(key) >= dataStart+size {
eventData := key[dataStart : dataStart+size]
// Check if this is compact format
if len(eventData) > 0 && eventData[0] == CompactFormatVersion {
eventId, idErr := d.GetEventIdBySerial(ser)
if idErr == nil {
return UnmarshalCompactEvent(eventData, eventId, resolver)
}
}
// Legacy binary format
ev := new(event.E)
if err := ev.UnmarshalBinary(bytes.NewBuffer(eventData)); err != nil {
return nil, fmt.Errorf(
@@ -38,7 +63,7 @@ func (d *D) FetchEventBySerial(ser *types.Uint40) (ev *event.E, err error) {
return nil, nil
}
// Try sev (small event inline) prefix first - Reiser4 optimization
// Try sev (small event inline) prefix - Reiser4 optimization
smallBuf := new(bytes.Buffer)
if err = indexes.SmallEventEnc(ser).MarshalWrite(smallBuf); chk.E(err) {
return
@@ -77,6 +102,16 @@ func (d *D) FetchEventBySerial(ser *types.Uint40) (ev *event.E, err error) {
if v, err = item.ValueCopy(nil); chk.E(err) {
return
}
// Check if this is compact format
if len(v) > 0 && v[0] == CompactFormatVersion {
eventId, idErr := d.GetEventIdBySerial(ser)
if idErr == nil {
ev, err = UnmarshalCompactEvent(v, eventId, resolver)
return
}
}
// Check if we have valid data before attempting to unmarshal
if len(v) < 32+32+1+2+1+1+64 { // ID + Pubkey + min varint fields + Sig
err = fmt.Errorf(

View File

@@ -7,6 +7,7 @@ import (
"github.com/dgraph-io/badger/v4"
"lol.mleku.dev/chk"
"lol.mleku.dev/log"
"next.orly.dev/pkg/database/indexes"
"next.orly.dev/pkg/database/indexes/types"
"git.mleku.dev/mleku/nostr/encoders/event"
@@ -14,6 +15,11 @@ import (
// FetchEventsBySerials fetches multiple events by their serials in a single database transaction.
// Returns a map of serial uint64 value to event, only including successfully fetched events.
//
// This function tries multiple storage formats in order:
// 1. cmp (compact format with serial references) - newest, most space-efficient
// 2. sev (small event inline) - legacy Reiser4 optimization
// 3. evt (traditional separate storage) - legacy fallback
func (d *D) FetchEventsBySerials(serials []*types.Uint40) (events map[uint64]*event.E, err error) {
// Pre-allocate map with estimated capacity to reduce reallocations
events = make(map[uint64]*event.E, len(serials))
@@ -22,89 +28,38 @@ func (d *D) FetchEventsBySerials(serials []*types.Uint40) (events map[uint64]*ev
return events, nil
}
// Create resolver for compact event decoding
resolver := NewDatabaseSerialResolver(d, d.serialCache)
if err = d.View(
func(txn *badger.Txn) (err error) {
for _, ser := range serials {
var ev *event.E
serialVal := ser.Get()
// Try sev (small event inline) prefix first - Reiser4 optimization
smallBuf := new(bytes.Buffer)
if err = indexes.SmallEventEnc(ser).MarshalWrite(smallBuf); chk.E(err) {
// Skip this serial on error but continue with others
err = nil
// Try cmp (compact format) first - most efficient
ev, err = d.fetchCompactEvent(txn, ser, resolver)
if err == nil && ev != nil {
events[serialVal] = ev
continue
}
err = nil // Reset error, try legacy formats
// Iterate with prefix to find the small event key
opts := badger.DefaultIteratorOptions
opts.Prefix = smallBuf.Bytes()
opts.PrefetchValues = true
opts.PrefetchSize = 1
it := txn.NewIterator(opts)
it.Rewind()
if it.Valid() {
// Found in sev table - extract inline data
key := it.Item().Key()
// Key format: sev|serial|size_uint16|event_data
if len(key) > 8+2 { // prefix(3) + serial(5) + size(2) = 10 bytes minimum
sizeIdx := 8 // After sev(3) + serial(5)
// Read uint16 big-endian size
size := int(key[sizeIdx])<<8 | int(key[sizeIdx+1])
dataStart := sizeIdx + 2
if len(key) >= dataStart+size {
eventData := key[dataStart : dataStart+size]
ev = new(event.E)
if err = ev.UnmarshalBinary(bytes.NewBuffer(eventData)); err == nil {
events[ser.Get()] = ev
}
// Clean up and continue
it.Close()
err = nil
continue
}
}
// Try sev (small event inline) prefix - legacy Reiser4 optimization
ev, err = d.fetchSmallEvent(txn, ser)
if err == nil && ev != nil {
events[serialVal] = ev
continue
}
it.Close()
err = nil // Reset error, try evt
// Not found in sev table, try evt (traditional) prefix
buf := new(bytes.Buffer)
if err = indexes.EventEnc(ser).MarshalWrite(buf); chk.E(err) {
// Skip this serial on error but continue with others
err = nil
ev, err = d.fetchLegacyEvent(txn, ser)
if err == nil && ev != nil {
events[serialVal] = ev
continue
}
var item *badger.Item
if item, err = txn.Get(buf.Bytes()); err != nil {
// Skip this serial if not found but continue with others
err = nil
continue
}
var v []byte
if v, err = item.ValueCopy(nil); chk.E(err) {
// Skip this serial on error but continue with others
err = nil
continue
}
// Check if we have valid data before attempting to unmarshal
if len(v) < 32+32+1+2+1+1+64 { // ID + Pubkey + min varint fields + Sig
// Skip this serial - incomplete data
continue
}
ev = new(event.E)
if err = ev.UnmarshalBinary(bytes.NewBuffer(v)); err != nil {
// Skip this serial on unmarshal error but continue with others
err = nil
continue
}
// Successfully unmarshaled event, add to results
events[ser.Get()] = ev
err = nil // Reset error, event not found
}
return nil
},
@@ -113,4 +68,150 @@ func (d *D) FetchEventsBySerials(serials []*types.Uint40) (events map[uint64]*ev
}
return events, nil
}
}
// fetchCompactEvent tries to fetch an event from the compact format (cmp
// prefix) within the given read transaction. It returns badger's not-found
// error when no cmp entry exists for the serial; callers treat any error as
// "fall back to legacy formats". The event ID is recovered separately from
// the SerialEventId table, since the compact value does not embed it.
func (d *D) fetchCompactEvent(txn *badger.Txn, ser *types.Uint40, resolver SerialResolver) (ev *event.E, err error) {
	// Build cmp key
	keyBuf := new(bytes.Buffer)
	if err = indexes.CompactEventEnc(ser).MarshalWrite(keyBuf); chk.E(err) {
		return nil, err
	}
	item, err := txn.Get(keyBuf.Bytes())
	if err != nil {
		return nil, err
	}
	var compactData []byte
	if compactData, err = item.ValueCopy(nil); chk.E(err) {
		return nil, err
	}
	// Need to get the event ID from SerialEventId table
	eventId, err := d.GetEventIdBySerial(ser)
	if err != nil {
		log.D.F("fetchCompactEvent: failed to get event ID for serial %d: %v", ser.Get(), err)
		return nil, err
	}
	// Unmarshal compact event
	ev, err = UnmarshalCompactEvent(compactData, eventId, resolver)
	if err != nil {
		log.D.F("fetchCompactEvent: failed to unmarshal compact event for serial %d: %v", ser.Get(), err)
		return nil, err
	}
	return ev, nil
}
// fetchSmallEvent tries to fetch an event from the small event inline format
// (sev prefix), where the event payload is embedded in the key itself:
//
//	sev(3) | serial(5) | size uint16 BE (2) | event data (size bytes)
//
// Returns (nil, nil) when no sev entry exists for the serial or the key is
// malformed, so callers can fall through to other storage formats. The
// embedded payload may be either the legacy binary event encoding or the
// compact serial-reference encoding, detected via the leading version byte.
func (d *D) fetchSmallEvent(txn *badger.Txn, ser *types.Uint40) (ev *event.E, err error) {
	smallBuf := new(bytes.Buffer)
	if err = indexes.SmallEventEnc(ser).MarshalWrite(smallBuf); chk.E(err) {
		return nil, err
	}
	// Iterate with prefix to find the small event key; the exact key cannot
	// be built directly because it embeds the (unknown) size and payload.
	opts := badger.DefaultIteratorOptions
	opts.Prefix = smallBuf.Bytes()
	opts.PrefetchValues = true
	opts.PrefetchSize = 1
	it := txn.NewIterator(opts)
	defer it.Close()
	it.Rewind()
	if !it.Valid() {
		return nil, nil // Not found
	}
	// Found in sev table - extract inline data
	key := it.Item().Key()
	// Key format: sev|serial|size_uint16|event_data
	if len(key) <= 8+2 { // prefix(3) + serial(5) + size(2) = 10 bytes minimum
		return nil, nil
	}
	sizeIdx := 8 // After sev(3) + serial(5)
	// Read uint16 big-endian size
	size := int(key[sizeIdx])<<8 | int(key[sizeIdx+1])
	dataStart := sizeIdx + 2
	if len(key) < dataStart+size {
		// Declared size exceeds the available key bytes; treat as not found.
		return nil, nil
	}
	eventData := key[dataStart : dataStart+size]
	// Check if this is compact format (starts with version byte 1).
	// NOTE(review): a legacy-encoded event whose 32-byte ID happens to start
	// with 0x01 would also match this test (~1/256 chance) — confirm the two
	// formats are disambiguated elsewhere.
	if len(eventData) > 0 && eventData[0] == CompactFormatVersion {
		// This is compact format stored in sev - need to decode with resolver
		resolver := NewDatabaseSerialResolver(d, d.serialCache)
		eventId, idErr := d.GetEventIdBySerial(ser)
		if idErr != nil {
			// No sei mapping available: fall back to legacy unmarshal
			ev = new(event.E)
			if err = ev.UnmarshalBinary(bytes.NewBuffer(eventData)); err != nil {
				return nil, err
			}
			return ev, nil
		}
		return UnmarshalCompactEvent(eventData, eventId, resolver)
	}
	// Legacy binary format
	ev = new(event.E)
	if err = ev.UnmarshalBinary(bytes.NewBuffer(eventData)); err != nil {
		return nil, err
	}
	return ev, nil
}
// fetchLegacyEvent tries to fetch an event from the legacy format (evt prefix).
//
// The stored value may be either the legacy binary event encoding or the
// compact serial-reference encoding (detected by the leading version byte).
// Returns (nil, nil) when the legacy value is too short to be a valid event,
// so callers can fall through to other storage formats.
//
// Fix: compact-format detection now happens BEFORE the legacy minimum-size
// check. Compact events replace 32-byte IDs/pubkeys with 5-byte serials and
// can legitimately be smaller than the legacy minimum of 133 bytes; the old
// order (size check first) silently dropped such short compact events.
func (d *D) fetchLegacyEvent(txn *badger.Txn, ser *types.Uint40) (ev *event.E, err error) {
	// Build the evt|serial key.
	buf := new(bytes.Buffer)
	if err = indexes.EventEnc(ser).MarshalWrite(buf); chk.E(err) {
		return nil, err
	}
	item, err := txn.Get(buf.Bytes())
	if err != nil {
		return nil, err
	}
	var v []byte
	if v, err = item.ValueCopy(nil); chk.E(err) {
		return nil, err
	}
	// Check if this is compact format (starts with version byte 1).
	// NOTE(review): a legacy event whose 32-byte ID begins with 0x01 would
	// also match (~1/256 chance); this ambiguity is preserved from the
	// original format and should be resolved with a stronger discriminator.
	if len(v) > 0 && v[0] == CompactFormatVersion {
		// Compact format stored in evt - decode with a serial resolver.
		resolver := NewDatabaseSerialResolver(d, d.serialCache)
		eventId, idErr := d.GetEventIdBySerial(ser)
		if idErr != nil {
			// No sei mapping available: fall back to legacy unmarshal.
			ev = new(event.E)
			if err = ev.UnmarshalBinary(bytes.NewBuffer(v)); err != nil {
				return nil, err
			}
			return ev, nil
		}
		return UnmarshalCompactEvent(v, eventId, resolver)
	}
	// Legacy binary format: ID(32) + Pubkey(32) + minimal varint fields + Sig(64).
	// Apply the minimum-size sanity check only to legacy-encoded data.
	if len(v) < 32+32+1+2+1+1+64 {
		return nil, nil // incomplete/corrupt record; treat as not found
	}
	ev = new(event.E)
	if err = ev.UnmarshalBinary(bytes.NewBuffer(v)); err != nil {
		return nil, err
	}
	return ev, nil
}

View File

@@ -81,6 +81,10 @@ const (
SerialPubkeyPrefix = I("spk") // pubkey serial -> pubkey hash (full 32 bytes)
EventPubkeyGraphPrefix = I("epg") // event serial -> pubkey serial (graph edges)
PubkeyEventGraphPrefix = I("peg") // pubkey serial -> event serial (reverse edges)
// Compact event storage indexes
SerialEventIdPrefix = I("sei") // event serial -> full 32-byte event ID
CompactEventPrefix = I("cmp") // compact event storage with serial references
)
// Prefix returns the three byte human-readable prefixes that go in front of
@@ -133,6 +137,11 @@ func Prefix(prf int) (i I) {
return EventPubkeyGraphPrefix
case PubkeyEventGraph:
return PubkeyEventGraphPrefix
case SerialEventId:
return SerialEventIdPrefix
case CompactEvent:
return CompactEventPrefix
}
return
}
@@ -191,6 +200,11 @@ func Identify(r io.Reader) (i int, err error) {
i = EventPubkeyGraph
case PubkeyEventGraphPrefix:
i = PubkeyEventGraph
case SerialEventIdPrefix:
i = SerialEventId
case CompactEventPrefix:
i = CompactEvent
}
return
}
@@ -608,3 +622,36 @@ func PubkeyEventGraphEnc(pubkeySer *types.Uint40, kind *types.Uint16, direction
func PubkeyEventGraphDec(pubkeySer *types.Uint40, kind *types.Uint16, direction *types.Letter, eventSer *types.Uint40) (enc *T) {
return New(NewPrefix(), pubkeySer, kind, direction, eventSer)
}
// SerialEventId maps an event serial to its full 32-byte event ID.
// This enables reconstruction of the original event ID from compact storage.
// The event ID is stored as the value (32 bytes), not inline in the key.
//
// 3 prefix|5 serial -> 32 byte event ID value
var SerialEventId = next()

// SerialEventIdVars allocates the variable needed to decode a SerialEventId
// key: the 5-byte event serial.
func SerialEventIdVars() (ser *types.Uint40) {
	return new(types.Uint40)
}

// SerialEventIdEnc builds an encoder for the full key: sei prefix + serial.
func SerialEventIdEnc(ser *types.Uint40) (enc *T) {
	return New(NewPrefix(SerialEventId), ser)
}

// SerialEventIdDec builds a decoder for a SerialEventId key; the prefix is
// consumed positionally (NewPrefix() with no argument), matching the other
// *Dec constructors in this package.
func SerialEventIdDec(ser *types.Uint40) (enc *T) {
	return New(NewPrefix(), ser)
}
// CompactEvent stores events using serial references instead of full IDs/pubkeys.
// This dramatically reduces storage size by replacing:
//   - 32-byte event ID with 5-byte serial
//   - 32-byte author pubkey with 5-byte pubkey serial
//   - 32-byte e-tag values with 5-byte event serials (or full ID if unknown)
//   - 32-byte p-tag values with 5-byte pubkey serials
//
// Format: cmp|5 serial|compact event data (variable length)
var CompactEvent = next()

// CompactEventVars allocates the serial variable used to decode a CompactEvent key.
func CompactEventVars() (ser *types.Uint40) { return new(types.Uint40) }

// CompactEventEnc builds an encoder for the full key: cmp prefix + serial.
func CompactEventEnc(ser *types.Uint40) (enc *T) {
	return New(NewPrefix(CompactEvent), ser)
}

// CompactEventDec builds a decoder for a CompactEvent key (prefix read positionally).
func CompactEventDec(ser *types.Uint40) (enc *T) { return New(NewPrefix(), ser) }

View File

@@ -399,9 +399,11 @@ func TestInlineStorageMigration(t *testing.T) {
i, fetchedEvent.Content, ev.Content)
}
// Verify it's now using inline storage
sevKeyExists := false
// Verify it's now using optimized storage (sev inline OR cmp compact format)
// The migration may convert to sev (version 4) or cmp (version 6) depending on migration order
optimizedStorageExists := false
db.View(func(txn *badger.Txn) error {
// Check for sev (small event inline) format
smallBuf := new(bytes.Buffer)
indexes.SmallEventEnc(serial).MarshalWrite(smallBuf)
@@ -412,15 +414,25 @@ func TestInlineStorageMigration(t *testing.T) {
it.Rewind()
if it.Valid() {
sevKeyExists = true
t.Logf("Event %d (%s) successfully migrated to inline storage",
optimizedStorageExists = true
t.Logf("Event %d (%s) successfully migrated to inline (sev) storage",
i, hex.Enc(ev.ID[:8]))
return nil
}
// Check for cmp (compact format) storage
cmpBuf := new(bytes.Buffer)
indexes.CompactEventEnc(serial).MarshalWrite(cmpBuf)
if _, err := txn.Get(cmpBuf.Bytes()); err == nil {
optimizedStorageExists = true
t.Logf("Event %d (%s) successfully migrated to compact (cmp) storage",
i, hex.Enc(ev.ID[:8]))
}
return nil
})
if !sevKeyExists {
t.Errorf("Event %d was not migrated to inline storage", i)
if !optimizedStorageExists {
t.Errorf("Event %d was not migrated to optimized storage (sev or cmp)", i)
}
}
}

View File

@@ -18,7 +18,7 @@ import (
)
const (
currentVersion uint32 = 5
currentVersion uint32 = 6
)
func (d *D) RunMigrations() {
@@ -99,6 +99,14 @@ func (d *D) RunMigrations() {
// bump to version 5
_ = d.writeVersionTag(5)
}
if dbVersion < 6 {
log.I.F("migrating to version 6...")
// convert events to compact serial-reference format
// This replaces 32-byte IDs/pubkeys with 5-byte serial references
d.ConvertToCompactEventFormat()
// bump to version 6
_ = d.writeVersionTag(6)
}
}
// writeVersionTag writes a new version tag key to the database (no value)
@@ -683,3 +691,337 @@ func (d *D) ReencodeEventsWithOptimizedTags() {
log.I.F("migration complete: re-encoded %d events, saved approximately %d bytes (%.2f KB)",
processedCount, savedBytes, float64(savedBytes)/1024.0)
}
// ConvertToCompactEventFormat migrates all existing events to the new compact format.
// This format uses 5-byte serial references instead of 32-byte IDs/pubkeys,
// dramatically reducing storage requirements (up to 80% savings on ID/pubkey data).
//
// The migration:
// 1. Reads each event from legacy storage (evt/sev prefixes)
// 2. Creates SerialEventId mapping (sei prefix) for event ID lookup
// 3. Re-encodes the event in compact format
// 4. Stores in cmp prefix
// 5. Optionally removes legacy storage after successful migration
func (d *D) ConvertToCompactEventFormat() {
	log.I.F("converting events to compact serial-reference format...")
	var err error
	// EventMigration captures everything needed to re-encode one event
	// outside the read transaction.
	type EventMigration struct {
		Serial   uint64
		EventId  []byte
		OldData  []byte
		OldKey   []byte
		IsInline bool // true if from sev, false if from evt
	}
	var migrations []EventMigration
	var processedCount int
	var savedBytes int64
	// Create resolver for compact encoding
	resolver := NewDatabaseSerialResolver(d, d.serialCache)
	// First pass: collect all events that need migration
	// Only process events that don't have a cmp entry yet
	if err = d.View(func(txn *badger.Txn) error {
		// Process evt (large events) table
		evtPrf := new(bytes.Buffer)
		// EventEnc(nil) yields the bare 3-byte prefix for prefix iteration.
		if err = indexes.EventEnc(nil).MarshalWrite(evtPrf); chk.E(err) {
			return err
		}
		it := txn.NewIterator(badger.IteratorOptions{Prefix: evtPrf.Bytes()})
		defer it.Close()
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			key := item.KeyCopy(nil)
			// Extract serial from key
			ser := indexes.EventVars()
			if err = indexes.EventDec(ser).UnmarshalRead(bytes.NewBuffer(key)); chk.E(err) {
				continue
			}
			// Check if this event already has a cmp entry
			cmpKey := new(bytes.Buffer)
			if err = indexes.CompactEventEnc(ser).MarshalWrite(cmpKey); err == nil {
				if _, getErr := txn.Get(cmpKey.Bytes()); getErr == nil {
					// Already migrated
					continue
				}
			}
			var val []byte
			if val, err = item.ValueCopy(nil); chk.E(err) {
				continue
			}
			// Skip if this is already compact format
			if len(val) > 0 && val[0] == CompactFormatVersion {
				continue
			}
			// Decode the event to get the ID
			ev := new(event.E)
			if err = ev.UnmarshalBinary(bytes.NewBuffer(val)); chk.E(err) {
				continue
			}
			migrations = append(migrations, EventMigration{
				Serial:   ser.Get(),
				EventId:  ev.ID,
				OldData:  val,
				OldKey:   key,
				IsInline: false,
			})
		}
		// Release the first iterator before opening the second.
		it.Close()
		// Process sev (small inline events) table
		sevPrf := new(bytes.Buffer)
		if err = indexes.SmallEventEnc(nil).MarshalWrite(sevPrf); chk.E(err) {
			return err
		}
		it2 := txn.NewIterator(badger.IteratorOptions{Prefix: sevPrf.Bytes()})
		defer it2.Close()
		for it2.Rewind(); it2.Valid(); it2.Next() {
			item := it2.Item()
			key := item.KeyCopy(nil)
			// Extract serial and data from inline key
			// (sev key layout: prefix(3) + serial(5) + size(2) + data)
			if len(key) <= 8+2 {
				continue
			}
			// Extract serial
			ser := new(types.Uint40)
			if err = ser.UnmarshalRead(bytes.NewReader(key[3:8])); chk.E(err) {
				continue
			}
			// Check if this event already has a cmp entry
			cmpKey := new(bytes.Buffer)
			if err = indexes.CompactEventEnc(ser).MarshalWrite(cmpKey); err == nil {
				if _, getErr := txn.Get(cmpKey.Bytes()); getErr == nil {
					// Already migrated
					continue
				}
			}
			// Extract size (uint16 big-endian) and data
			sizeIdx := 8
			size := int(key[sizeIdx])<<8 | int(key[sizeIdx+1])
			dataStart := sizeIdx + 2
			if len(key) < dataStart+size {
				continue
			}
			eventData := key[dataStart : dataStart+size]
			// Skip if this is already compact format
			if len(eventData) > 0 && eventData[0] == CompactFormatVersion {
				continue
			}
			// Decode the event to get the ID
			ev := new(event.E)
			if err = ev.UnmarshalBinary(bytes.NewBuffer(eventData)); chk.E(err) {
				continue
			}
			migrations = append(migrations, EventMigration{
				Serial:   ser.Get(),
				EventId:  ev.ID,
				OldData:  eventData,
				OldKey:   key,
				IsInline: true,
			})
		}
		return nil
	}); chk.E(err) {
		return
	}
	log.I.F("found %d events to convert to compact format", len(migrations))
	if len(migrations) == 0 {
		log.I.F("no events need conversion")
		return
	}
	// Second pass: convert in batches (bounds the size of each write txn)
	const batchSize = 500
	for i := 0; i < len(migrations); i += batchSize {
		end := i + batchSize
		if end > len(migrations) {
			end = len(migrations)
		}
		batch := migrations[i:end]
		if err = d.Update(func(txn *badger.Txn) error {
			for _, m := range batch {
				// Decode the legacy event
				ev := new(event.E)
				if err = ev.UnmarshalBinary(bytes.NewBuffer(m.OldData)); chk.E(err) {
					log.W.F("migration: failed to decode event serial %d: %v", m.Serial, err)
					continue
				}
				// Store SerialEventId mapping
				if err = d.StoreEventIdSerial(txn, m.Serial, m.EventId); chk.E(err) {
					log.W.F("migration: failed to store event ID mapping for serial %d: %v", m.Serial, err)
					continue
				}
				// Encode in compact format
				compactData, encErr := MarshalCompactEvent(ev, resolver)
				if encErr != nil {
					log.W.F("migration: failed to encode compact event for serial %d: %v", m.Serial, encErr)
					continue
				}
				// Store compact event
				ser := new(types.Uint40)
				if err = ser.Set(m.Serial); chk.E(err) {
					continue
				}
				cmpKey := new(bytes.Buffer)
				if err = indexes.CompactEventEnc(ser).MarshalWrite(cmpKey); chk.E(err) {
					continue
				}
				if err = txn.Set(cmpKey.Bytes(), compactData); chk.E(err) {
					log.W.F("migration: failed to store compact event for serial %d: %v", m.Serial, err)
					continue
				}
				// Track savings
				savedBytes += int64(len(m.OldData) - len(compactData))
				processedCount++
				// Cache the mappings
				d.serialCache.CacheEventId(m.Serial, m.EventId)
			}
			return nil
		}); chk.E(err) {
			log.W.F("batch migration failed: %v", err)
			continue
		}
		if (i/batchSize)%10 == 0 && i > 0 {
			log.I.F("migration progress: %d/%d events converted", i, len(migrations))
		}
	}
	log.I.F("compact format migration complete: converted %d events, saved approximately %d bytes (%.2f MB)",
		processedCount, savedBytes, float64(savedBytes)/(1024.0*1024.0))
	// Cleanup legacy storage after successful migration
	log.I.F("cleaning up legacy event storage (evt/sev prefixes)...")
	d.CleanupLegacyEventStorage()
}
// CleanupLegacyEventStorage removes legacy evt and sev storage entries after
// compact format migration. This reclaims disk space by removing the old storage
// format entries once all events have been successfully migrated to cmp format.
//
// The cleanup:
// 1. Iterates through all cmp entries (compact format)
// 2. For each serial found in cmp, deletes corresponding evt and sev entries
// 3. Reports total bytes reclaimed
func (d *D) CleanupLegacyEventStorage() {
	var err error
	var cleanedEvt, cleanedSev int
	var bytesReclaimed int64
	// Collect serials from cmp table
	var serialsToClean []uint64
	if err = d.View(func(txn *badger.Txn) error {
		cmpPrf := new(bytes.Buffer)
		// CompactEventEnc(nil) yields the bare 3-byte cmp prefix.
		if err = indexes.CompactEventEnc(nil).MarshalWrite(cmpPrf); chk.E(err) {
			return err
		}
		it := txn.NewIterator(badger.IteratorOptions{Prefix: cmpPrf.Bytes()})
		defer it.Close()
		for it.Rewind(); it.Valid(); it.Next() {
			key := it.Item().Key()
			// Extract serial from key (prefix 3 bytes + serial 5 bytes)
			if len(key) >= 8 {
				ser := new(types.Uint40)
				if err = ser.UnmarshalRead(bytes.NewReader(key[3:8])); err == nil {
					serialsToClean = append(serialsToClean, ser.Get())
				}
			}
		}
		return nil
	}); chk.E(err) {
		log.W.F("failed to collect compact event serials: %v", err)
		return
	}
	log.I.F("found %d compact events to clean up legacy storage for", len(serialsToClean))
	// Clean up in batches (bounds the size of each write transaction)
	const batchSize = 1000
	for i := 0; i < len(serialsToClean); i += batchSize {
		end := i + batchSize
		if end > len(serialsToClean) {
			end = len(serialsToClean)
		}
		batch := serialsToClean[i:end]
		if err = d.Update(func(txn *badger.Txn) error {
			for _, serial := range batch {
				ser := new(types.Uint40)
				if err = ser.Set(serial); err != nil {
					continue
				}
				// Try to delete evt entry
				evtKeyBuf := new(bytes.Buffer)
				if err = indexes.EventEnc(ser).MarshalWrite(evtKeyBuf); err == nil {
					item, getErr := txn.Get(evtKeyBuf.Bytes())
					if getErr == nil {
						// Track size before deleting
						bytesReclaimed += int64(item.ValueSize())
						if delErr := txn.Delete(evtKeyBuf.Bytes()); delErr == nil {
							cleanedEvt++
						}
					}
				}
				// Try to delete sev entry (need to iterate with prefix since key includes inline data)
				sevKeyBuf := new(bytes.Buffer)
				if err = indexes.SmallEventEnc(ser).MarshalWrite(sevKeyBuf); err == nil {
					opts := badger.DefaultIteratorOptions
					opts.Prefix = sevKeyBuf.Bytes()
					it := txn.NewIterator(opts)
					it.Rewind()
					if it.Valid() {
						key := it.Item().KeyCopy(nil)
						bytesReclaimed += int64(len(key)) // sev stores data in key
						if delErr := txn.Delete(key); delErr == nil {
							cleanedSev++
						}
					}
					it.Close()
				}
			}
			return nil
		}); chk.E(err) {
			log.W.F("batch cleanup failed: %v", err)
			continue
		}
		if (i/batchSize)%10 == 0 && i > 0 {
			log.I.F("cleanup progress: %d/%d events processed", i, len(serialsToClean))
		}
	}
	log.I.F("legacy storage cleanup complete: removed %d evt entries, %d sev entries, reclaimed approximately %d bytes (%.2f MB)",
		cleanedEvt, cleanedSev, bytesReclaimed, float64(bytesReclaimed)/(1024.0*1024.0))
}

View File

@@ -264,18 +264,30 @@ func (d *D) SaveEvent(c context.Context, ev *event.E) (
// ev.ID, ev.Kind,
// )
// Serialize event once to check size
eventDataBuf := new(bytes.Buffer)
ev.MarshalBinary(eventDataBuf)
eventData := eventDataBuf.Bytes()
// Create serial resolver for compact encoding
resolver := NewDatabaseSerialResolver(d, d.serialCache)
// Determine storage strategy (Reiser4 optimizations)
// Use the threshold from database configuration
// Typical values: 384 (conservative), 512 (recommended), 1024 (aggressive)
smallEventThreshold := d.inlineEventThreshold
isSmallEvent := smallEventThreshold > 0 && len(eventData) <= smallEventThreshold
isReplaceableEvent := kind.IsReplaceable(ev.Kind)
isAddressableEvent := kind.IsParameterizedReplaceable(ev.Kind)
// Serialize event in compact format using serial references
// This dramatically reduces storage by replacing 32-byte IDs/pubkeys with 5-byte serials
compactData, compactErr := MarshalCompactEvent(ev, resolver)
// Calculate legacy size for comparison (for metrics tracking)
// We marshal to get accurate size comparison
legacyBuf := new(bytes.Buffer)
ev.MarshalBinary(legacyBuf)
legacySize := legacyBuf.Len()
if compactErr != nil {
// Fall back to legacy format if compact encoding fails
log.W.F("SaveEvent: compact encoding failed, using legacy format: %v", compactErr)
compactData = legacyBuf.Bytes()
} else {
// Track storage savings
TrackCompactSaving(legacySize, len(compactData))
log.T.F("SaveEvent: compact %d bytes vs legacy %d bytes (saved %d bytes, %.1f%%)",
len(compactData), legacySize, legacySize-len(compactData),
float64(legacySize-len(compactData))/float64(legacySize)*100.0)
}
// Start a transaction to save the event and all its indexes
err = d.Update(
@@ -293,106 +305,25 @@ func (d *D) SaveEvent(c context.Context, ev *event.E) (
}
}
// Write the event using optimized storage strategy
// Determine if we should use inline addressable/replaceable storage
useAddressableInline := false
var dTag *tag.T
if isAddressableEvent && isSmallEvent {
dTag = ev.Tags.GetFirst([]byte("d"))
useAddressableInline = dTag != nil
// Store the SerialEventId mapping (serial -> full 32-byte event ID)
// This is required for reconstructing compact events
if err = d.StoreEventIdSerial(txn, serial, ev.ID); chk.E(err) {
return
}
// All small events get a sev key for serial-based access
if isSmallEvent {
// Small event: store inline with sev prefix
// Format: sev|serial|size_uint16|event_data
keyBuf := new(bytes.Buffer)
if err = indexes.SmallEventEnc(ser).MarshalWrite(keyBuf); chk.E(err) {
return
}
// Append size as uint16 big-endian (2 bytes for size up to 65535)
sizeBytes := []byte{
byte(len(eventData) >> 8), byte(len(eventData)),
}
keyBuf.Write(sizeBytes)
// Append event data
keyBuf.Write(eventData)
// Cache the event ID mapping
d.serialCache.CacheEventId(serial, ev.ID)
if err = txn.Set(keyBuf.Bytes(), nil); chk.E(err) {
return
}
// log.T.F(
// "SaveEvent: stored small event inline (%d bytes)",
// len(eventData),
// )
} else {
// Large event: store separately with evt prefix
keyBuf := new(bytes.Buffer)
if err = indexes.EventEnc(ser).MarshalWrite(keyBuf); chk.E(err) {
return
}
if err = txn.Set(keyBuf.Bytes(), eventData); chk.E(err) {
return
}
// log.T.F(
// "SaveEvent: stored large event separately (%d bytes)",
// len(eventData),
// )
// Store compact event with cmp prefix
// Format: cmp|serial|compact_event_data
// This is the only storage format - legacy evt/sev/aev/rev prefixes
// are handled by migration and no longer written for new events
cmpKeyBuf := new(bytes.Buffer)
if err = indexes.CompactEventEnc(ser).MarshalWrite(cmpKeyBuf); chk.E(err) {
return
}
// Additionally, store replaceable/addressable events with specialized keys for direct access
if useAddressableInline {
// Addressable event: also store with aev|pubkey_hash|kind|dtag_hash|size|data
pubHash := new(types.PubHash)
pubHash.FromPubkey(ev.Pubkey)
kindVal := new(types.Uint16)
kindVal.Set(ev.Kind)
dTagHash := new(types.Ident)
dTagHash.FromIdent(dTag.Value())
keyBuf := new(bytes.Buffer)
if err = indexes.AddressableEventEnc(
pubHash, kindVal, dTagHash,
).MarshalWrite(keyBuf); chk.E(err) {
return
}
// Append size as uint16 big-endian
sizeBytes := []byte{
byte(len(eventData) >> 8), byte(len(eventData)),
}
keyBuf.Write(sizeBytes)
// Append event data
keyBuf.Write(eventData)
if err = txn.Set(keyBuf.Bytes(), nil); chk.E(err) {
return
}
// log.T.F("SaveEvent: also stored addressable event with specialized key")
} else if isReplaceableEvent && isSmallEvent {
// Replaceable event: also store with rev|pubkey_hash|kind|size|data
pubHash := new(types.PubHash)
pubHash.FromPubkey(ev.Pubkey)
kindVal := new(types.Uint16)
kindVal.Set(ev.Kind)
keyBuf := new(bytes.Buffer)
if err = indexes.ReplaceableEventEnc(
pubHash, kindVal,
).MarshalWrite(keyBuf); chk.E(err) {
return
}
// Append size as uint16 big-endian
sizeBytes := []byte{
byte(len(eventData) >> 8), byte(len(eventData)),
}
keyBuf.Write(sizeBytes)
// Append event data
keyBuf.Write(eventData)
if err = txn.Set(keyBuf.Bytes(), nil); chk.E(err) {
return
}
log.T.F("SaveEvent: also stored replaceable event with specialized key")
if err = txn.Set(cmpKeyBuf.Bytes(), compactData); chk.E(err) {
return
}
// Create graph edges between event and all related pubkeys

View File

@@ -0,0 +1,374 @@
//go:build !(js && wasm)
package database
import (
	"bytes"
	"errors"
	"fmt"
	"sync"

	"github.com/dgraph-io/badger/v4"
	"lol.mleku.dev/chk"
	"next.orly.dev/pkg/database/indexes"
	"next.orly.dev/pkg/database/indexes/types"
)
// SerialCache provides LRU-style caching for pubkey and event ID serial
// lookups. This is critical for compact event decoding performance since
// every event requires looking up the author pubkey and potentially multiple
// tag references.
type SerialCache struct {
	// serial -> full 32-byte pubkey (decode direction)
	pubkeyBySerial     map[uint64][]byte
	pubkeyBySerialLock sync.RWMutex
	// full pubkey (as string key) -> serial (encode direction)
	serialByPubkeyHash     map[string]uint64
	serialByPubkeyHashLock sync.RWMutex
	// serial -> full 32-byte event ID (decode direction)
	eventIdBySerial     map[uint64][]byte
	eventIdBySerialLock sync.RWMutex
	// full event ID (as string key) -> serial (encode direction)
	serialByEventIdHash     map[string]uint64
	serialByEventIdHashLock sync.RWMutex
	// Soft caps on the number of entries in each cache pair.
	maxPubkeys  int
	maxEventIds int
}

// NewSerialCache creates a new serial cache with the specified sizes.
// Non-positive sizes select the defaults: 100k pubkeys (~3.2MB) and
// 500k event IDs (~16MB).
func NewSerialCache(maxPubkeys, maxEventIds int) *SerialCache {
	const (
		defaultMaxPubkeys  = 100000
		defaultMaxEventIds = 500000
	)
	if maxPubkeys <= 0 {
		maxPubkeys = defaultMaxPubkeys
	}
	if maxEventIds <= 0 {
		maxEventIds = defaultMaxEventIds
	}
	return &SerialCache{
		pubkeyBySerial:      make(map[uint64][]byte, maxPubkeys),
		serialByPubkeyHash:  make(map[string]uint64, maxPubkeys),
		eventIdBySerial:     make(map[uint64][]byte, maxEventIds),
		serialByEventIdHash: make(map[string]uint64, maxEventIds),
		maxPubkeys:          maxPubkeys,
		maxEventIds:         maxEventIds,
	}
}
// CachePubkey records the serial<->pubkey association in both directions.
// Pubkeys that are not exactly 32 bytes are ignored. When either map reaches
// its cap, roughly half of its entries are evicted (cheap, non-LRU eviction).
func (c *SerialCache) CachePubkey(serial uint64, pubkey []byte) {
	if len(pubkey) != 32 {
		return
	}
	// Forward direction: serial -> pubkey (stores a private copy).
	c.pubkeyBySerialLock.Lock()
	if len(c.pubkeyBySerial) >= c.maxPubkeys {
		// Crude eviction: drop roughly half the entries in map order.
		dropped := 0
		for k := range c.pubkeyBySerial {
			delete(c.pubkeyBySerial, k)
			dropped++
			if dropped >= c.maxPubkeys/2 {
				break
			}
		}
	}
	cp := make([]byte, 32)
	copy(cp, pubkey)
	c.pubkeyBySerial[serial] = cp
	c.pubkeyBySerialLock.Unlock()
	// Reverse direction: pubkey -> serial.
	c.serialByPubkeyHashLock.Lock()
	if len(c.serialByPubkeyHash) >= c.maxPubkeys {
		dropped := 0
		for k := range c.serialByPubkeyHash {
			delete(c.serialByPubkeyHash, k)
			dropped++
			if dropped >= c.maxPubkeys/2 {
				break
			}
		}
	}
	c.serialByPubkeyHash[string(pubkey)] = serial
	c.serialByPubkeyHashLock.Unlock()
}
// GetPubkeyBySerial returns the cached pubkey for a serial, if present.
func (c *SerialCache) GetPubkeyBySerial(serial uint64) (pubkey []byte, found bool) {
	c.pubkeyBySerialLock.RLock()
	defer c.pubkeyBySerialLock.RUnlock()
	pubkey, found = c.pubkeyBySerial[serial]
	return
}
// GetSerialByPubkey returns the cached serial for a pubkey, if present.
func (c *SerialCache) GetSerialByPubkey(pubkey []byte) (serial uint64, found bool) {
	c.serialByPubkeyHashLock.RLock()
	defer c.serialByPubkeyHashLock.RUnlock()
	serial, found = c.serialByPubkeyHash[string(pubkey)]
	return
}
// CacheEventId records the serial<->event-ID association in both directions.
// IDs that are not exactly 32 bytes are ignored. When either map reaches its
// cap, roughly half of its entries are evicted (cheap, non-LRU eviction).
func (c *SerialCache) CacheEventId(serial uint64, eventId []byte) {
	if len(eventId) != 32 {
		return
	}
	// Forward direction: serial -> event ID (stores a private copy).
	c.eventIdBySerialLock.Lock()
	if len(c.eventIdBySerial) >= c.maxEventIds {
		dropped := 0
		for k := range c.eventIdBySerial {
			delete(c.eventIdBySerial, k)
			dropped++
			if dropped >= c.maxEventIds/2 {
				break
			}
		}
	}
	cp := make([]byte, 32)
	copy(cp, eventId)
	c.eventIdBySerial[serial] = cp
	c.eventIdBySerialLock.Unlock()
	// Reverse direction: event ID -> serial.
	c.serialByEventIdHashLock.Lock()
	if len(c.serialByEventIdHash) >= c.maxEventIds {
		dropped := 0
		for k := range c.serialByEventIdHash {
			delete(c.serialByEventIdHash, k)
			dropped++
			if dropped >= c.maxEventIds/2 {
				break
			}
		}
	}
	c.serialByEventIdHash[string(eventId)] = serial
	c.serialByEventIdHashLock.Unlock()
}
// GetEventIdBySerial returns the cached event ID for a serial, if present.
func (c *SerialCache) GetEventIdBySerial(serial uint64) (eventId []byte, found bool) {
	c.eventIdBySerialLock.RLock()
	defer c.eventIdBySerialLock.RUnlock()
	eventId, found = c.eventIdBySerial[serial]
	return
}
// GetSerialByEventId returns the cached serial for an event ID, if present.
func (c *SerialCache) GetSerialByEventId(eventId []byte) (serial uint64, found bool) {
	c.serialByEventIdHashLock.RLock()
	defer c.serialByEventIdHashLock.RUnlock()
	serial, found = c.serialByEventIdHash[string(eventId)]
	return
}
// DatabaseSerialResolver implements SerialResolver using the database for
// authoritative lookups and a SerialCache to avoid repeated reads.
type DatabaseSerialResolver struct {
	db    *D           // backing database, consulted on cache misses
	cache *SerialCache // shared lookup cache (hot path)
}

// NewDatabaseSerialResolver creates a new resolver backed by db and cache.
func NewDatabaseSerialResolver(db *D, cache *SerialCache) *DatabaseSerialResolver {
	return &DatabaseSerialResolver{db: db, cache: cache}
}
// GetOrCreatePubkeySerial implements SerialResolver: it returns the serial
// for a 32-byte pubkey, checking the cache before hitting the database
// (which assigns a new serial when none exists yet), and caches the result.
func (r *DatabaseSerialResolver) GetOrCreatePubkeySerial(pubkey []byte) (serial uint64, err error) {
	if len(pubkey) != 32 {
		return 0, errors.New("pubkey must be 32 bytes")
	}
	// Fast path: already cached.
	if cached, ok := r.cache.GetSerialByPubkey(pubkey); ok {
		return cached, nil
	}
	// Slow path: database lookup/creation.
	ser, dbErr := r.db.GetOrCreatePubkeySerial(pubkey)
	if dbErr != nil {
		return 0, dbErr
	}
	serial = ser.Get()
	r.cache.CachePubkey(serial, pubkey)
	return serial, nil
}
// GetPubkeyBySerial implements SerialResolver: cache first, then database;
// successful database hits are cached for subsequent lookups.
func (r *DatabaseSerialResolver) GetPubkeyBySerial(serial uint64) (pubkey []byte, err error) {
	// Fast path: already cached.
	if cached, ok := r.cache.GetPubkeyBySerial(serial); ok {
		return cached, nil
	}
	// Slow path: database lookup.
	ser := new(types.Uint40)
	if err = ser.Set(serial); err != nil {
		return nil, err
	}
	if pubkey, err = r.db.GetPubkeyBySerial(ser); err != nil {
		return nil, err
	}
	r.cache.CachePubkey(serial, pubkey)
	return pubkey, nil
}
// GetEventSerialById implements SerialResolver. found=false with a nil error
// means the event ID has no serial known to this database.
// NOTE(review): database errors from GetSerialById are conflated with
// "not found" — confirm that transient read failures are acceptable here.
func (r *DatabaseSerialResolver) GetEventSerialById(eventId []byte) (serial uint64, found bool, err error) {
	if len(eventId) != 32 {
		return 0, false, errors.New("event ID must be 32 bytes")
	}
	// Fast path: already cached.
	if cached, ok := r.cache.GetSerialByEventId(eventId); ok {
		return cached, true, nil
	}
	// Slow path: database lookup via GetSerialById.
	ser, dbErr := r.db.GetSerialById(eventId)
	if dbErr != nil {
		// Treated as "unknown ID" rather than a hard failure.
		return 0, false, nil
	}
	serial = ser.Get()
	r.cache.CacheEventId(serial, eventId)
	return serial, true, nil
}
// GetEventIdBySerial implements SerialResolver: cache first, then the
// SerialEventId (sei) index; successful database hits are cached.
func (r *DatabaseSerialResolver) GetEventIdBySerial(serial uint64) (eventId []byte, err error) {
	// Fast path: already cached.
	if cached, ok := r.cache.GetEventIdBySerial(serial); ok {
		return cached, nil
	}
	// Slow path: database lookup via the sei index.
	ser := new(types.Uint40)
	if err = ser.Set(serial); err != nil {
		return nil, err
	}
	if eventId, err = r.db.GetEventIdBySerial(ser); err != nil {
		return nil, err
	}
	r.cache.CacheEventId(serial, eventId)
	return eventId, nil
}
// GetEventIdBySerial looks up the full 32-byte event ID for an event serial
// using the SerialEventId index (sei prefix).
//
// On failure it returns an error that wraps the underlying cause (e.g.
// badger.ErrKeyNotFound) and names the serial, instead of the previous
// context-free "event ID not found for serial" sentinel — callers that only
// test err != nil are unaffected, while errors.Is/As now work.
func (d *D) GetEventIdBySerial(ser *types.Uint40) (eventId []byte, err error) {
	keyBuf := new(bytes.Buffer)
	if err = indexes.SerialEventIdEnc(ser).MarshalWrite(keyBuf); chk.E(err) {
		return nil, err
	}
	err = d.View(func(txn *badger.Txn) error {
		item, gerr := txn.Get(keyBuf.Bytes())
		if chk.E(gerr) {
			return gerr
		}
		// Copy the value out; the slice passed to the closure is only valid
		// inside it.
		return item.Value(func(val []byte) error {
			eventId = make([]byte, len(val))
			copy(eventId, val)
			return nil
		})
	})
	if err != nil {
		// Preserve the underlying cause instead of discarding it.
		return nil, fmt.Errorf("event ID not found for serial %d: %w", ser.Get(), err)
	}
	return eventId, nil
}
// StoreEventIdSerial writes the sei mapping (event serial -> full 32-byte
// event ID) within the given transaction. This is called during event save
// so compact-format events can later be reconstructed with their IDs.
func (d *D) StoreEventIdSerial(txn *badger.Txn, serial uint64, eventId []byte) error {
	if len(eventId) != 32 {
		return errors.New("event ID must be 32 bytes")
	}
	ser := new(types.Uint40)
	if err := ser.Set(serial); err != nil {
		return err
	}
	key := new(bytes.Buffer)
	if err := indexes.SerialEventIdEnc(ser).MarshalWrite(key); chk.E(err) {
		return err
	}
	return txn.Set(key.Bytes(), eventId)
}
// SerialCacheStats holds statistics about the serial cache. The memory
// figures are estimates computed in Stats, not measured heap usage.
type SerialCacheStats struct {
	PubkeysCached      int // Number of pubkeys currently cached
	PubkeysMaxSize     int // Maximum pubkey cache size
	EventIdsCached     int // Number of event IDs currently cached
	EventIdsMaxSize    int // Maximum event ID cache size
	PubkeyMemoryBytes  int // Estimated memory usage for pubkey cache
	EventIdMemoryBytes int // Estimated memory usage for event ID cache
	TotalMemoryBytes   int // Total estimated memory usage
}
// Stats returns a snapshot of cache occupancy plus a rough memory estimate.
func (c *SerialCache) Stats() SerialCacheStats {
	c.pubkeyBySerialLock.RLock()
	nPubkeys := len(c.pubkeyBySerial)
	c.pubkeyBySerialLock.RUnlock()
	c.eventIdBySerialLock.RLock()
	nEventIds := len(c.eventIdBySerial)
	c.eventIdBySerialLock.RUnlock()
	// Estimate: ~40 bytes per entry (8-byte uint64 key + 32-byte value),
	// doubled to account for map bucket overhead.
	const bytesPerEntry = 40 * 2
	pkMem := nPubkeys * bytesPerEntry
	eidMem := nEventIds * bytesPerEntry
	return SerialCacheStats{
		PubkeysCached:      nPubkeys,
		PubkeysMaxSize:     c.maxPubkeys,
		EventIdsCached:     nEventIds,
		EventIdsMaxSize:    c.maxEventIds,
		PubkeyMemoryBytes:  pkMem,
		EventIdMemoryBytes: eidMem,
		TotalMemoryBytes:   pkMem + eidMem,
	}
}
// SerialCacheStats returns statistics about the serial cache. It returns a
// zero-value SerialCacheStats when no cache is configured (d.serialCache is
// nil), so callers need not nil-check.
func (d *D) SerialCacheStats() SerialCacheStats {
	if d.serialCache == nil {
		return SerialCacheStats{}
	}
	return d.serialCache.Stats()
}