implement event table subtyping for small events in value log
Some checks failed
Go / build (push) Has been cancelled
Go / release (push) Has been cancelled

This commit is contained in:
2025-11-14 12:15:52 +00:00
parent 7169a2158f
commit 29e175efb0
11 changed files with 2275 additions and 43 deletions

View File

@@ -12,10 +12,11 @@ import (
"next.orly.dev/pkg/database/indexes/types"
"next.orly.dev/pkg/encoders/event"
"next.orly.dev/pkg/encoders/ints"
"next.orly.dev/pkg/encoders/kind"
)
const (
currentVersion uint32 = 3
currentVersion uint32 = 4
)
func (d *D) RunMigrations() {
@@ -82,6 +83,13 @@ func (d *D) RunMigrations() {
// bump to version 3
_ = d.writeVersionTag(3)
}
if dbVersion < 4 {
log.I.F("migrating to version 4...")
// convert small events to inline storage (Reiser4 optimization)
d.ConvertSmallEventsToInline()
// bump to version 4
_ = d.writeVersionTag(4)
}
}
// writeVersionTag writes a new version tag key to the database (no value)
@@ -323,3 +331,209 @@ func (d *D) CleanupEphemeralEvents() {
log.I.F("cleaned up %d ephemeral events from database", deletedCount)
}
// ConvertSmallEventsToInline migrates small events (<=384 bytes) to inline storage.
// This is a Reiser4-inspired optimization that stores small event data in the key
// itself, with the intent of avoiding a second database lookup when reading.
//
// The migration runs in two passes:
//
//  1. A read-only scan of the evt table collects every event whose stored value
//     is at most smallEventThreshold bytes and which decodes successfully.
//  2. The collected events are rewritten in batches. For each event a sev key
//     (serial | size | data) is written; addressable events that carry a d-tag
//     value additionally get an aev key, replaceable events get a rev key, and
//     finally the old evt key is deleted.
//
// Per-event failures are logged and skipped so that a single bad record does not
// abort the migration. A batch whose transaction fails is logged and skipped;
// its events are not counted as converted.
func (d *D) ConvertSmallEventsToInline() {
	log.I.F("converting events to optimized inline storage (Reiser4 optimization)...")
	var err error
	// Largest encoded event that is moved inline. The size is later written as a
	// 2-byte big-endian prefix, which comfortably holds values up to this limit.
	const smallEventThreshold = 384

	// EventData is the per-event record carried from the scan pass to the
	// rewrite pass.
	type EventData struct {
		Serial        uint64 // serial decoded from the evt key
		EventData     []byte // binary-encoded event as stored in the evt value
		OldKey        []byte // original evt key, deleted after conversion
		IsReplaceable bool
		IsAddressable bool
		Pubkey        []byte
		Kind          uint16
		DTag          []byte // value of the first "d" tag (addressable events only)
	}

	var events []EventData
	var convertedCount int
	var deletedCount int

	// countBy returns how many collected events satisfy the predicate; it is
	// only used for the summary log line.
	countBy := func(events []EventData, predicate func(EventData) bool) int {
		count := 0
		for _, e := range events {
			if predicate(e) {
				count++
			}
		}
		return count
	}

	// First pass: identify events in evt table that can benefit from inline storage
	if err = d.View(
		func(txn *badger.Txn) (err error) {
			prf := new(bytes.Buffer)
			if err = indexes.EventEnc(nil).MarshalWrite(prf); chk.E(err) {
				return
			}
			it := txn.NewIterator(badger.IteratorOptions{Prefix: prf.Bytes()})
			defer it.Close()
			for it.Rewind(); it.Valid(); it.Next() {
				item := it.Item()
				var val []byte
				if val, err = item.ValueCopy(nil); chk.E(err) {
					continue
				}
				// Only events small enough for inline storage are migrated.
				if len(val) > smallEventThreshold {
					continue
				}
				// Decode event to check if it's replaceable or addressable
				ev := new(event.E)
				if err = ev.UnmarshalBinary(bytes.NewBuffer(val)); chk.E(err) {
					continue
				}
				// Extract serial from key
				key := item.KeyCopy(nil)
				ser := indexes.EventVars()
				if err = indexes.EventDec(ser).UnmarshalRead(bytes.NewBuffer(key)); chk.E(err) {
					continue
				}
				eventData := EventData{
					Serial:        ser.Get(),
					EventData:     val,
					OldKey:        key,
					IsReplaceable: kind.IsReplaceable(ev.Kind),
					IsAddressable: kind.IsParameterizedReplaceable(ev.Kind),
					Pubkey:        ev.Pubkey,
					Kind:          ev.Kind,
				}
				// Extract d-tag for addressable events
				if eventData.IsAddressable {
					dTag := ev.Tags.GetFirst([]byte("d"))
					if dTag != nil {
						eventData.DTag = dTag.Value()
					}
				}
				events = append(events, eventData)
			}
			// Per-item errors were logged and skipped above; they must not
			// fail the whole scan.
			return nil
		},
	); chk.E(err) {
		return
	}

	log.I.F("found %d events to convert (%d regular, %d replaceable, %d addressable)",
		len(events),
		countBy(events, func(e EventData) bool { return !e.IsReplaceable && !e.IsAddressable }),
		countBy(events, func(e EventData) bool { return e.IsReplaceable }),
		countBy(events, func(e EventData) bool { return e.IsAddressable }),
	)

	// Second pass: convert in batches to avoid large transactions
	const batchSize = 1000
	for i := 0; i < len(events); i += batchSize {
		end := i + batchSize
		if end > len(events) {
			end = len(events)
		}
		batch := events[i:end]

		// Counters for this batch only. They are folded into the totals after
		// the transaction has committed, so a failed (rolled back) batch does
		// not inflate the reported numbers.
		var batchConverted, batchDeleted int

		// Write new inline keys and delete old keys
		if err = d.Update(
			func(txn *badger.Txn) (err error) {
				// Reset in case the closure is ever invoked more than once.
				batchConverted, batchDeleted = 0, 0
				for _, e := range batch {
					// First, write the sev key for serial-based access (all small events)
					sevKeyBuf := new(bytes.Buffer)
					ser := new(types.Uint40)
					if err = ser.Set(e.Serial); chk.E(err) {
						continue
					}
					if err = indexes.SmallEventEnc(ser).MarshalWrite(sevKeyBuf); chk.E(err) {
						continue
					}
					// Append size as uint16 big-endian (2 bytes)
					sizeBytes := []byte{byte(len(e.EventData) >> 8), byte(len(e.EventData))}
					sevKeyBuf.Write(sizeBytes)
					// Append event data
					sevKeyBuf.Write(e.EventData)
					// Write sev key (no value needed)
					if err = txn.Set(sevKeyBuf.Bytes(), nil); chk.E(err) {
						log.W.F("failed to write sev key for serial %d: %v", e.Serial, err)
						continue
					}
					batchConverted++

					// Additionally, for replaceable/addressable events, write specialized keys.
					// NOTE(review): an addressable event without a d-tag value gets no aev
					// key and falls through to the replaceable check — confirm this matches
					// what the read path expects.
					if e.IsAddressable && len(e.DTag) > 0 {
						// Addressable event: aev|pubkey_hash|kind|dtag_hash|size|data
						aevKeyBuf := new(bytes.Buffer)
						pubHash := new(types.PubHash)
						pubHash.FromPubkey(e.Pubkey)
						kindVal := new(types.Uint16)
						kindVal.Set(e.Kind)
						dTagHash := new(types.Ident)
						dTagHash.FromIdent(e.DTag)
						if err = indexes.AddressableEventEnc(pubHash, kindVal, dTagHash).MarshalWrite(aevKeyBuf); chk.E(err) {
							continue
						}
						// Append size and data
						aevKeyBuf.Write(sizeBytes)
						aevKeyBuf.Write(e.EventData)
						if err = txn.Set(aevKeyBuf.Bytes(), nil); chk.E(err) {
							log.W.F("failed to write aev key for serial %d: %v", e.Serial, err)
							continue
						}
					} else if e.IsReplaceable {
						// Replaceable event: rev|pubkey_hash|kind|size|data
						revKeyBuf := new(bytes.Buffer)
						pubHash := new(types.PubHash)
						pubHash.FromPubkey(e.Pubkey)
						kindVal := new(types.Uint16)
						kindVal.Set(e.Kind)
						if err = indexes.ReplaceableEventEnc(pubHash, kindVal).MarshalWrite(revKeyBuf); chk.E(err) {
							continue
						}
						// Append size and data
						revKeyBuf.Write(sizeBytes)
						revKeyBuf.Write(e.EventData)
						if err = txn.Set(revKeyBuf.Bytes(), nil); chk.E(err) {
							log.W.F("failed to write rev key for serial %d: %v", e.Serial, err)
							continue
						}
					}

					// Delete old evt key only after every new key for this event
					// was staged successfully; on any failure above the old key
					// is left in place.
					if err = txn.Delete(e.OldKey); chk.E(err) {
						log.W.F("failed to delete old event key for serial %d: %v", e.Serial, err)
						continue
					}
					batchDeleted++
				}
				// Per-event errors were logged and skipped; commit what succeeded.
				return nil
			},
		); chk.E(err) {
			log.W.F("batch update failed: %v", err)
			continue
		}

		// The batch committed: its work is now durable and can be counted.
		convertedCount += batchConverted
		deletedCount += batchDeleted

		if (i/batchSize)%10 == 0 && i > 0 {
			// Report the end of the batch just processed rather than its start.
			log.I.F("progress: %d/%d events converted", end, len(events))
		}
	}

	log.I.F("migration complete: converted %d events to optimized inline storage, deleted %d old keys", convertedCount, deletedCount)
}