Files
next.orly.dev/pkg/bbolt/bbolt.go
woikos 9fed1261ad Add BBolt database backend for HDD-optimized archival relays (v0.48.0)
- BBolt B+tree backend with sequential access patterns for spinning disks
- Write batching (5000 events / 128MB / 30s flush) to reduce disk thrashing
- Adjacency list storage for graph data (one key per vertex, not per edge)
- Bloom filter for fast negative edge existence checks (~12MB for 10M edges)
- No query cache (saves RAM, B+tree reads are fast enough on HDD)
- Migration tool: orly migrate --from badger --to bbolt
- Configuration: ORLY_BBOLT_* environment variables

Files modified:
- app/config/config.go: Added BBolt configuration options
- main.go: Added migrate subcommand and BBolt config wiring
- pkg/database/factory.go: Added BBolt factory registration
- pkg/bbolt/*: New BBolt database backend implementation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-06 06:50:58 +01:00

326 lines
8.2 KiB
Go

//go:build !(js && wasm)
package bbolt
import (
"context"
"errors"
"os"
"path/filepath"
"sync"
"time"
bolt "go.etcd.io/bbolt"
"lol.mleku.dev"
"lol.mleku.dev/chk"
"next.orly.dev/pkg/database"
"next.orly.dev/pkg/utils/apputil"
)
// Bucket names - map to existing index prefixes but without the 3-byte prefix in keys.
//
// Every value below is the name of a top-level BBolt bucket, and each one is
// also listed in allBuckets. Most names are three bytes long; "ev", "pv" and
// "_meta" are the exceptions.
var (
bucketEvt = []byte("evt") // Event storage: serial -> compact event data
bucketEid = []byte("eid") // Event ID index
bucketFpc = []byte("fpc") // Full ID/pubkey index
bucketC = []byte("c--") // Created at index
bucketKc = []byte("kc-") // Kind + created index
bucketPc = []byte("pc-") // Pubkey + created index
bucketKpc = []byte("kpc") // Kind + pubkey + created
bucketTc = []byte("tc-") // Tag + created
bucketTkc = []byte("tkc") // Tag + kind + created
bucketTpc = []byte("tpc") // Tag + pubkey + created
bucketTkp = []byte("tkp") // Tag + kind + pubkey + created
bucketWrd = []byte("wrd") // Word search index
bucketExp = []byte("exp") // Expiration index
bucketPks = []byte("pks") // Pubkey hash -> serial
bucketSpk = []byte("spk") // Serial -> pubkey
bucketSei = []byte("sei") // Serial -> event ID
bucketCmp = []byte("cmp") // Compact event storage
bucketEv = []byte("ev") // Event vertices (adjacency list)
bucketPv = []byte("pv") // Pubkey vertices (adjacency list)
bucketMeta = []byte("_meta") // Markers, version, serial counter, bloom filter
)
// allBuckets lists every bucket that must exist in the database file.
// NewWithConfig creates any that are missing when the database is opened, and
// Wipe deletes and re-creates each of them.
var allBuckets = [][]byte{
bucketEvt, bucketEid, bucketFpc, bucketC, bucketKc, bucketPc, bucketKpc,
bucketTc, bucketTkc, bucketTpc, bucketTkp, bucketWrd, bucketExp,
bucketPks, bucketSpk, bucketSei, bucketCmp, bucketEv, bucketPv, bucketMeta,
}
// B implements the database.Database interface using BBolt as the storage backend.
// Optimized for HDD with write batching and adjacency list graph storage.
//
// Instances are built by New or NewWithConfig, which open the database file,
// create the buckets and start the warmup and background goroutines.
type B struct {
ctx context.Context // watched by backgroundLoop to know when to stop
cancel context.CancelFunc // invoked by backgroundLoop once ctx is done
dataDir string // reported by Path, overwritten by Init
Logger *Logger
db *bolt.DB // handle for the "orly.db" file inside the data directory
ready chan struct{} // closed by warmup; exposed read-only through Ready
// Write batching
batcher *WriteBatcher
// Serial management
serialMu sync.Mutex // held by Wipe while it resets the two counters below
nextSerial uint64
nextPubkeySeq uint64
// Edge bloom filter for fast negative lookups
edgeBloom *EdgeBloomFilter
// Configuration
cfg *BboltConfig
}
// BboltConfig holds bbolt-specific configuration.
//
// NewWithConfig replaces any zero or negative batch, bloom or mmap value with
// the default noted on the field, writing the default back into the struct it
// was given.
type BboltConfig struct {
DataDir string // directory that holds the database file; created if missing
LogLevel string // level name handed to lol.GetLogLevel
// Batch settings (tuned for 7200rpm HDD)
BatchMaxEvents int // Max events before flush (default: 5000)
BatchMaxBytes int64 // Max bytes before flush (default: 128MB)
BatchFlushTimeout time.Duration // Max time before flush (default: 30s)
// Bloom filter settings
BloomSizeMB int // Bloom filter size in MB (default: 16)
// BBolt settings
NoSync bool // Disable fsync for performance (DANGEROUS); copied to bolt.Options.NoSync
InitialMmapSize int // Initial mmap size in bytes (default: 8GB); copied to bolt.Options.InitialMmapSize
}
// Compile-time assertion that *B satisfies database.Database; the build fails
// here if a method of the interface is missing or has the wrong signature.
var _ database.Database = (*B)(nil)
// New opens a BBolt-backed database in dataDir using the stock settings:
// 5000 events / 128MB / 30s write batching, a 16MB bloom filter and an 8GB
// initial mmap. It is a thin wrapper around NewWithConfig; ctx, cancel and
// logLevel are passed through unchanged.
func New(
	ctx context.Context, cancel context.CancelFunc, dataDir, logLevel string,
) (b *B, err error) {
	defaults := BboltConfig{
		DataDir:  dataDir,
		LogLevel: logLevel,
	}
	// Write batching thresholds.
	defaults.BatchMaxEvents = 5000
	defaults.BatchMaxBytes = 128 << 20 // 128MB
	defaults.BatchFlushTimeout = 30 * time.Second
	// Bloom filter and mmap sizing.
	defaults.BloomSizeMB = 16
	defaults.InitialMmapSize = 8 << 30 // 8GB
	return NewWithConfig(ctx, cancel, &defaults)
}
// NewWithConfig creates a new BBolt database instance with full configuration.
//
// Zero or negative batch, bloom and mmap settings in cfg are replaced with the
// defaults, and the replacement is written back into cfg. The data directory
// is created if needed, the database file "orly.db" is opened inside it, all
// buckets are created, and the serial counters, bloom filter and write batcher
// are initialised. Migrations are then run and the warmup and background
// maintenance goroutines are started.
//
// A nil cfg yields an error instead of a panic. If any step after opening the
// database file fails, the file is closed again before returning so that its
// handle and lock are not leaked. On error the returned *B must not be used.
func NewWithConfig(
	ctx context.Context, cancel context.CancelFunc, cfg *BboltConfig,
) (b *B, err error) {
	if cfg == nil {
		err = errors.New("bbolt: nil config")
		return
	}
	// Apply defaults
	if cfg.BatchMaxEvents <= 0 {
		cfg.BatchMaxEvents = 5000
	}
	if cfg.BatchMaxBytes <= 0 {
		cfg.BatchMaxBytes = 128 * 1024 * 1024
	}
	if cfg.BatchFlushTimeout <= 0 {
		cfg.BatchFlushTimeout = 30 * time.Second
	}
	if cfg.BloomSizeMB <= 0 {
		cfg.BloomSizeMB = 16
	}
	if cfg.InitialMmapSize <= 0 {
		cfg.InitialMmapSize = 8 * 1024 * 1024 * 1024
	}
	b = &B{
		ctx:     ctx,
		cancel:  cancel,
		dataDir: cfg.DataDir,
		Logger:  NewLogger(lol.GetLogLevel(cfg.LogLevel), cfg.DataDir),
		ready:   make(chan struct{}),
		cfg:     cfg,
	}
	// Ensure the data directory exists
	if err = os.MkdirAll(cfg.DataDir, 0755); chk.E(err) {
		return
	}
	// NOTE(review): presumably EnsureDir creates the parent directory of the
	// given path, which MkdirAll above has already done — confirm and drop.
	if err = apputil.EnsureDir(filepath.Join(cfg.DataDir, "dummy")); chk.E(err) {
		return
	}
	// Open BBolt database
	dbPath := filepath.Join(cfg.DataDir, "orly.db")
	opts := &bolt.Options{
		Timeout:         10 * time.Second,
		NoSync:          cfg.NoSync,
		InitialMmapSize: cfg.InitialMmapSize,
	}
	if b.db, err = bolt.Open(dbPath, 0600, opts); chk.E(err) {
		return
	}
	// From here on the file is open. Every remaining failure path must close
	// it again, otherwise the handle (and the lock bbolt holds on the file)
	// stays with a half-initialised instance nobody will Close.
	defer func() {
		if err == nil {
			return
		}
		if closeErr := b.db.Close(); closeErr != nil {
			b.Logger.Warningf(
				"bbolt: closing database after failed init: %v", closeErr,
			)
		}
	}()
	// Create all buckets
	if err = b.db.Update(func(tx *bolt.Tx) error {
		for _, bucket := range allBuckets {
			if _, createErr := tx.CreateBucketIfNotExists(bucket); createErr != nil {
				return createErr
			}
		}
		return nil
	}); chk.E(err) {
		return
	}
	// Initialize serial counters
	if err = b.initSerialCounters(); chk.E(err) {
		return
	}
	// Initialize bloom filter
	b.edgeBloom, err = NewEdgeBloomFilter(cfg.BloomSizeMB, b.db)
	if chk.E(err) {
		return
	}
	// Initialize write batcher
	b.batcher = NewWriteBatcher(b.db, b.edgeBloom, cfg, b.Logger)
	// Run migrations
	b.RunMigrations()
	// Start warmup and mark ready
	go b.warmup()
	// Start background maintenance
	go b.backgroundLoop()
	return
}
// Path reports the data directory currently recorded on the instance: the
// DataDir it was constructed with, or whatever Init last stored.
func (b *B) Path() string {
	return b.dataDir
}
// Init records path as the data directory reported by Path. It only updates
// the in-memory field and always returns nil.
func (b *B) Init(path string) (err error) {
	b.dataDir = path
	return
}
// Sync pushes all buffered state to disk. In order, it flushes the write
// batcher, persists the edge bloom filter, persists the serial counters, and
// finally syncs the BBolt file. The first step that fails aborts the sequence
// and its error is returned.
func (b *B) Sync() error {
	var err error
	// Pending batched writes go first so the later steps see them.
	if err = b.batcher.Flush(); err != nil {
		return err
	}
	// Bloom filter snapshot.
	if err = b.edgeBloom.Persist(b.db); err != nil {
		return err
	}
	// Serial counters.
	if err = b.persistSerialCounters(); err != nil {
		return err
	}
	// Lastly, ask BBolt to sync the file itself.
	return b.db.Sync()
}
// Close releases resources and closes the database.
//
// Cleanup is best-effort: the write batcher is shut down, the bloom filter and
// serial counters are persisted, and the BBolt file is closed. A failure in
// one step is logged and does not stop the later steps from running.
//
// The returned error is the first failure encountered, or nil if every step
// succeeded. Earlier failures are no longer masked by a later step succeeding,
// so callers can tell when pending writes or the bloom filter may not have
// reached disk.
func (b *B) Close() (err error) {
	b.Logger.Infof("bbolt: closing database...")
	// Stop accepting new writes and flush pending
	if b.batcher != nil {
		if stepErr := b.batcher.Shutdown(); chk.E(stepErr) && err == nil {
			err = stepErr
		}
	}
	// Persist bloom filter
	if b.edgeBloom != nil {
		if stepErr := b.edgeBloom.Persist(b.db); chk.E(stepErr) && err == nil {
			err = stepErr
		}
	}
	// Persist serial counters
	if stepErr := b.persistSerialCounters(); chk.E(stepErr) && err == nil {
		err = stepErr
	}
	// Close BBolt database
	if b.db != nil {
		if stepErr := b.db.Close(); chk.E(stepErr) {
			if err == nil {
				err = stepErr
			}
			// The file did not close cleanly, so skip the "closed" message.
			return
		}
	}
	b.Logger.Infof("bbolt: database closed")
	return
}
// Wipe removes all stored data. Inside a single write transaction every
// bucket in allBuckets is deleted (a missing bucket is not an error) and
// created again empty; then the in-memory serial counters are set back to 1
// and the edge bloom filter is reset.
//
// NOTE(review): the in-memory reset runs inside the transaction callback, i.e.
// before the commit. If the commit itself fails, the counters and bloom filter
// have already been reset while the on-disk data is unchanged.
func (b *B) Wipe() error {
	// recreate drops the named bucket if present and makes a fresh one.
	recreate := func(tx *bolt.Tx, name []byte) error {
		dropErr := tx.DeleteBucket(name)
		if dropErr != nil && !errors.Is(dropErr, bolt.ErrBucketNotFound) {
			return dropErr
		}
		_, makeErr := tx.CreateBucket(name)
		return makeErr
	}
	return b.db.Update(func(tx *bolt.Tx) error {
		for i := range allBuckets {
			if err := recreate(tx, allBuckets[i]); err != nil {
				return err
			}
		}
		// Serial counters start over at 1.
		b.serialMu.Lock()
		b.nextSerial, b.nextPubkeySeq = 1, 1
		b.serialMu.Unlock()
		// The bloom filter is reset along with the data.
		b.edgeBloom.Reset()
		return nil
	})
}
// SetLogLevel converts the level name with lol.GetLogLevel and applies the
// result to the instance's Logger.
func (b *B) SetLogLevel(level string) {
	parsed := lol.GetLogLevel(level)
	b.Logger.SetLogLevel(parsed)
}
// Ready exposes, receive-only, the channel that warmup closes once the
// database is ready to serve requests. Receiving from it blocks until then.
func (b *B) Ready() <-chan struct{} {
	var signal <-chan struct{} = b.ready
	return signal
}
// warmup waits a fixed one-second settle period, logs that the database is
// ready, and closes the ready channel on the way out. NewWithConfig starts it
// on its own goroutine.
func (b *B) warmup() {
	// The close is deferred so it happens after the log line below.
	defer close(b.ready)
	const settle = time.Second
	// Give the database time to settle
	time.Sleep(settle)
	b.Logger.Infof("bbolt: database warmup complete, ready to serve requests")
}
// backgroundLoop drives periodic maintenance until the instance context is
// done. Every 10 minutes it calls DeleteExpired; every 5 minutes it persists
// the edge bloom filter, logging a warning on failure. When the context is
// done it calls the stored cancel function and returns, stopping both tickers.
func (b *B) backgroundLoop() {
	const (
		expirationInterval   = 10 * time.Minute
		bloomPersistInterval = 5 * time.Minute
	)
	expiry := time.NewTicker(expirationInterval)
	defer expiry.Stop()
	bloom := time.NewTicker(bloomPersistInterval)
	defer bloom.Stop()
	for {
		select {
		case <-b.ctx.Done():
			b.cancel()
			return
		case <-bloom.C:
			persistErr := b.edgeBloom.Persist(b.db)
			if chk.E(persistErr) {
				b.Logger.Warningf("bbolt: failed to persist bloom filter: %v", persistErr)
			}
		case <-expiry.C:
			b.DeleteExpired()
		}
	}
}