Files
next.orly.dev/pkg/bbolt/bloom.go
woikos 9fed1261ad Add BBolt database backend for HDD-optimized archival relays (v0.48.0)
- BBolt B+tree backend with sequential access patterns for spinning disks
- Write batching (5000 events / 128MB / 30s flush) to reduce disk thrashing
- Adjacency list storage for graph data (one key per vertex, not per edge)
- Bloom filter for fast negative edge existence checks (~12MB for 10M edges)
- No query cache (saves RAM, B+tree reads are fast enough on HDD)
- Migration tool: orly migrate --from badger --to bbolt
- Configuration: ORLY_BBOLT_* environment variables

Files modified:
- app/config/config.go: Added BBolt configuration options
- main.go: Added migrate subcommand and BBolt config wiring
- pkg/database/factory.go: Added BBolt factory registration
- pkg/bbolt/*: New BBolt database backend implementation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-06 06:50:58 +01:00

193 lines
4.7 KiB
Go

//go:build !(js && wasm)
package bbolt
import (
"bytes"
"encoding/binary"
"sync"
"github.com/bits-and-blooms/bloom/v3"
bolt "go.etcd.io/bbolt"
"lol.mleku.dev/chk"
)
// bloomFilterKey is the key under which the serialized edge bloom filter is
// stored in the bucketMeta bucket.
const bloomFilterKey = "edge_bloom_filter"
// EdgeBloomFilter provides fast negative lookups for edge existence checks.
// Uses a bloom filter to avoid disk seeks when checking if an edge exists.
//
// The methods that read or modify the filter take mu, so a single
// EdgeBloomFilter can be shared between goroutines. Because it contains a
// mutex it must be used through a pointer and never copied.
type EdgeBloomFilter struct {
// mu guards filter and dirty.
mu sync.RWMutex
// filter is the underlying bloom filter holding the encoded edge keys.
filter *bloom.BloomFilter
// Track if filter has been modified since last persist
dirty bool
}
// NewEdgeBloomFilter loads the persisted edge bloom filter from db or, when
// none is stored there, creates a new empty one.
//
// sizeMB is the approximate size budget in megabytes. It is only used when a
// new filter is created; a filter loaded from db keeps the parameters it was
// saved with. New filters target a 1% false positive rate and are sized at
// roughly 10 bits per element, so 16 MB gives room for about 13 million
// edges. A sizeMB below 1 is treated as 1, because a zero or negative budget
// would otherwise yield a degenerate (or, after unsigned wrap-around, an
// absurdly large) filter.
//
// It returns an error if reading from db or decoding the stored filter fails.
func NewEdgeBloomFilter(sizeMB int, db *bolt.DB) (*EdgeBloomFilter, error) {
	// Try to load a previously persisted filter from the database.
	var stored *bloom.BloomFilter
	err := db.View(func(tx *bolt.Tx) error {
		bucket := tx.Bucket(bucketMeta)
		if bucket == nil {
			return nil
		}
		data := bucket.Get([]byte(bloomFilterKey))
		if data == nil {
			return nil
		}
		// Decode inside the transaction: the slice returned by Get is only
		// valid while the transaction is open.
		filter := &bloom.BloomFilter{}
		if _, err := filter.ReadFrom(bytes.NewReader(data)); err != nil {
			return err
		}
		stored = filter
		return nil
	})
	if chk.E(err) {
		return nil, err
	}
	if stored != nil {
		return &EdgeBloomFilter{filter: stored}, nil
	}

	// Nothing persisted yet: create a new filter.
	// For a 1% false positive rate: m/n ≈ 9.6, k ≈ 7.
	const bitsPerMB = 8 * 1024 * 1024
	if sizeMB < 1 {
		sizeMB = 1
	}
	// Compute in uint64 so the multiplication cannot overflow a 32-bit int.
	totalBits := uint64(sizeMB) * bitsPerMB
	// Estimate capacity based on 10 bits per element for 1% FPR.
	estimatedCapacity := uint(totalBits / 10)
	return &EdgeBloomFilter{
		filter: bloom.NewWithEstimates(estimatedCapacity, 0.01),
	}, nil
}
// Add records a single edge in the bloom filter and marks the filter as
// modified so that the next Persist writes it out.
// The edge is identified by its source serial, destination serial and edge
// type.
func (ebf *EdgeBloomFilter) Add(srcSerial, dstSerial uint64, edgeType byte) {
	// Encoding the key touches no shared state, so do it before locking.
	edge := ebf.makeKey(srcSerial, dstSerial, edgeType)

	ebf.mu.Lock()
	defer ebf.mu.Unlock()

	ebf.filter.Add(edge)
	ebf.dirty = true
}
// AddBatch adds multiple edges to the bloom filter under a single lock
// acquisition and marks the filter as modified.
//
// An empty (or nil) batch is a no-op: it neither takes the lock nor marks the
// filter dirty, so it cannot trigger a needless rewrite of the whole filter
// on the next Persist.
func (ebf *EdgeBloomFilter) AddBatch(edges []EdgeKey) {
	if len(edges) == 0 {
		return
	}

	ebf.mu.Lock()
	defer ebf.mu.Unlock()

	for _, edge := range edges {
		ebf.filter.Add(ebf.makeKey(edge.SrcSerial, edge.DstSerial, edge.EdgeType))
	}
	ebf.dirty = true
}
// MayExist reports whether the given edge might be present.
// A false result means the edge was definitely never added, so the caller
// can skip the disk lookup. A true result means the edge may exist (bloom
// filters can give false positives) and has to be confirmed on disk.
func (ebf *EdgeBloomFilter) MayExist(srcSerial, dstSerial uint64, edgeType byte) bool {
	// Encoding the key touches no shared state, so do it before locking.
	edge := ebf.makeKey(srcSerial, dstSerial, edgeType)

	ebf.mu.RLock()
	defer ebf.mu.RUnlock()

	return ebf.filter.Test(edge)
}
// Persist saves the bloom filter to the database if it has been modified
// since the last successful Persist; otherwise it does nothing.
//
// The filter is serialized while holding the lock, and the database write
// happens after the lock is released so that lookups are not blocked on disk
// I/O. If the write fails, or the meta bucket does not exist (reported as
// bolt.ErrBucketNotFound), the filter is marked dirty again so that a later
// Persist retries instead of silently dropping the unsaved state.
func (ebf *EdgeBloomFilter) Persist(db *bolt.DB) error {
	ebf.mu.Lock()
	if !ebf.dirty {
		ebf.mu.Unlock()
		return nil
	}
	// Serialize while holding the lock to get a consistent snapshot.
	var buf bytes.Buffer
	if _, err := ebf.filter.WriteTo(&buf); err != nil {
		ebf.mu.Unlock()
		return err
	}
	data := buf.Bytes()
	// Clear the flag now so that edges added during the write below set it
	// again and are picked up by the next Persist.
	ebf.dirty = false
	ebf.mu.Unlock()

	// Write the snapshot to the database.
	err := db.Update(func(tx *bolt.Tx) error {
		bucket := tx.Bucket(bucketMeta)
		if bucket == nil {
			return bolt.ErrBucketNotFound
		}
		return bucket.Put([]byte(bloomFilterKey), data)
	})
	if err != nil {
		// The snapshot never reached disk; make sure it is retried.
		ebf.mu.Lock()
		ebf.dirty = true
		ebf.mu.Unlock()
	}
	return err
}
// Reset empties the bloom filter, forgetting every edge added so far, and
// marks it as modified so that the next Persist stores the empty state.
func (ebf *EdgeBloomFilter) Reset() {
	ebf.mu.Lock()
	defer ebf.mu.Unlock()

	f := ebf.filter
	f.ClearAll()
	ebf.dirty = true
}
// makeKey encodes an edge as a fixed 17-byte bloom filter key:
// the source serial (8 bytes, big-endian), the destination serial
// (8 bytes, big-endian) and the edge type (1 byte).
// It does not read or modify the receiver.
func (ebf *EdgeBloomFilter) makeKey(srcSerial, dstSerial uint64, edgeType byte) []byte {
	const keyLen = 8 + 8 + 1

	out := make([]byte, 0, keyLen)
	out = binary.BigEndian.AppendUint64(out, srcSerial)
	out = binary.BigEndian.AppendUint64(out, dstSerial)
	return append(out, edgeType)
}
// Stats reports the filter's current statistics: the approximate number of
// elements it holds and its capacity in bits.
func (ebf *EdgeBloomFilter) Stats() BloomStats {
	ebf.mu.RLock()
	defer ebf.mu.RUnlock()

	var stats BloomStats
	stats.ApproxCount = uint64(ebf.filter.ApproximatedSize())
	stats.Cap = ebf.filter.Cap()
	return stats
}
// BloomStats contains bloom filter statistics, as returned by
// EdgeBloomFilter.Stats.
type BloomStats struct {
// ApproxCount is taken from the filter's ApproximatedSize, which is an
// estimate rather than an exact count.
ApproxCount uint64 // Approximate number of elements
// Cap is taken from the filter's Cap.
Cap uint // Capacity in bits
}
// EdgeKey represents an edge for batch operations such as
// EdgeBloomFilter.AddBatch. Its three fields are the same values that
// EdgeBloomFilter.Add and EdgeBloomFilter.MayExist take as separate arguments.
type EdgeKey struct {
// SrcSerial is the serial of the edge's source.
SrcSerial uint64
// DstSerial is the serial of the edge's destination.
DstSerial uint64
// EdgeType is the kind of relationship; presumably one of the EdgeType*
// constants (verify against callers).
EdgeType byte
}
// Edge type constants.
//
// NOTE(review): these are presumably the values passed as the edgeType
// argument / EdgeKey.EdgeType. That byte becomes part of the bloom filter key
// (see makeKey), so renumbering them would likely make a previously persisted
// filter answer for the wrong edge types — confirm before changing any value.
const (
EdgeTypeAuthor byte = 0 // Event author relationship
EdgeTypePTag byte = 1 // P-tag reference (event mentions pubkey)
EdgeTypeETag byte = 2 // E-tag reference (event references event)
EdgeTypeFollows byte = 3 // Kind 3 follows relationship
EdgeTypeReaction byte = 4 // Kind 7 reaction
EdgeTypeRepost byte = 5 // Kind 6 repost
EdgeTypeReply byte = 6 // Reply (kind 1 with e-tag)
)