Add LRU cache for serial lookups with dynamic scaling (v0.36.17)
Some checks failed
Go / build-and-release (push) Has been cancelled

- Add generic LRUCache[K, V] implementation using container/list for O(1) ops
- Replace random 50% eviction with proper LRU eviction in SerialCache
- Cache now starts empty and grows on demand up to configured limits
- Use [32]byte keys instead of string([]byte) to avoid allocation overhead
- Single-entry eviction at capacity instead of 50% bulk clearing
- Add comprehensive unit tests and benchmarks for LRUCache
- Benchmarks show ~32-34 ns/op with 0 allocations for Get/Put

Files modified:
- pkg/database/lrucache.go: New generic LRU cache implementation
- pkg/database/lrucache_test.go: Unit tests and benchmarks
- pkg/database/serial_cache.go: Refactored to use LRUCache

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-25 06:25:21 +01:00
parent eddd05eabf
commit 77d153a9c7
4 changed files with 458 additions and 124 deletions

119
pkg/database/lrucache.go Normal file
View File

@@ -0,0 +1,119 @@
//go:build !(js && wasm)
package database
import (
"container/list"
"sync"
)
// LRUCache provides a thread-safe LRU cache with configurable max size.
// It starts empty and grows on demand up to maxSize. When at capacity,
// the least recently used entry is evicted to make room for new entries.
type LRUCache[K comparable, V any] struct {
mu sync.Mutex
items map[K]*list.Element
order *list.List // Front = most recent, Back = least recent
maxSize int
}
// lruEntry holds a key-value pair for the LRU list.
type lruEntry[K comparable, V any] struct {
key K
value V
}
// NewLRUCache creates a new LRU cache with the given maximum size.
// The cache starts empty and grows on demand.
func NewLRUCache[K comparable, V any](maxSize int) *LRUCache[K, V] {
if maxSize <= 0 {
maxSize = 1000 // Default minimum
}
return &LRUCache[K, V]{
items: make(map[K]*list.Element),
order: list.New(),
maxSize: maxSize,
}
}
// Get retrieves a value by key and marks it as recently used.
// Returns the value and true if found, zero value and false otherwise.
func (c *LRUCache[K, V]) Get(key K) (value V, found bool) {
c.mu.Lock()
defer c.mu.Unlock()
if elem, ok := c.items[key]; ok {
c.order.MoveToFront(elem)
entry := elem.Value.(*lruEntry[K, V])
return entry.value, true
}
var zero V
return zero, false
}
// Put adds or updates a value, evicting the LRU entry if at capacity.
func (c *LRUCache[K, V]) Put(key K, value V) {
c.mu.Lock()
defer c.mu.Unlock()
// Update existing entry
if elem, ok := c.items[key]; ok {
c.order.MoveToFront(elem)
elem.Value.(*lruEntry[K, V]).value = value
return
}
// Evict LRU if at capacity
if len(c.items) >= c.maxSize {
oldest := c.order.Back()
if oldest != nil {
entry := oldest.Value.(*lruEntry[K, V])
delete(c.items, entry.key)
c.order.Remove(oldest)
}
}
// Add new entry
entry := &lruEntry[K, V]{key: key, value: value}
elem := c.order.PushFront(entry)
c.items[key] = elem
}
// Delete removes an entry from the cache.
func (c *LRUCache[K, V]) Delete(key K) {
c.mu.Lock()
defer c.mu.Unlock()
if elem, ok := c.items[key]; ok {
delete(c.items, key)
c.order.Remove(elem)
}
}
// Len returns the current number of entries in the cache.
func (c *LRUCache[K, V]) Len() int {
c.mu.Lock()
defer c.mu.Unlock()
return len(c.items)
}
// MaxSize returns the maximum capacity of the cache.
func (c *LRUCache[K, V]) MaxSize() int {
return c.maxSize
}
// Clear removes all entries from the cache.
func (c *LRUCache[K, V]) Clear() {
c.mu.Lock()
defer c.mu.Unlock()
c.items = make(map[K]*list.Element)
c.order.Init()
}
// Contains returns true if the key exists in the cache without updating LRU order.
func (c *LRUCache[K, V]) Contains(key K) bool {
c.mu.Lock()
defer c.mu.Unlock()
_, ok := c.items[key]
return ok
}

View File

@@ -0,0 +1,261 @@
//go:build !(js && wasm)
package database
import (
"sync"
"testing"
)
func TestLRUCache_BasicOperations(t *testing.T) {
c := NewLRUCache[string, int](10)
// Test Put and Get
c.Put("a", 1)
c.Put("b", 2)
c.Put("c", 3)
if v, ok := c.Get("a"); !ok || v != 1 {
t.Errorf("Get('a') = %d, %v; want 1, true", v, ok)
}
if v, ok := c.Get("b"); !ok || v != 2 {
t.Errorf("Get('b') = %d, %v; want 2, true", v, ok)
}
if v, ok := c.Get("c"); !ok || v != 3 {
t.Errorf("Get('c') = %d, %v; want 3, true", v, ok)
}
// Test non-existent key
if _, ok := c.Get("d"); ok {
t.Error("Get('d') should return false for non-existent key")
}
// Test Len
if c.Len() != 3 {
t.Errorf("Len() = %d; want 3", c.Len())
}
}
func TestLRUCache_Update(t *testing.T) {
c := NewLRUCache[string, int](10)
c.Put("a", 1)
c.Put("a", 2) // Update
if v, ok := c.Get("a"); !ok || v != 2 {
t.Errorf("Get('a') = %d, %v; want 2, true", v, ok)
}
if c.Len() != 1 {
t.Errorf("Len() = %d; want 1 (update should not add new entry)", c.Len())
}
}
func TestLRUCache_Eviction(t *testing.T) {
c := NewLRUCache[int, string](3)
// Fill cache
c.Put(1, "one")
c.Put(2, "two")
c.Put(3, "three")
// All should be present
if c.Len() != 3 {
t.Errorf("Len() = %d; want 3", c.Len())
}
// Add one more - should evict "1" (oldest)
c.Put(4, "four")
if c.Len() != 3 {
t.Errorf("Len() = %d; want 3 after eviction", c.Len())
}
// "1" should be evicted
if _, ok := c.Get(1); ok {
t.Error("Key 1 should have been evicted")
}
// Others should still be present
if _, ok := c.Get(2); !ok {
t.Error("Key 2 should still be present")
}
if _, ok := c.Get(3); !ok {
t.Error("Key 3 should still be present")
}
if _, ok := c.Get(4); !ok {
t.Error("Key 4 should be present")
}
}
func TestLRUCache_LRUOrder(t *testing.T) {
c := NewLRUCache[int, string](3)
// Fill cache
c.Put(1, "one")
c.Put(2, "two")
c.Put(3, "three")
// Access "1" - makes it most recent
c.Get(1)
// Add "4" - should evict "2" (now oldest)
c.Put(4, "four")
// "1" should still be present (was accessed recently)
if _, ok := c.Get(1); !ok {
t.Error("Key 1 should still be present after being accessed")
}
// "2" should be evicted
if _, ok := c.Get(2); ok {
t.Error("Key 2 should have been evicted (oldest)")
}
}
func TestLRUCache_Delete(t *testing.T) {
c := NewLRUCache[string, int](10)
c.Put("a", 1)
c.Put("b", 2)
c.Delete("a")
if _, ok := c.Get("a"); ok {
t.Error("Key 'a' should be deleted")
}
if c.Len() != 1 {
t.Errorf("Len() = %d; want 1", c.Len())
}
// Delete non-existent key should not panic
c.Delete("nonexistent")
}
func TestLRUCache_Clear(t *testing.T) {
c := NewLRUCache[int, int](10)
for i := 0; i < 5; i++ {
c.Put(i, i*10)
}
c.Clear()
if c.Len() != 0 {
t.Errorf("Len() = %d; want 0 after Clear()", c.Len())
}
// Should be able to add after clear
c.Put(100, 1000)
if v, ok := c.Get(100); !ok || v != 1000 {
t.Errorf("Get(100) = %d, %v; want 1000, true", v, ok)
}
}
func TestLRUCache_Contains(t *testing.T) {
c := NewLRUCache[string, int](10)
c.Put("a", 1)
if !c.Contains("a") {
t.Error("Contains('a') should return true")
}
if c.Contains("b") {
t.Error("Contains('b') should return false")
}
}
func TestLRUCache_ByteArrayKey(t *testing.T) {
// Test with [32]byte keys (like pubkeys/event IDs)
c := NewLRUCache[[32]byte, uint64](100)
var key1, key2 [32]byte
key1[0] = 1
key2[0] = 2
c.Put(key1, 100)
c.Put(key2, 200)
if v, ok := c.Get(key1); !ok || v != 100 {
t.Errorf("Get(key1) = %d, %v; want 100, true", v, ok)
}
if v, ok := c.Get(key2); !ok || v != 200 {
t.Errorf("Get(key2) = %d, %v; want 200, true", v, ok)
}
}
func TestLRUCache_Concurrent(t *testing.T) {
c := NewLRUCache[int, int](1000)
var wg sync.WaitGroup
// Concurrent writes
for i := 0; i < 10; i++ {
wg.Add(1)
go func(base int) {
defer wg.Done()
for j := 0; j < 100; j++ {
c.Put(base*100+j, j)
}
}(i)
}
// Concurrent reads
for i := 0; i < 10; i++ {
wg.Add(1)
go func(base int) {
defer wg.Done()
for j := 0; j < 100; j++ {
c.Get(base*100 + j)
}
}(i)
}
wg.Wait()
// Cache should not exceed max size
if c.Len() > c.MaxSize() {
t.Errorf("Len() = %d exceeds MaxSize() = %d", c.Len(), c.MaxSize())
}
}
func BenchmarkLRUCache_Put(b *testing.B) {
c := NewLRUCache[uint64, []byte](10000)
value := make([]byte, 32)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
c.Put(uint64(i%10000), value)
}
}
func BenchmarkLRUCache_Get(b *testing.B) {
c := NewLRUCache[uint64, []byte](10000)
value := make([]byte, 32)
// Pre-fill cache
for i := 0; i < 10000; i++ {
c.Put(uint64(i), value)
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
c.Get(uint64(i % 10000))
}
}
func BenchmarkLRUCache_PutGet(b *testing.B) {
c := NewLRUCache[uint64, []byte](10000)
value := make([]byte, 32)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
key := uint64(i % 10000)
c.Put(key, value)
c.Get(key)
}
}

View File

@@ -4,7 +4,6 @@ package database
import ( import (
"errors" "errors"
"sync"
"github.com/dgraph-io/badger/v4" "github.com/dgraph-io/badger/v4"
"lol.mleku.dev/chk" "lol.mleku.dev/chk"
@@ -16,156 +15,114 @@ import (
// SerialCache provides LRU caching for pubkey and event ID serial lookups. // SerialCache provides LRU caching for pubkey and event ID serial lookups.
// This is critical for compact event decoding performance since every event // This is critical for compact event decoding performance since every event
// requires looking up the author pubkey and potentially multiple tag references. // requires looking up the author pubkey and potentially multiple tag references.
//
// The cache uses LRU eviction and starts empty, growing on demand up to the
// configured limits. This provides better memory efficiency than pre-allocation
// and better hit rates than random eviction.
type SerialCache struct { type SerialCache struct {
// Pubkey serial -> full pubkey (for decoding) // Pubkey serial -> full pubkey (for decoding)
pubkeyBySerial map[uint64][]byte pubkeyBySerial *LRUCache[uint64, []byte]
pubkeyBySerialLock sync.RWMutex
// Pubkey hash -> serial (for encoding) // Pubkey bytes -> serial (for encoding)
serialByPubkeyHash map[string]uint64 // Uses [32]byte as key since []byte isn't comparable
serialByPubkeyHashLock sync.RWMutex serialByPubkey *LRUCache[[32]byte, uint64]
// Event serial -> full event ID (for decoding) // Event serial -> full event ID (for decoding)
eventIdBySerial map[uint64][]byte eventIdBySerial *LRUCache[uint64, []byte]
eventIdBySerialLock sync.RWMutex
// Event ID hash -> serial (for encoding) // Event ID bytes -> serial (for encoding)
serialByEventIdHash map[string]uint64 serialByEventId *LRUCache[[32]byte, uint64]
serialByEventIdHashLock sync.RWMutex
// Maximum cache sizes // Limits (for stats reporting)
maxPubkeys int maxPubkeys int
maxEventIds int maxEventIds int
} }
// NewSerialCache creates a new serial cache with the specified sizes. // NewSerialCache creates a new serial cache with the specified maximum sizes.
// The cache starts empty and grows on demand up to these limits.
func NewSerialCache(maxPubkeys, maxEventIds int) *SerialCache { func NewSerialCache(maxPubkeys, maxEventIds int) *SerialCache {
if maxPubkeys <= 0 { if maxPubkeys <= 0 {
maxPubkeys = 100000 // Default 100k pubkeys (~3.2MB) maxPubkeys = 100000 // Default 100k pubkeys
} }
if maxEventIds <= 0 { if maxEventIds <= 0 {
maxEventIds = 500000 // Default 500k event IDs (~16MB) maxEventIds = 500000 // Default 500k event IDs
} }
return &SerialCache{ return &SerialCache{
pubkeyBySerial: make(map[uint64][]byte, maxPubkeys), pubkeyBySerial: NewLRUCache[uint64, []byte](maxPubkeys),
serialByPubkeyHash: make(map[string]uint64, maxPubkeys), serialByPubkey: NewLRUCache[[32]byte, uint64](maxPubkeys),
eventIdBySerial: make(map[uint64][]byte, maxEventIds), eventIdBySerial: NewLRUCache[uint64, []byte](maxEventIds),
serialByEventIdHash: make(map[string]uint64, maxEventIds), serialByEventId: NewLRUCache[[32]byte, uint64](maxEventIds),
maxPubkeys: maxPubkeys, maxPubkeys: maxPubkeys,
maxEventIds: maxEventIds, maxEventIds: maxEventIds,
} }
} }
// CachePubkey adds a pubkey to the cache. // CachePubkey adds a pubkey to the cache in both directions.
func (c *SerialCache) CachePubkey(serial uint64, pubkey []byte) { func (c *SerialCache) CachePubkey(serial uint64, pubkey []byte) {
if len(pubkey) != 32 { if len(pubkey) != 32 {
return return
} }
// Cache serial -> pubkey // Copy pubkey to avoid referencing external slice
c.pubkeyBySerialLock.Lock()
if len(c.pubkeyBySerial) >= c.maxPubkeys {
// Simple eviction: clear half the cache
// A proper LRU would be better but this is simpler
count := 0
for k := range c.pubkeyBySerial {
delete(c.pubkeyBySerial, k)
count++
if count >= c.maxPubkeys/2 {
break
}
}
}
pk := make([]byte, 32) pk := make([]byte, 32)
copy(pk, pubkey) copy(pk, pubkey)
c.pubkeyBySerial[serial] = pk
c.pubkeyBySerialLock.Unlock()
// Cache pubkey hash -> serial // Cache serial -> pubkey (for decoding)
c.serialByPubkeyHashLock.Lock() c.pubkeyBySerial.Put(serial, pk)
if len(c.serialByPubkeyHash) >= c.maxPubkeys {
count := 0 // Cache pubkey -> serial (for encoding)
for k := range c.serialByPubkeyHash { var key [32]byte
delete(c.serialByPubkeyHash, k) copy(key[:], pubkey)
count++ c.serialByPubkey.Put(key, serial)
if count >= c.maxPubkeys/2 {
break
}
}
}
c.serialByPubkeyHash[string(pubkey)] = serial
c.serialByPubkeyHashLock.Unlock()
} }
// GetPubkeyBySerial returns the pubkey for a serial from cache. // GetPubkeyBySerial returns the pubkey for a serial from cache.
func (c *SerialCache) GetPubkeyBySerial(serial uint64) (pubkey []byte, found bool) { func (c *SerialCache) GetPubkeyBySerial(serial uint64) (pubkey []byte, found bool) {
c.pubkeyBySerialLock.RLock() return c.pubkeyBySerial.Get(serial)
pubkey, found = c.pubkeyBySerial[serial]
c.pubkeyBySerialLock.RUnlock()
return
} }
// GetSerialByPubkey returns the serial for a pubkey from cache. // GetSerialByPubkey returns the serial for a pubkey from cache.
func (c *SerialCache) GetSerialByPubkey(pubkey []byte) (serial uint64, found bool) { func (c *SerialCache) GetSerialByPubkey(pubkey []byte) (serial uint64, found bool) {
c.serialByPubkeyHashLock.RLock() if len(pubkey) != 32 {
serial, found = c.serialByPubkeyHash[string(pubkey)] return 0, false
c.serialByPubkeyHashLock.RUnlock() }
return var key [32]byte
copy(key[:], pubkey)
return c.serialByPubkey.Get(key)
} }
// CacheEventId adds an event ID to the cache. // CacheEventId adds an event ID to the cache in both directions.
func (c *SerialCache) CacheEventId(serial uint64, eventId []byte) { func (c *SerialCache) CacheEventId(serial uint64, eventId []byte) {
if len(eventId) != 32 { if len(eventId) != 32 {
return return
} }
// Cache serial -> event ID // Copy event ID to avoid referencing external slice
c.eventIdBySerialLock.Lock()
if len(c.eventIdBySerial) >= c.maxEventIds {
count := 0
for k := range c.eventIdBySerial {
delete(c.eventIdBySerial, k)
count++
if count >= c.maxEventIds/2 {
break
}
}
}
eid := make([]byte, 32) eid := make([]byte, 32)
copy(eid, eventId) copy(eid, eventId)
c.eventIdBySerial[serial] = eid
c.eventIdBySerialLock.Unlock()
// Cache event ID hash -> serial // Cache serial -> event ID (for decoding)
c.serialByEventIdHashLock.Lock() c.eventIdBySerial.Put(serial, eid)
if len(c.serialByEventIdHash) >= c.maxEventIds {
count := 0 // Cache event ID -> serial (for encoding)
for k := range c.serialByEventIdHash { var key [32]byte
delete(c.serialByEventIdHash, k) copy(key[:], eventId)
count++ c.serialByEventId.Put(key, serial)
if count >= c.maxEventIds/2 {
break
}
}
}
c.serialByEventIdHash[string(eventId)] = serial
c.serialByEventIdHashLock.Unlock()
} }
// GetEventIdBySerial returns the event ID for a serial from cache. // GetEventIdBySerial returns the event ID for a serial from cache.
func (c *SerialCache) GetEventIdBySerial(serial uint64) (eventId []byte, found bool) { func (c *SerialCache) GetEventIdBySerial(serial uint64) (eventId []byte, found bool) {
c.eventIdBySerialLock.RLock() return c.eventIdBySerial.Get(serial)
eventId, found = c.eventIdBySerial[serial]
c.eventIdBySerialLock.RUnlock()
return
} }
// GetSerialByEventId returns the serial for an event ID from cache. // GetSerialByEventId returns the serial for an event ID from cache.
func (c *SerialCache) GetSerialByEventId(eventId []byte) (serial uint64, found bool) { func (c *SerialCache) GetSerialByEventId(eventId []byte) (serial uint64, found bool) {
c.serialByEventIdHashLock.RLock() if len(eventId) != 32 {
serial, found = c.serialByEventIdHash[string(eventId)] return 0, false
c.serialByEventIdHashLock.RUnlock() }
return var key [32]byte
copy(key[:], eventId)
return c.serialByEventId.Get(key)
} }
// DatabaseSerialResolver implements SerialResolver using the database and cache. // DatabaseSerialResolver implements SerialResolver using the database and cache.
@@ -341,20 +298,17 @@ type SerialCacheStats struct {
// Stats returns statistics about the serial cache. // Stats returns statistics about the serial cache.
func (c *SerialCache) Stats() SerialCacheStats { func (c *SerialCache) Stats() SerialCacheStats {
c.pubkeyBySerialLock.RLock() pubkeysCached := c.pubkeyBySerial.Len()
pubkeysCached := len(c.pubkeyBySerial) eventIdsCached := c.eventIdBySerial.Len()
c.pubkeyBySerialLock.RUnlock()
c.eventIdBySerialLock.RLock()
eventIdsCached := len(c.eventIdBySerial)
c.eventIdBySerialLock.RUnlock()
// Memory estimation: // Memory estimation:
// - Each pubkey entry: 8 bytes (uint64 key) + 32 bytes (pubkey value) = 40 bytes // Each entry has: key + value + list.Element overhead + map entry overhead
// - Each event ID entry: 8 bytes (uint64 key) + 32 bytes (event ID value) = 40 bytes // - Pubkey by serial: 8 (key) + 32 (value) + ~80 (list) + ~16 (map) ≈ 136 bytes
// - Map overhead is roughly 2x the entry size for buckets // - Serial by pubkey: 32 (key) + 8 (value) + ~80 (list) + ~16 (map) ≈ 136 bytes
pubkeyMemory := pubkeysCached * 40 * 2 // Total per pubkey (both directions): ~272 bytes
eventIdMemory := eventIdsCached * 40 * 2 // Similarly for event IDs: ~272 bytes per entry (both directions)
pubkeyMemory := pubkeysCached * 272
eventIdMemory := eventIdsCached * 272
return SerialCacheStats{ return SerialCacheStats{
PubkeysCached: pubkeysCached, PubkeysCached: pubkeysCached,

View File

@@ -1 +1 @@
v0.36.16 v0.36.17