Add LRU cache for serial lookups with dynamic scaling (v0.36.17)

- Add generic LRUCache[K, V] implementation using container/list for O(1) ops
- Replace random 50% eviction with proper LRU eviction in SerialCache
- Cache now starts empty and grows on demand up to configured limits
- Use [32]byte keys instead of string([]byte) to avoid allocation overhead
- Single-entry eviction at capacity instead of 50% bulk clearing
- Add comprehensive unit tests and benchmarks for LRUCache
- Benchmarks show ~32-34 ns/op with 0 allocations for Get/Put

Files modified:
- pkg/database/lrucache.go: New generic LRU cache implementation
- pkg/database/lrucache_test.go: Unit tests and benchmarks
- pkg/database/serial_cache.go: Refactored to use LRUCache

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 06:25:21 +01:00
parent eddd05eabf
commit 77d153a9c7
4 changed files with 458 additions and 124 deletions
--- a/pkg/database/lrucache.go
+++ b/pkg/database/lrucache.go
@@ -0,0 +1,119 @@
 //go:build !(js && wasm)
 package database
 import (
 	"container/list"
 	"sync"
 )
 // LRUCache is a generic least-recently-used cache whose methods serialize
 // access through a single mutex. A cache built with NewLRUCache starts empty
 // and grows as entries are added; once it holds maxSize entries, Put evicts
 // the entry at the back of the recency list before admitting a new key.
 type LRUCache[K comparable, V any] struct {
 	// mu guards items and order.
 	mu      sync.Mutex
 	// items maps each key to its node in order; every node's Value is a
 	// *lruEntry[K, V].
 	items   map[K]*list.Element
 	// order is the recency list shared with items.
 	order   *list.List // Front = most recent, Back = least recent
 	// maxSize is the capacity chosen at construction; none of the methods
 	// shown in this file reassign it.
 	maxSize int
 }
 // lruEntry is the payload stored in each element of the recency list. The key
 // is kept next to the value so that evicting the back element can also remove
 // the matching map entry.
 type lruEntry[K comparable, V any] struct {
 	// key is the map key this entry is indexed under.
 	key   K
 	// value is the cached value returned by Get.
 	value V
 }
 // NewLRUCache returns an empty LRUCache that holds at most maxSize entries.
 // A zero or negative maxSize is replaced by a fallback capacity of 1000;
 // positive values are used as given. Nothing is pre-sized: the map and the
 // recency list grow as entries are added.
 func NewLRUCache[K comparable, V any](maxSize int) *LRUCache[K, V] {
 	limit := maxSize
 	if limit < 1 {
 		limit = 1000 // fallback capacity for non-positive requests
 	}
 	cache := new(LRUCache[K, V])
 	cache.items = map[K]*list.Element{}
 	cache.order = list.New()
 	cache.maxSize = limit
 	return cache
 }
 // Get looks up key and, on a hit, promotes the entry to most recently used.
 // It returns the stored value and true when the key is present, or the zero
 // value of V and false when it is not. A miss leaves the recency order
 // untouched.
 func (c *LRUCache[K, V]) Get(key K) (value V, found bool) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 	node, hit := c.items[key]
 	if !hit {
 		// value is still the zero V here.
 		return value, false
 	}
 	c.order.MoveToFront(node)
 	return node.Value.(*lruEntry[K, V]).value, true
 }
 // Put stores value under key and marks the entry as most recently used.
 //
 // If key is already cached its value is overwritten in place. Otherwise, when
 // the cache already holds maxSize entries, the least recently used entry is
 // evicted and its list node is recycled for the new key, so a steady-state
 // eviction allocates neither a new entry nor a new list element. Below
 // capacity a fresh entry is allocated and the cache grows by one.
 func (c *LRUCache[K, V]) Put(key K, value V) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 	// Existing key: refresh recency and overwrite the value.
 	if elem, ok := c.items[key]; ok {
 		c.order.MoveToFront(elem)
 		elem.Value.(*lruEntry[K, V]).value = value
 		return
 	}
 	// At capacity: reuse the least recently used node for the new key rather
 	// than removing it and allocating a replacement. Get returns values by
 	// copy, so no caller can be holding a reference to the recycled entry.
 	if len(c.items) >= c.maxSize {
 		if oldest := c.order.Back(); oldest != nil {
 			entry := oldest.Value.(*lruEntry[K, V])
 			delete(c.items, entry.key)
 			entry.key, entry.value = key, value
 			c.order.MoveToFront(oldest)
 			c.items[key] = oldest
 			return
 		}
 	}
 	// Below capacity (or nothing to evict): add a new entry at the front.
 	c.items[key] = c.order.PushFront(&lruEntry[K, V]{key: key, value: value})
 }
 // Delete drops key from both the lookup map and the recency list. Deleting a
 // key that is not cached is a no-op.
 func (c *LRUCache[K, V]) Delete(key K) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 	node, present := c.items[key]
 	if !present {
 		return
 	}
 	delete(c.items, key)
 	c.order.Remove(node)
 }
 // Len reports how many entries the cache holds right now. The count is read
 // while holding the lock.
 func (c *LRUCache[K, V]) Len() int {
 	c.mu.Lock()
 	size := len(c.items)
 	c.mu.Unlock()
 	return size
 }
 // MaxSize reports the capacity limit the cache was built with. The field is
 // read without taking the lock; none of the methods shown in this file
 // reassign it after construction.
 func (c *LRUCache[K, V]) MaxSize() int {
 	limit := c.maxSize
 	return limit
 }
 // Clear empties the cache: the lookup map is replaced with a fresh empty map
 // and the recency list is reset. The capacity limit is unchanged and the
 // cache can be reused immediately.
 func (c *LRUCache[K, V]) Clear() {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 	c.items = map[K]*list.Element{}
 	c.order.Init()
 }
 // Contains reports whether key is currently cached. Unlike Get, it only
 // consults the lookup map and never touches the recency list, so probing a
 // key does not protect it from eviction.
 func (c *LRUCache[K, V]) Contains(key K) bool {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 	_, present := c.items[key]
 	return present
 }
--- a/pkg/database/lrucache_test.go
+++ b/pkg/database/lrucache_test.go
@@ -0,0 +1,261 @@
 //go:build !(js && wasm)
 package database
 import (
 	"sync"
 	"testing"
 )
 // TestLRUCache_BasicOperations covers the happy path: three inserted keys can
 // be read back, an unknown key misses, and Len matches the number of inserts.
 func TestLRUCache_BasicOperations(t *testing.T) {
 	cache := NewLRUCache[string, int](10)
 	// Store three distinct keys, well below capacity.
 	cache.Put("a", 1)
 	cache.Put("b", 2)
 	cache.Put("c", 3)
 	// Each key must return its own value.
 	got, ok := cache.Get("a")
 	if !(ok && got == 1) {
 		t.Errorf("Get('a') = %d, %v; want 1, true", got, ok)
 	}
 	got, ok = cache.Get("b")
 	if !(ok && got == 2) {
 		t.Errorf("Get('b') = %d, %v; want 2, true", got, ok)
 	}
 	got, ok = cache.Get("c")
 	if !(ok && got == 3) {
 		t.Errorf("Get('c') = %d, %v; want 3, true", got, ok)
 	}
 	// A key that was never stored must miss.
 	_, ok = cache.Get("d")
 	if ok {
 		t.Error("Get('d') should return false for non-existent key")
 	}
 	// Nothing was evicted, so all three entries are counted.
 	if n := cache.Len(); n != 3 {
 		t.Errorf("Len() = %d; want 3", n)
 	}
 }
 // TestLRUCache_Update verifies that a second Put on an existing key replaces
 // the stored value instead of adding another entry.
 func TestLRUCache_Update(t *testing.T) {
 	cache := NewLRUCache[string, int](10)
 	for _, val := range []int{1, 2} {
 		cache.Put("a", val) // the second iteration overwrites the first
 	}
 	got, ok := cache.Get("a")
 	if !ok || got != 2 {
 		t.Errorf("Get('a') = %d, %v; want 2, true", got, ok)
 	}
 	if n := cache.Len(); n != 1 {
 		t.Errorf("Len() = %d; want 1 (update should not add new entry)", n)
 	}
 }
 // TestLRUCache_Eviction fills a three-entry cache, inserts a fourth key, and
 // checks that only the oldest untouched key was dropped.
 func TestLRUCache_Eviction(t *testing.T) {
 	cache := NewLRUCache[int, string](3)
 	// Load the cache to exactly its capacity with keys 1, 2, 3.
 	for idx, word := range []string{"one", "two", "three"} {
 		cache.Put(idx+1, word)
 	}
 	if n := cache.Len(); n != 3 {
 		t.Errorf("Len() = %d; want 3", n)
 	}
 	// A fourth key forces out key 1, the least recently used entry.
 	cache.Put(4, "four")
 	if n := cache.Len(); n != 3 {
 		t.Errorf("Len() = %d; want 3 after eviction", n)
 	}
 	_, ok := cache.Get(1)
 	if ok {
 		t.Error("Key 1 should have been evicted")
 	}
 	// The remaining keys must have survived.
 	_, ok = cache.Get(2)
 	if !ok {
 		t.Error("Key 2 should still be present")
 	}
 	_, ok = cache.Get(3)
 	if !ok {
 		t.Error("Key 3 should still be present")
 	}
 	_, ok = cache.Get(4)
 	if !ok {
 		t.Error("Key 4 should be present")
 	}
 }
 // TestLRUCache_LRUOrder checks that reading a key refreshes its recency: after
 // key 1 is read, the next eviction must remove key 2 instead.
 func TestLRUCache_LRUOrder(t *testing.T) {
 	cache := NewLRUCache[int, string](3)
 	// Load the cache to capacity with keys 1, 2, 3.
 	for idx, word := range []string{"one", "two", "three"} {
 		cache.Put(idx+1, word)
 	}
 	// Touch key 1 so that key 2 becomes the least recently used entry.
 	cache.Get(1)
 	// Inserting key 4 now has to evict key 2.
 	cache.Put(4, "four")
 	_, ok := cache.Get(1)
 	if !ok {
 		t.Error("Key 1 should still be present after being accessed")
 	}
 	_, ok = cache.Get(2)
 	if ok {
 		t.Error("Key 2 should have been evicted (oldest)")
 	}
 }
 // TestLRUCache_Delete removes one of two keys, checks that only that key is
 // gone, and confirms that deleting an unknown key is harmless.
 func TestLRUCache_Delete(t *testing.T) {
 	cache := NewLRUCache[string, int](10)
 	cache.Put("a", 1)
 	cache.Put("b", 2)
 	cache.Delete("a")
 	_, ok := cache.Get("a")
 	if ok {
 		t.Error("Key 'a' should be deleted")
 	}
 	if n := cache.Len(); n != 1 {
 		t.Errorf("Len() = %d; want 1", n)
 	}
 	// Removing a key that was never stored must not panic.
 	cache.Delete("nonexistent")
 }
 // TestLRUCache_Clear checks that Clear empties the cache and that the cache
 // accepts new entries afterwards.
 func TestLRUCache_Clear(t *testing.T) {
 	cache := NewLRUCache[int, int](10)
 	for key := 0; key < 5; key++ {
 		cache.Put(key, key*10)
 	}
 	cache.Clear()
 	if n := cache.Len(); n != 0 {
 		t.Errorf("Len() = %d; want 0 after Clear()", n)
 	}
 	// The cleared cache must still be usable.
 	cache.Put(100, 1000)
 	got, ok := cache.Get(100)
 	if !ok || got != 1000 {
 		t.Errorf("Get(100) = %d, %v; want 1000, true", got, ok)
 	}
 }
 // TestLRUCache_Contains checks both halves of the Contains contract: it
 // reports membership correctly, and it does not refresh an entry's recency
 // the way Get does.
 func TestLRUCache_Contains(t *testing.T) {
 	c := NewLRUCache[string, int](10)
 	c.Put("a", 1)
 	if !c.Contains("a") {
 		t.Error("Contains('a') should return true")
 	}
 	if c.Contains("b") {
 		t.Error("Contains('b') should return false")
 	}
 	// Recency check: in a two-entry cache, probing the oldest key with
 	// Contains must not save it from the next eviction.
 	small := NewLRUCache[string, int](2)
 	small.Put("old", 1)
 	small.Put("new", 2)
 	if !small.Contains("old") {
 		t.Error("Contains('old') should return true before eviction")
 	}
 	small.Put("newest", 3)
 	if small.Contains("old") {
 		t.Error("Contains('old') must not update LRU order; 'old' should have been evicted")
 	}
 	if !small.Contains("new") || !small.Contains("newest") {
 		t.Error("'new' and 'newest' should both still be present")
 	}
 }
 // TestLRUCache_ByteArrayKey exercises the cache with fixed-size [32]byte keys
 // (the key shape used for pubkeys and event IDs) and checks that two keys
 // differing only in their first byte are kept apart.
 func TestLRUCache_ByteArrayKey(t *testing.T) {
 	cache := NewLRUCache[[32]byte, uint64](100)
 	first := [32]byte{1}
 	second := [32]byte{2}
 	cache.Put(first, 100)
 	cache.Put(second, 200)
 	got, ok := cache.Get(first)
 	if !ok || got != 100 {
 		t.Errorf("Get(key1) = %d, %v; want 100, true", got, ok)
 	}
 	got, ok = cache.Get(second)
 	if !ok || got != 200 {
 		t.Errorf("Get(key2) = %d, %v; want 200, true", got, ok)
 	}
 }
 // TestLRUCache_Concurrent hammers one cache from ten writer and ten reader
 // goroutines. The capacity is deliberately smaller than the 1000 distinct
 // keys written, so eviction runs under contention and the size checks below
 // are meaningful. Run with -race to detect unsynchronized access.
 func TestLRUCache_Concurrent(t *testing.T) {
 	c := NewLRUCache[int, int](100)
 	var wg sync.WaitGroup
 	// Concurrent writes: 10 goroutines x 100 distinct keys each.
 	for i := 0; i < 10; i++ {
 		wg.Add(1)
 		go func(base int) {
 			defer wg.Done()
 			for j := 0; j < 100; j++ {
 				c.Put(base*100+j, j)
 			}
 		}(i)
 	}
 	// Concurrent reads over the same key space.
 	for i := 0; i < 10; i++ {
 		wg.Add(1)
 		go func(base int) {
 			defer wg.Done()
 			for j := 0; j < 100; j++ {
 				c.Get(base*100 + j)
 			}
 		}(i)
 	}
 	wg.Wait()
 	// Cache should not exceed max size.
 	if c.Len() > c.MaxSize() {
 		t.Errorf("Len() = %d exceeds MaxSize() = %d", c.Len(), c.MaxSize())
 	}
 	// With ten times more distinct keys than capacity the cache must be full.
 	if c.Len() != c.MaxSize() {
 		t.Errorf("Len() = %d; want %d after writing 1000 distinct keys", c.Len(), c.MaxSize())
 	}
 	// All goroutines are done, so the internals can be inspected directly:
 	// the recency list and the lookup map must describe the same entries.
 	if c.order.Len() != len(c.items) {
 		t.Errorf("list has %d elements but map has %d entries", c.order.Len(), len(c.items))
 	}
 }
 // BenchmarkLRUCache_Put measures Put over a key space exactly as large as the
 // cache, so once every key has been inserted each call is an in-place update.
 func BenchmarkLRUCache_Put(b *testing.B) {
 	const capacity = 10000
 	cache := NewLRUCache[uint64, []byte](capacity)
 	payload := make([]byte, 32)
 	b.ReportAllocs()
 	b.ResetTimer()
 	for n := 0; n < b.N; n++ {
 		cache.Put(uint64(n%capacity), payload)
 	}
 }
 // BenchmarkLRUCache_Get measures Get on a cache pre-filled to capacity, so
 // every lookup is a hit that also promotes the entry.
 func BenchmarkLRUCache_Get(b *testing.B) {
 	const capacity = 10000
 	cache := NewLRUCache[uint64, []byte](capacity)
 	payload := make([]byte, 32)
 	// Fill the cache before the timer starts.
 	for key := 0; key < capacity; key++ {
 		cache.Put(uint64(key), payload)
 	}
 	b.ReportAllocs()
 	b.ResetTimer()
 	for n := 0; n < b.N; n++ {
 		cache.Get(uint64(n % capacity))
 	}
 }
 // BenchmarkLRUCache_PutGet measures a Put immediately followed by a Get of
 // the same key, cycling through a key space as large as the cache.
 func BenchmarkLRUCache_PutGet(b *testing.B) {
 	const capacity = 10000
 	cache := NewLRUCache[uint64, []byte](capacity)
 	payload := make([]byte, 32)
 	b.ReportAllocs()
 	b.ResetTimer()
 	for n := 0; n < b.N; n++ {
 		k := uint64(n % capacity)
 		cache.Put(k, payload)
 		cache.Get(k)
 	}
 }
--- a/pkg/database/serial_cache.go
+++ b/pkg/database/serial_cache.go
@@ -4,7 +4,6 @@ package database
 import (
 	"errors"
 	"sync"
 	"github.com/dgraph-io/badger/v4"
 	"lol.mleku.dev/chk"
@@ -16,156 +15,114 @@ import (
 // SerialCache provides LRU caching for pubkey and event ID serial lookups.
 // This is critical for compact event decoding performance since every event
 // requires looking up the author pubkey and potentially multiple tag references.
 //
 // The cache uses LRU eviction and starts empty, growing on demand up to the
 // configured limits. This provides better memory efficiency than pre-allocation
 // and better hit rates than random eviction.
 type SerialCache struct {
 	// Pubkey serial -> full pubkey (for decoding)
-	pubkeyBySerial     map[uint64][]byte
+	pubkeyBySerial *LRUCache[uint64, []byte]
 	pubkeyBySerialLock sync.RWMutex
-	// Pubkey hash -> serial (for encoding)
+	// Pubkey bytes -> serial (for encoding)
-	serialByPubkeyHash     map[string]uint64
+	// Uses [32]byte as key since []byte isn't comparable
-	serialByPubkeyHashLock sync.RWMutex
+	serialByPubkey *LRUCache[[32]byte, uint64]
 	// Event serial -> full event ID (for decoding)
-	eventIdBySerial     map[uint64][]byte
+	eventIdBySerial *LRUCache[uint64, []byte]
 	eventIdBySerialLock sync.RWMutex
-	// Event ID hash -> serial (for encoding)
+	// Event ID bytes -> serial (for encoding)
-	serialByEventIdHash     map[string]uint64
+	serialByEventId *LRUCache[[32]byte, uint64]
 	serialByEventIdHashLock sync.RWMutex
-	// Maximum cache sizes
+	// Limits (for stats reporting)
 	maxPubkeys  int
 	maxEventIds int
 }
-// NewSerialCache creates a new serial cache with the specified sizes.
+// NewSerialCache creates a new serial cache with the specified maximum sizes.
 // The cache starts empty and grows on demand up to these limits.
 func NewSerialCache(maxPubkeys, maxEventIds int) *SerialCache {
 	if maxPubkeys <= 0 {
-		maxPubkeys = 100000 // Default 100k pubkeys (~3.2MB)
+		maxPubkeys = 100000 // Default 100k pubkeys
 	}
 	if maxEventIds <= 0 {
-		maxEventIds = 500000 // Default 500k event IDs (~16MB)
+		maxEventIds = 500000 // Default 500k event IDs
 	}
 	return &SerialCache{
-		pubkeyBySerial:      make(map[uint64][]byte, maxPubkeys),
+		pubkeyBySerial:  NewLRUCache[uint64, []byte](maxPubkeys),
-		serialByPubkeyHash:  make(map[string]uint64, maxPubkeys),
+		serialByPubkey:  NewLRUCache[[32]byte, uint64](maxPubkeys),
-		eventIdBySerial:     make(map[uint64][]byte, maxEventIds),
+		eventIdBySerial: NewLRUCache[uint64, []byte](maxEventIds),
-		serialByEventIdHash: make(map[string]uint64, maxEventIds),
+		serialByEventId: NewLRUCache[[32]byte, uint64](maxEventIds),
 		maxPubkeys:      maxPubkeys,
 		maxEventIds:     maxEventIds,
 	}
 }
-// CachePubkey adds a pubkey to the cache.
+// CachePubkey adds a pubkey to the cache in both directions.
 func (c *SerialCache) CachePubkey(serial uint64, pubkey []byte) {
 	if len(pubkey) != 32 {
 		return
 	}
-	// Cache serial -> pubkey
+	// Copy pubkey to avoid referencing external slice
 	c.pubkeyBySerialLock.Lock()
 	if len(c.pubkeyBySerial) >= c.maxPubkeys {
 		// Simple eviction: clear half the cache
 		// A proper LRU would be better but this is simpler
 		count := 0
 		for k := range c.pubkeyBySerial {
 			delete(c.pubkeyBySerial, k)
 			count++
 			if count >= c.maxPubkeys/2 {
 				break
 			}
 		}
 	}
 	pk := make([]byte, 32)
 	copy(pk, pubkey)
 	c.pubkeyBySerial[serial] = pk
 	c.pubkeyBySerialLock.Unlock()
-	// Cache pubkey hash -> serial
+	// Cache serial -> pubkey (for decoding)
-	c.serialByPubkeyHashLock.Lock()
+	c.pubkeyBySerial.Put(serial, pk)
-	if len(c.serialByPubkeyHash) >= c.maxPubkeys {
+
-		count := 0
+	// Cache pubkey -> serial (for encoding)
-		for k := range c.serialByPubkeyHash {
+	var key [32]byte
-			delete(c.serialByPubkeyHash, k)
+	copy(key[:], pubkey)
-			count++
+	c.serialByPubkey.Put(key, serial)
 			if count >= c.maxPubkeys/2 {
 				break
 			}
 		}
 	}
 	c.serialByPubkeyHash[string(pubkey)] = serial
 	c.serialByPubkeyHashLock.Unlock()
 }
 // GetPubkeyBySerial returns the pubkey for a serial from cache.
 func (c *SerialCache) GetPubkeyBySerial(serial uint64) (pubkey []byte, found bool) {
-	c.pubkeyBySerialLock.RLock()
+	return c.pubkeyBySerial.Get(serial)
 	pubkey, found = c.pubkeyBySerial[serial]
 	c.pubkeyBySerialLock.RUnlock()
 	return
 }
 // GetSerialByPubkey returns the serial for a pubkey from cache.
 func (c *SerialCache) GetSerialByPubkey(pubkey []byte) (serial uint64, found bool) {
-	c.serialByPubkeyHashLock.RLock()
+	if len(pubkey) != 32 {
-	serial, found = c.serialByPubkeyHash[string(pubkey)]
+		return 0, false
-	c.serialByPubkeyHashLock.RUnlock()
+	}
-	return
+	var key [32]byte
 	copy(key[:], pubkey)
 	return c.serialByPubkey.Get(key)
 }
-// CacheEventId adds an event ID to the cache.
+// CacheEventId adds an event ID to the cache in both directions.
 func (c *SerialCache) CacheEventId(serial uint64, eventId []byte) {
 	if len(eventId) != 32 {
 		return
 	}
-	// Cache serial -> event ID
+	// Copy event ID to avoid referencing external slice
 	c.eventIdBySerialLock.Lock()
 	if len(c.eventIdBySerial) >= c.maxEventIds {
 		count := 0
 		for k := range c.eventIdBySerial {
 			delete(c.eventIdBySerial, k)
 			count++
 			if count >= c.maxEventIds/2 {
 				break
 			}
 		}
 	}
 	eid := make([]byte, 32)
 	copy(eid, eventId)
 	c.eventIdBySerial[serial] = eid
 	c.eventIdBySerialLock.Unlock()
-	// Cache event ID hash -> serial
+	// Cache serial -> event ID (for decoding)
-	c.serialByEventIdHashLock.Lock()
+	c.eventIdBySerial.Put(serial, eid)
-	if len(c.serialByEventIdHash) >= c.maxEventIds {
+
-		count := 0
+	// Cache event ID -> serial (for encoding)
-		for k := range c.serialByEventIdHash {
+	var key [32]byte
-			delete(c.serialByEventIdHash, k)
+	copy(key[:], eventId)
-			count++
+	c.serialByEventId.Put(key, serial)
 			if count >= c.maxEventIds/2 {
 				break
 			}
 		}
 	}
 	c.serialByEventIdHash[string(eventId)] = serial
 	c.serialByEventIdHashLock.Unlock()
 }
 // GetEventIdBySerial returns the event ID for a serial from cache.
 func (c *SerialCache) GetEventIdBySerial(serial uint64) (eventId []byte, found bool) {
-	c.eventIdBySerialLock.RLock()
+	return c.eventIdBySerial.Get(serial)
 	eventId, found = c.eventIdBySerial[serial]
 	c.eventIdBySerialLock.RUnlock()
 	return
 }
 // GetSerialByEventId returns the serial for an event ID from cache.
 func (c *SerialCache) GetSerialByEventId(eventId []byte) (serial uint64, found bool) {
-	c.serialByEventIdHashLock.RLock()
+	if len(eventId) != 32 {
-	serial, found = c.serialByEventIdHash[string(eventId)]
+		return 0, false
-	c.serialByEventIdHashLock.RUnlock()
+	}
-	return
+	var key [32]byte
 	copy(key[:], eventId)
 	return c.serialByEventId.Get(key)
 }
 // DatabaseSerialResolver implements SerialResolver using the database and cache.
@@ -341,20 +298,17 @@ type SerialCacheStats struct {
 // Stats returns statistics about the serial cache.
 func (c *SerialCache) Stats() SerialCacheStats {
-	c.pubkeyBySerialLock.RLock()
+	pubkeysCached := c.pubkeyBySerial.Len()
-	pubkeysCached := len(c.pubkeyBySerial)
+	eventIdsCached := c.eventIdBySerial.Len()
 	c.pubkeyBySerialLock.RUnlock()
 	c.eventIdBySerialLock.RLock()
 	eventIdsCached := len(c.eventIdBySerial)
 	c.eventIdBySerialLock.RUnlock()
 	// Memory estimation:
-	// - Each pubkey entry: 8 bytes (uint64 key) + 32 bytes (pubkey value) = 40 bytes
+	// Each entry has: key + value + list.Element overhead + map entry overhead
-	// - Each event ID entry: 8 bytes (uint64 key) + 32 bytes (event ID value) = 40 bytes
+	// - Pubkey by serial: 8 (key) + 32 (value) + ~80 (list) + ~16 (map) ≈ 136 bytes
-	// - Map overhead is roughly 2x the entry size for buckets
+	// - Serial by pubkey: 32 (key) + 8 (value) + ~80 (list) + ~16 (map) ≈ 136 bytes
-	pubkeyMemory := pubkeysCached * 40 * 2
+	// Total per pubkey (both directions): ~272 bytes
-	eventIdMemory := eventIdsCached * 40 * 2
+	// Similarly for event IDs: ~272 bytes per entry (both directions)
 	pubkeyMemory := pubkeysCached * 272
 	eventIdMemory := eventIdsCached * 272
 	return SerialCacheStats{
 		PubkeysCached:      pubkeysCached,
--- a/pkg/version/version
+++ b/pkg/version/version
@@ -1 +1 @@
-v0.36.16
+v0.36.17