Add archive relay query augmentation and access-based GC (v0.45.0)
- Add async archive relay querying (local results immediate, archives in background) - Add query caching with filter normalization to avoid repeated requests - Add session-deduplicated access tracking for events - Add continuous garbage collection based on access patterns - Auto-detect storage limit (80% of filesystem) when ORLY_MAX_STORAGE_BYTES=0 - Support NIP-50 search queries to archive relays New environment variables: - ORLY_ARCHIVE_ENABLED: Enable archive relay query augmentation - ORLY_ARCHIVE_RELAYS: Comma-separated archive relay URLs - ORLY_ARCHIVE_TIMEOUT_SEC: Archive query timeout - ORLY_ARCHIVE_CACHE_TTL_HRS: Query deduplication window - ORLY_GC_ENABLED: Enable access-based garbage collection - ORLY_MAX_STORAGE_BYTES: Max storage (0=auto 80%) - ORLY_GC_INTERVAL_SEC: GC check interval - ORLY_GC_BATCH_SIZE: Events per GC cycle 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
278
pkg/storage/gc.go
Normal file
278
pkg/storage/gc.go
Normal file
@@ -0,0 +1,278 @@
|
||||
//go:build !windows
|
||||
|
||||
package storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"lol.mleku.dev/log"
|
||||
|
||||
"next.orly.dev/pkg/database/indexes/types"
|
||||
"git.mleku.dev/mleku/nostr/encoders/event"
|
||||
)
|
||||
|
||||
// GCDatabase defines the interface for database operations needed by the GC.
|
||||
type GCDatabase interface {
|
||||
Path() string
|
||||
FetchEventBySerial(ser *types.Uint40) (ev *event.E, err error)
|
||||
DeleteEventBySerial(ctx context.Context, ser *types.Uint40, ev *event.E) error
|
||||
}
|
||||
|
||||
// GarbageCollector manages continuous event eviction based on access patterns.
|
||||
// It monitors storage usage and evicts the least accessed events when the
|
||||
// storage limit is exceeded.
|
||||
type GarbageCollector struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
db GCDatabase
|
||||
tracker *AccessTracker
|
||||
|
||||
// Configuration
|
||||
dataDir string
|
||||
maxBytes int64 // 0 = auto-calculate
|
||||
interval time.Duration
|
||||
batchSize int
|
||||
minAgeSec int64 // Minimum age before considering for eviction
|
||||
|
||||
// State
|
||||
mu sync.Mutex
|
||||
running bool
|
||||
evictedCount uint64
|
||||
lastRun time.Time
|
||||
}
|
||||
|
||||
// GCConfig carries the tunable settings accepted by NewGarbageCollector.
type GCConfig struct {
	// MaxStorageBytes is the storage limit in bytes.
	// 0 = auto-calculate (80% of filesystem).
	MaxStorageBytes int64

	// Interval is how often storage usage is checked.
	Interval time.Duration

	// BatchSize is the number of events considered per GC run.
	BatchSize int

	// MinAgeSec is the minimum event age, in seconds, before eviction
	// (default: 1 hour).
	MinAgeSec int64
}
|
||||
|
||||
// DefaultGCConfig returns a default GC configuration.
|
||||
func DefaultGCConfig() GCConfig {
|
||||
return GCConfig{
|
||||
MaxStorageBytes: 0, // Auto-detect
|
||||
Interval: time.Minute, // Check every minute
|
||||
BatchSize: 1000, // 1000 events per run
|
||||
MinAgeSec: 3600, // 1 hour minimum age
|
||||
}
|
||||
}
|
||||
|
||||
// NewGarbageCollector creates a new garbage collector.
|
||||
func NewGarbageCollector(
|
||||
ctx context.Context,
|
||||
db GCDatabase,
|
||||
tracker *AccessTracker,
|
||||
cfg GCConfig,
|
||||
) *GarbageCollector {
|
||||
gcCtx, cancel := context.WithCancel(ctx)
|
||||
|
||||
if cfg.BatchSize <= 0 {
|
||||
cfg.BatchSize = 1000
|
||||
}
|
||||
if cfg.Interval <= 0 {
|
||||
cfg.Interval = time.Minute
|
||||
}
|
||||
if cfg.MinAgeSec <= 0 {
|
||||
cfg.MinAgeSec = 3600 // 1 hour
|
||||
}
|
||||
|
||||
return &GarbageCollector{
|
||||
ctx: gcCtx,
|
||||
cancel: cancel,
|
||||
db: db,
|
||||
tracker: tracker,
|
||||
dataDir: db.Path(),
|
||||
maxBytes: cfg.MaxStorageBytes,
|
||||
interval: cfg.Interval,
|
||||
batchSize: cfg.BatchSize,
|
||||
minAgeSec: cfg.MinAgeSec,
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins the garbage collection loop.
|
||||
func (gc *GarbageCollector) Start() {
|
||||
gc.mu.Lock()
|
||||
if gc.running {
|
||||
gc.mu.Unlock()
|
||||
return
|
||||
}
|
||||
gc.running = true
|
||||
gc.mu.Unlock()
|
||||
|
||||
go gc.runLoop()
|
||||
log.I.F("garbage collector started (interval: %s, batch: %d)", gc.interval, gc.batchSize)
|
||||
}
|
||||
|
||||
// Stop stops the garbage collector.
|
||||
func (gc *GarbageCollector) Stop() {
|
||||
gc.cancel()
|
||||
gc.mu.Lock()
|
||||
gc.running = false
|
||||
gc.mu.Unlock()
|
||||
log.I.F("garbage collector stopped (total evicted: %d)", atomic.LoadUint64(&gc.evictedCount))
|
||||
}
|
||||
|
||||
// runLoop is the main GC loop.
|
||||
func (gc *GarbageCollector) runLoop() {
|
||||
ticker := time.NewTicker(gc.interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-gc.ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := gc.runCycle(); err != nil {
|
||||
log.W.F("GC cycle error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// runCycle executes one garbage collection cycle.
|
||||
func (gc *GarbageCollector) runCycle() error {
|
||||
gc.mu.Lock()
|
||||
gc.lastRun = time.Now()
|
||||
gc.mu.Unlock()
|
||||
|
||||
// Check if we need to run GC
|
||||
shouldRun, currentBytes, maxBytes, err := gc.shouldRunGC()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !shouldRun {
|
||||
return nil
|
||||
}
|
||||
|
||||
log.D.F("GC triggered: current=%d MB, max=%d MB (%.1f%%)",
|
||||
currentBytes/(1024*1024),
|
||||
maxBytes/(1024*1024),
|
||||
float64(currentBytes)/float64(maxBytes)*100)
|
||||
|
||||
// Get coldest events
|
||||
serials, err := gc.tracker.GetColdestEvents(gc.batchSize, gc.minAgeSec)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(serials) == 0 {
|
||||
log.D.F("GC: no events eligible for eviction")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Evict events
|
||||
evicted, err := gc.evictEvents(serials)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
atomic.AddUint64(&gc.evictedCount, uint64(evicted))
|
||||
log.I.F("GC: evicted %d events (total: %d)", evicted, atomic.LoadUint64(&gc.evictedCount))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// shouldRunGC checks if storage limit is exceeded.
|
||||
func (gc *GarbageCollector) shouldRunGC() (bool, int64, int64, error) {
|
||||
// Calculate max storage (dynamic based on filesystem)
|
||||
maxBytes, err := CalculateMaxStorage(gc.dataDir, gc.maxBytes)
|
||||
if err != nil {
|
||||
return false, 0, 0, err
|
||||
}
|
||||
|
||||
// Get current usage
|
||||
currentBytes, err := GetCurrentStorageUsage(gc.dataDir)
|
||||
if err != nil {
|
||||
return false, 0, 0, err
|
||||
}
|
||||
|
||||
return currentBytes > maxBytes, currentBytes, maxBytes, nil
|
||||
}
|
||||
|
||||
// evictEvents evicts the specified events from the database.
|
||||
func (gc *GarbageCollector) evictEvents(serials []uint64) (int, error) {
|
||||
evicted := 0
|
||||
|
||||
for _, serial := range serials {
|
||||
// Check context for cancellation
|
||||
select {
|
||||
case <-gc.ctx.Done():
|
||||
return evicted, gc.ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
// Convert serial to Uint40
|
||||
ser := &types.Uint40{}
|
||||
if err := ser.Set(serial); err != nil {
|
||||
log.D.F("GC: invalid serial %d: %v", serial, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Fetch the event
|
||||
ev, err := gc.db.FetchEventBySerial(ser)
|
||||
if err != nil {
|
||||
log.D.F("GC: failed to fetch event %d: %v", serial, err)
|
||||
continue
|
||||
}
|
||||
if ev == nil {
|
||||
continue // Already deleted
|
||||
}
|
||||
|
||||
// Delete the event
|
||||
if err := gc.db.DeleteEventBySerial(gc.ctx, ser, ev); err != nil {
|
||||
log.D.F("GC: failed to delete event %d: %v", serial, err)
|
||||
continue
|
||||
}
|
||||
|
||||
evicted++
|
||||
|
||||
// Rate limit to avoid overwhelming the database
|
||||
if evicted%100 == 0 {
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
return evicted, nil
|
||||
}
|
||||
|
||||
// Stats returns current GC statistics.
|
||||
func (gc *GarbageCollector) Stats() GCStats {
|
||||
gc.mu.Lock()
|
||||
lastRun := gc.lastRun
|
||||
running := gc.running
|
||||
gc.mu.Unlock()
|
||||
|
||||
// Get storage info
|
||||
currentBytes, _ := GetCurrentStorageUsage(gc.dataDir)
|
||||
maxBytes, _ := CalculateMaxStorage(gc.dataDir, gc.maxBytes)
|
||||
|
||||
var percentage float64
|
||||
if maxBytes > 0 {
|
||||
percentage = float64(currentBytes) / float64(maxBytes) * 100
|
||||
}
|
||||
|
||||
return GCStats{
|
||||
Running: running,
|
||||
LastRunTime: lastRun,
|
||||
TotalEvicted: atomic.LoadUint64(&gc.evictedCount),
|
||||
CurrentStorageBytes: currentBytes,
|
||||
MaxStorageBytes: maxBytes,
|
||||
StoragePercentage: percentage,
|
||||
}
|
||||
}
|
||||
|
||||
// GCStats is the snapshot of collector state returned by
// GarbageCollector.Stats.
type GCStats struct {
	// Running reports whether the collector is currently marked as running.
	Running bool

	// LastRunTime is when the most recent GC cycle began; it is the zero
	// time if no cycle has run yet.
	LastRunTime time.Time

	// TotalEvicted is the number of events evicted so far.
	TotalEvicted uint64

	// CurrentStorageBytes is the measured storage usage in bytes.
	CurrentStorageBytes int64

	// MaxStorageBytes is the storage limit in bytes.
	MaxStorageBytes int64

	// StoragePercentage is CurrentStorageBytes as a percentage of
	// MaxStorageBytes, or 0 when the limit is not positive.
	StoragePercentage float64
}
|
||||
Reference in New Issue
Block a user