- Add async archive relay querying (local results immediate, archives in background)
- Add query caching with filter normalization to avoid repeated requests
- Add session-deduplicated access tracking for events
- Add continuous garbage collection based on access patterns
- Auto-detect storage limit (80% of filesystem) when ORLY_MAX_STORAGE_BYTES=0
- Support NIP-50 search queries to archive relays

New environment variables:

- ORLY_ARCHIVE_ENABLED: Enable archive relay query augmentation
- ORLY_ARCHIVE_RELAYS: Comma-separated archive relay URLs
- ORLY_ARCHIVE_TIMEOUT_SEC: Archive query timeout
- ORLY_ARCHIVE_CACHE_TTL_HRS: Query deduplication window
- ORLY_GC_ENABLED: Enable access-based garbage collection
- ORLY_MAX_STORAGE_BYTES: Max storage (0 = auto, 80% of filesystem)
- ORLY_GC_INTERVAL_SEC: GC check interval
- ORLY_GC_BATCH_SIZE: Events per GC cycle

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
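The numeric GC variables map onto the `GCConfig` type defined in this file. A minimal sketch of how they might be parsed, assuming the standard library only (the helper name and exact parsing are illustrative, not the relay's actual config loader):

```go
package storage

import (
	"os"
	"strconv"
	"time"
)

// gcConfigFromEnv builds a GCConfig from the ORLY_GC_* variables listed
// above. Illustrative sketch only; the relay's real loader may differ.
func gcConfigFromEnv() GCConfig {
	cfg := DefaultGCConfig()
	if v, err := strconv.ParseInt(os.Getenv("ORLY_MAX_STORAGE_BYTES"), 10, 64); err == nil && v > 0 {
		cfg.MaxStorageBytes = v // leaving it unset (or 0) keeps auto-detection at 80% of the filesystem
	}
	if v, err := strconv.Atoi(os.Getenv("ORLY_GC_INTERVAL_SEC")); err == nil && v > 0 {
		cfg.Interval = time.Duration(v) * time.Second
	}
	if v, err := strconv.Atoi(os.Getenv("ORLY_GC_BATCH_SIZE")); err == nil && v > 0 {
		cfg.BatchSize = v
	}
	return cfg
}
```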
279 lines · 6.4 KiB · Go
//go:build !windows

package storage

import (
	"context"
	"sync"
	"sync/atomic"
	"time"

	"lol.mleku.dev/log"

	"next.orly.dev/pkg/database/indexes/types"
	"git.mleku.dev/mleku/nostr/encoders/event"
)

// GCDatabase defines the interface for database operations needed by the GC.
type GCDatabase interface {
	Path() string
	FetchEventBySerial(ser *types.Uint40) (ev *event.E, err error)
	DeleteEventBySerial(ctx context.Context, ser *types.Uint40, ev *event.E) error
}

// GarbageCollector manages continuous event eviction based on access patterns.
// It monitors storage usage and evicts the least accessed events when the
// storage limit is exceeded.
type GarbageCollector struct {
	ctx     context.Context
	cancel  context.CancelFunc
	db      GCDatabase
	tracker *AccessTracker

	// Configuration
	dataDir   string
	maxBytes  int64 // 0 = auto-calculate
	interval  time.Duration
	batchSize int
	minAgeSec int64 // Minimum age before considering for eviction

	// State
	mu           sync.Mutex
	running      bool
	evictedCount uint64
	lastRun      time.Time
}

// GCConfig holds configuration for the garbage collector.
type GCConfig struct {
	MaxStorageBytes int64         // 0 = auto-calculate (80% of filesystem)
	Interval        time.Duration // How often to check storage
	BatchSize       int           // Events to consider per GC run
	MinAgeSec       int64         // Minimum age before eviction (default: 1 hour)
}

// DefaultGCConfig returns a default GC configuration.
func DefaultGCConfig() GCConfig {
	return GCConfig{
		MaxStorageBytes: 0,           // Auto-detect
		Interval:        time.Minute, // Check every minute
		BatchSize:       1000,        // 1000 events per run
		MinAgeSec:       3600,        // 1 hour minimum age
	}
}

// NewGarbageCollector creates a new garbage collector.
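//
// A typical caller might wire it up roughly like this (illustrative sketch;
// ctx is the lifetime context, and db and tracker are assumed to satisfy
// GCDatabase and *AccessTracker respectively):
//
//	gc := NewGarbageCollector(ctx, db, tracker, DefaultGCConfig())
//	gc.Start()
//	defer gc.Stop()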
func NewGarbageCollector(
	ctx context.Context,
	db GCDatabase,
	tracker *AccessTracker,
	cfg GCConfig,
) *GarbageCollector {
	gcCtx, cancel := context.WithCancel(ctx)

	if cfg.BatchSize <= 0 {
		cfg.BatchSize = 1000
	}
	if cfg.Interval <= 0 {
		cfg.Interval = time.Minute
	}
	if cfg.MinAgeSec <= 0 {
		cfg.MinAgeSec = 3600 // 1 hour
	}

	return &GarbageCollector{
		ctx:       gcCtx,
		cancel:    cancel,
		db:        db,
		tracker:   tracker,
		dataDir:   db.Path(),
		maxBytes:  cfg.MaxStorageBytes,
		interval:  cfg.Interval,
		batchSize: cfg.BatchSize,
		minAgeSec: cfg.MinAgeSec,
	}
}

// Start begins the garbage collection loop.
func (gc *GarbageCollector) Start() {
	gc.mu.Lock()
	if gc.running {
		gc.mu.Unlock()
		return
	}
	gc.running = true
	gc.mu.Unlock()

	go gc.runLoop()
	log.I.F("garbage collector started (interval: %s, batch: %d)", gc.interval, gc.batchSize)
}

// Stop stops the garbage collector.
func (gc *GarbageCollector) Stop() {
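	// Cancel the context; runLoop and any in-flight eviction batch exit at
	// their next context check, so Stop does not block waiting for them.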
	gc.cancel()
	gc.mu.Lock()
	gc.running = false
	gc.mu.Unlock()
	log.I.F("garbage collector stopped (total evicted: %d)", atomic.LoadUint64(&gc.evictedCount))
}

// runLoop is the main GC loop.
func (gc *GarbageCollector) runLoop() {
	ticker := time.NewTicker(gc.interval)
	defer ticker.Stop()

	for {
		select {
		case <-gc.ctx.Done():
			return
		case <-ticker.C:
			if err := gc.runCycle(); err != nil {
				log.W.F("GC cycle error: %v", err)
			}
		}
	}
}

// runCycle executes one garbage collection cycle.
func (gc *GarbageCollector) runCycle() error {
	gc.mu.Lock()
	gc.lastRun = time.Now()
	gc.mu.Unlock()

	// Check if we need to run GC
	shouldRun, currentBytes, maxBytes, err := gc.shouldRunGC()
	if err != nil {
		return err
	}

	if !shouldRun {
		return nil
	}

	log.D.F("GC triggered: current=%d MB, max=%d MB (%.1f%%)",
		currentBytes/(1024*1024),
		maxBytes/(1024*1024),
		float64(currentBytes)/float64(maxBytes)*100)

	// Get coldest events
	serials, err := gc.tracker.GetColdestEvents(gc.batchSize, gc.minAgeSec)
	if err != nil {
		return err
	}

	if len(serials) == 0 {
		log.D.F("GC: no events eligible for eviction")
		return nil
	}

	// Evict events
	evicted, err := gc.evictEvents(serials)
	if err != nil {
		return err
	}

	atomic.AddUint64(&gc.evictedCount, uint64(evicted))
	log.I.F("GC: evicted %d events (total: %d)", evicted, atomic.LoadUint64(&gc.evictedCount))

	return nil
}

// shouldRunGC checks if storage limit is exceeded.
func (gc *GarbageCollector) shouldRunGC() (bool, int64, int64, error) {
	// Calculate max storage (dynamic based on filesystem)
	maxBytes, err := CalculateMaxStorage(gc.dataDir, gc.maxBytes)
	if err != nil {
		return false, 0, 0, err
	}

	// Get current usage
	currentBytes, err := GetCurrentStorageUsage(gc.dataDir)
	if err != nil {
		return false, 0, 0, err
	}

	return currentBytes > maxBytes, currentBytes, maxBytes, nil
}

// evictEvents evicts the specified events from the database.
func (gc *GarbageCollector) evictEvents(serials []uint64) (int, error) {
	evicted := 0

	for _, serial := range serials {
		// Check context for cancellation
		select {
		case <-gc.ctx.Done():
			return evicted, gc.ctx.Err()
		default:
		}

		// Convert serial to Uint40
		ser := &types.Uint40{}
		if err := ser.Set(serial); err != nil {
			log.D.F("GC: invalid serial %d: %v", serial, err)
			continue
		}

		// Fetch the event
		ev, err := gc.db.FetchEventBySerial(ser)
		if err != nil {
			log.D.F("GC: failed to fetch event %d: %v", serial, err)
			continue
		}
		if ev == nil {
			continue // Already deleted
		}

		// Delete the event
		if err := gc.db.DeleteEventBySerial(gc.ctx, ser, ev); err != nil {
			log.D.F("GC: failed to delete event %d: %v", serial, err)
			continue
		}

		evicted++

		// Rate limit to avoid overwhelming the database
		if evicted%100 == 0 {
			time.Sleep(10 * time.Millisecond)
		}
	}

	return evicted, nil
}

// Stats returns current GC statistics.
func (gc *GarbageCollector) Stats() GCStats {
	gc.mu.Lock()
	lastRun := gc.lastRun
	running := gc.running
	gc.mu.Unlock()

	// Get storage info
	currentBytes, _ := GetCurrentStorageUsage(gc.dataDir)
	maxBytes, _ := CalculateMaxStorage(gc.dataDir, gc.maxBytes)

	var percentage float64
	if maxBytes > 0 {
		percentage = float64(currentBytes) / float64(maxBytes) * 100
	}

	return GCStats{
		Running:             running,
		LastRunTime:         lastRun,
		TotalEvicted:        atomic.LoadUint64(&gc.evictedCount),
		CurrentStorageBytes: currentBytes,
		MaxStorageBytes:     maxBytes,
		StoragePercentage:   percentage,
	}
}

// GCStats holds garbage collector statistics.
type GCStats struct {
	Running             bool
	LastRunTime         time.Time
	TotalEvicted        uint64
	CurrentStorageBytes int64
	MaxStorageBytes     int64
	StoragePercentage   float64
}