Implement PID-controlled adaptive rate limiting for database operations
- Add LoadMonitor interface in pkg/interfaces/loadmonitor/ for database load metrics
- Implement PIDController with a filtered derivative to suppress high-frequency noise (see the sketch below):
  - Proportional (P): immediate response to the current error
  - Integral (I): eliminates steady-state offset, with anti-windup clamping
  - Derivative (D): rate-of-change prediction, with low-pass filtering
- Create BadgerLoadMonitor tracking L0 tables, compaction score, and cache hit ratio
- Create Neo4jLoadMonitor tracking query semaphore usage and latencies
- Add AdaptiveRateLimiter combining PID controllers for reads and writes
- Configure via environment variables:
  - ORLY_RATE_LIMIT_ENABLED: enable/disable rate limiting
  - ORLY_RATE_LIMIT_TARGET_MB: target memory limit (default 1500MB)
  - ORLY_RATE_LIMIT_*_K[PID]: PID gains for reads/writes
  - ORLY_RATE_LIMIT_MAX_*_MS: maximum delays
  - ORLY_RATE_LIMIT_*_TARGET: setpoints for reads/writes
- Integrate rate limiter into Server struct and lifecycle management
- Add comprehensive unit tests for PID controller behavior

Files modified:
- app/config/config.go: Add rate limiting configuration options
- app/main.go: Initialize and start/stop rate limiter
- app/server.go: Add rateLimiter field to Server struct
- main.go: Create rate limiter with appropriate monitor
- pkg/run/run.go: Pass disabled limiter for test instances
- pkg/interfaces/loadmonitor/: New LoadMonitor interface
- pkg/ratelimit/: New PID controller and limiter implementation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
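For reference, a minimal Go sketch of the PID update the message describes: anti-windup clamping on the integral accumulator and a low-pass (EMA) filter on the derivative term. All names and gains here are illustrative; the actual PIDController in pkg/ratelimit/ is not reproduced here and may differ.

	package main

	import "fmt"

	// pid is an illustrative PID controller, not the pkg/ratelimit type.
	type pid struct {
		kp, ki, kd  float64
		integral    float64
		integralMax float64 // anti-windup clamp on accumulated error
		prevErr     float64
		dFiltered   float64
		dAlpha      float64 // EMA coefficient for the derivative filter
	}

	// update returns the control output for one sample period dt (seconds).
	func (c *pid) update(setpoint, measured, dt float64) float64 {
		err := setpoint - measured

		// I term: accumulate error, clamped so a sustained overload
		// cannot wind up into a huge delayed correction.
		c.integral += err * dt
		if c.integral > c.integralMax {
			c.integral = c.integralMax
		} else if c.integral < -c.integralMax {
			c.integral = -c.integralMax
		}

		// D term: raw slope, low-pass filtered to suppress
		// high-frequency noise in the load signal.
		raw := (err - c.prevErr) / dt
		c.dFiltered = c.dAlpha*raw + (1-c.dAlpha)*c.dFiltered
		c.prevErr = err

		// P + I + D
		return c.kp*err + c.ki*c.integral + c.kd*c.dFiltered
	}

	func main() {
		c := &pid{kp: 0.5, ki: 0.1, kd: 0.05, integralMax: 10, dAlpha: 0.2}
		// Measured load 0.9 against a 0.7 setpoint: a negative output
		// maps to "apply more delay" in a rate limiter.
		fmt.Println(c.update(0.7, 0.9, 0.1))
	}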
pkg/ratelimit/neo4j_monitor.go (new file, 259 lines)
@@ -0,0 +1,259 @@
package ratelimit

import (
	"context"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"github.com/neo4j/neo4j-go-driver/v5/neo4j"
	"next.orly.dev/pkg/interfaces/loadmonitor"
)

// Neo4jMonitor implements loadmonitor.Monitor for the Neo4j database.
// Since the Neo4j driver doesn't expose detailed metrics, we track:
//   - Memory pressure via the Go runtime
//   - Query concurrency via the semaphore
//   - Latency via recorded samples
type Neo4jMonitor struct {
	driver   neo4j.DriverWithContext
	querySem chan struct{} // Reference to the query semaphore

	// Target memory for pressure calculation
	targetMemoryBytes atomic.Uint64

	// Latency tracking with exponential moving average
	queryLatencyNs atomic.Int64
	writeLatencyNs atomic.Int64
	latencyAlpha   float64 // EMA coefficient (default 0.1)

	// Concurrency tracking
	activeReads    atomic.Int32
	activeWrites   atomic.Int32
	maxConcurrency int

	// Cached metrics (updated by a background goroutine)
	metricsLock   sync.RWMutex
	cachedMetrics loadmonitor.Metrics

	// Background collection
	stopChan chan struct{}
	stopped  chan struct{}
	interval time.Duration
}

// Compile-time check that Neo4jMonitor implements loadmonitor.Monitor.
var _ loadmonitor.Monitor = (*Neo4jMonitor)(nil)

// NewNeo4jMonitor creates a new Neo4j load monitor.
// querySem should be the same semaphore used to limit concurrent queries;
// maxConcurrency is the maximum concurrent query limit (typically 10).
func NewNeo4jMonitor(
	driver neo4j.DriverWithContext,
	querySem chan struct{},
	maxConcurrency int,
	updateInterval time.Duration,
) *Neo4jMonitor {
	if updateInterval <= 0 {
		updateInterval = 100 * time.Millisecond
	}
	if maxConcurrency <= 0 {
		maxConcurrency = 10
	}

	m := &Neo4jMonitor{
		driver:         driver,
		querySem:       querySem,
		maxConcurrency: maxConcurrency,
		latencyAlpha:   0.1, // 10% new, 90% old for a smooth EMA
		stopChan:       make(chan struct{}),
		stopped:        make(chan struct{}),
		interval:       updateInterval,
	}

	// Set a default target (1.5GB)
	m.targetMemoryBytes.Store(1500 * 1024 * 1024)

	return m
}

// GetMetrics returns the current load metrics.
func (m *Neo4jMonitor) GetMetrics() loadmonitor.Metrics {
	m.metricsLock.RLock()
	defer m.metricsLock.RUnlock()
	return m.cachedMetrics
}

// RecordQueryLatency records a query latency sample using an exponential moving average.
func (m *Neo4jMonitor) RecordQueryLatency(latency time.Duration) {
	ns := latency.Nanoseconds()
	for {
		old := m.queryLatencyNs.Load()
		if old == 0 {
			if m.queryLatencyNs.CompareAndSwap(0, ns) {
				return
			}
			continue
		}
		// EMA: new = alpha*sample + (1-alpha)*old
		newVal := int64(m.latencyAlpha*float64(ns) + (1-m.latencyAlpha)*float64(old))
		if m.queryLatencyNs.CompareAndSwap(old, newVal) {
			return
		}
	}
}

// RecordWriteLatency records a write latency sample using an exponential moving average.
func (m *Neo4jMonitor) RecordWriteLatency(latency time.Duration) {
	ns := latency.Nanoseconds()
	for {
		old := m.writeLatencyNs.Load()
		if old == 0 {
			if m.writeLatencyNs.CompareAndSwap(0, ns) {
				return
			}
			continue
		}
		// EMA: new = alpha*sample + (1-alpha)*old
		newVal := int64(m.latencyAlpha*float64(ns) + (1-m.latencyAlpha)*float64(old))
		if m.writeLatencyNs.CompareAndSwap(old, newVal) {
			return
		}
	}
}

// SetMemoryTarget sets the target memory limit in bytes.
func (m *Neo4jMonitor) SetMemoryTarget(bytes uint64) {
	m.targetMemoryBytes.Store(bytes)
}

// Start begins background metric collection.
func (m *Neo4jMonitor) Start() <-chan struct{} {
	go m.collectLoop()
	return m.stopped
}

// Stop halts background metric collection.
func (m *Neo4jMonitor) Stop() {
	close(m.stopChan)
	<-m.stopped
}

// collectLoop periodically collects metrics.
func (m *Neo4jMonitor) collectLoop() {
	defer close(m.stopped)

	ticker := time.NewTicker(m.interval)
	defer ticker.Stop()

	for {
		select {
		case <-m.stopChan:
			return
		case <-ticker.C:
			m.updateMetrics()
		}
	}
}

// updateMetrics collects current metrics.
func (m *Neo4jMonitor) updateMetrics() {
	metrics := loadmonitor.Metrics{
		Timestamp: time.Now(),
	}

	// Calculate memory pressure from the Go runtime.
	var memStats runtime.MemStats
	runtime.ReadMemStats(&memStats)

	targetBytes := m.targetMemoryBytes.Load()
	if targetBytes > 0 {
		// Use HeapAlloc as the primary memory metric.
		metrics.MemoryPressure = float64(memStats.HeapAlloc) / float64(targetBytes)
	}

	// Calculate load from semaphore usage.
	// querySem is a buffered channel: count how many slots are taken.
	if m.querySem != nil {
		usedSlots := len(m.querySem)
		concurrencyLoad := float64(usedSlots) / float64(m.maxConcurrency)
		if concurrencyLoad > 1.0 {
			concurrencyLoad = 1.0
		}
		// Both reads and writes use the same semaphore.
		metrics.WriteLoad = concurrencyLoad
		metrics.ReadLoad = concurrencyLoad
	}

	// Add a latency-based load adjustment:
	// high latency indicates the database is struggling.
	queryLatencyNs := m.queryLatencyNs.Load()
	writeLatencyNs := m.writeLatencyNs.Load()

	// Treat query latency above 500ms as concerning.
	const latencyThresholdNs = 500 * 1e6 // 500ms
	if queryLatencyNs > 0 {
		latencyLoad := float64(queryLatencyNs) / float64(latencyThresholdNs)
		if latencyLoad > 1.0 {
			latencyLoad = 1.0
		}
		// Blend concurrency and latency for the read load.
		metrics.ReadLoad = 0.5*metrics.ReadLoad + 0.5*latencyLoad
	}

	if writeLatencyNs > 0 {
		latencyLoad := float64(writeLatencyNs) / float64(latencyThresholdNs)
		if latencyLoad > 1.0 {
			latencyLoad = 1.0
		}
		// Blend concurrency and latency for the write load.
		metrics.WriteLoad = 0.5*metrics.WriteLoad + 0.5*latencyLoad
	}

	// Store latencies.
	metrics.QueryLatency = time.Duration(queryLatencyNs)
	metrics.WriteLatency = time.Duration(writeLatencyNs)

	// Update the cached metrics.
	m.metricsLock.Lock()
	m.cachedMetrics = metrics
	m.metricsLock.Unlock()
}

// IncrementActiveReads tracks an active read operation.
// Call this when starting a read, and call the returned function when done.
func (m *Neo4jMonitor) IncrementActiveReads() func() {
	m.activeReads.Add(1)
	return func() {
		m.activeReads.Add(-1)
	}
}

// IncrementActiveWrites tracks an active write operation.
// Call this when starting a write, and call the returned function when done.
func (m *Neo4jMonitor) IncrementActiveWrites() func() {
	m.activeWrites.Add(1)
	return func() {
		m.activeWrites.Add(-1)
	}
}

// GetConcurrencyStats returns current concurrency statistics for debugging.
func (m *Neo4jMonitor) GetConcurrencyStats() (reads, writes int32, semUsed int) {
	reads = m.activeReads.Load()
	writes = m.activeWrites.Load()
	if m.querySem != nil {
		semUsed = len(m.querySem)
	}
	return
}

// CheckConnectivity performs a connectivity check against Neo4j.
// It can be used to verify that the database is responsive.
func (m *Neo4jMonitor) CheckConnectivity(ctx context.Context) error {
	if m.driver == nil {
		return nil
	}
	return m.driver.VerifyConnectivity(ctx)
}
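For orientation, a hedged sketch of how a caller might wire this monitor up. The real integration lives in main.go and app/main.go (not shown in this diff); wireMonitor is a hypothetical helper, and imports (time, the neo4j driver, next.orly.dev/pkg/ratelimit) are elided.

	// Hypothetical caller-side wiring, not part of this commit.
	// querySem must be the same buffered channel that gates concurrent
	// Neo4j queries, so len(querySem) reflects in-flight work.
	func wireMonitor(driver neo4j.DriverWithContext, querySem chan struct{}) (*ratelimit.Neo4jMonitor, func()) {
		mon := ratelimit.NewNeo4jMonitor(driver, querySem, cap(querySem), 100*time.Millisecond)
		mon.SetMemoryTarget(1500 * 1024 * 1024) // mirrors ORLY_RATE_LIMIT_TARGET_MB
		mon.Start()

		// Feed the latency EMA around each operation, e.g.:
		//   start := time.Now()
		//   ... run the query under the semaphore ...
		//   mon.RecordQueryLatency(time.Since(start))

		return mon, mon.Stop
	}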