Interim release: documentation updates and rate limiting improvements

- Add applesauce library reference documentation
- Add rate limiting test report for Badger
- Add memory monitoring for rate limiter (platform-specific implementations)
- Enhance PID-controlled adaptive rate limiting
- Update Neo4j and Badger monitors with improved load metrics
- Add docker-compose configuration
- Update README and configuration options

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit was merged in pull request #3.
2025-12-12 08:47:25 +01:00
parent ba84e12ea9
commit f16ab3077f
20 changed files with 1581 additions and 75 deletions


@@ -3,23 +3,32 @@
package ratelimit
import (
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/dgraph-io/badger/v4"
"lol.mleku.dev/log"
"next.orly.dev/pkg/interfaces/loadmonitor"
)
// BadgerMonitor implements loadmonitor.Monitor for the Badger database.
-// It collects metrics from Badger's LSM tree, caches, and Go runtime.
+// It collects metrics from Badger's LSM tree, caches, and actual process memory.
// It also implements CompactableMonitor and EmergencyModeMonitor interfaces.
type BadgerMonitor struct {
db *badger.DB
// Target memory for pressure calculation
targetMemoryBytes atomic.Uint64
// Emergency mode configuration
emergencyThreshold atomic.Uint64 // stored as threshold * 1000 (e.g., 1500 = 1.5)
emergencyModeUntil atomic.Int64 // Unix nano when forced emergency mode ends
inEmergencyMode atomic.Bool
// Compaction state
isCompacting atomic.Bool
// Latency tracking with exponential moving average
queryLatencyNs atomic.Int64
writeLatencyNs atomic.Int64
@@ -37,8 +46,10 @@ type BadgerMonitor struct {
interval time.Duration
}
-// Compile-time check that BadgerMonitor implements loadmonitor.Monitor
+// Compile-time checks for interface implementation
var _ loadmonitor.Monitor = (*BadgerMonitor)(nil)
var _ loadmonitor.CompactableMonitor = (*BadgerMonitor)(nil)
var _ loadmonitor.EmergencyModeMonitor = (*BadgerMonitor)(nil)
// NewBadgerMonitor creates a new Badger load monitor.
// The updateInterval controls how often metrics are collected (default 100ms).
@@ -58,9 +69,73 @@ func NewBadgerMonitor(db *badger.DB, updateInterval time.Duration) *BadgerMonito
// Set a default target (1.5GB)
m.targetMemoryBytes.Store(1500 * 1024 * 1024)
// Default emergency threshold: 150% of target
m.emergencyThreshold.Store(1500)
return m
}
// SetEmergencyThreshold sets the memory threshold above which emergency mode is triggered.
// threshold is a fraction, e.g., 1.5 = 150% of target memory.
func (m *BadgerMonitor) SetEmergencyThreshold(threshold float64) {
m.emergencyThreshold.Store(uint64(threshold * 1000))
}
// GetEmergencyThreshold returns the current emergency threshold as a fraction.
func (m *BadgerMonitor) GetEmergencyThreshold() float64 {
return float64(m.emergencyThreshold.Load()) / 1000.0
}
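// Illustrative sketch (not part of this change): the threshold is kept as a
// fixed-point integer (fraction * 1000), so 1.5 (150% of target) is stored as
// 1500 and round-trips exactly. The function name and the db handle are
// hypothetical.
func exampleThresholdRoundTrip(db *badger.DB) float64 {
	m := NewBadgerMonitor(db, 100*time.Millisecond)
	m.SetEmergencyThreshold(1.5)     // stored internally as 1500
	return m.GetEmergencyThreshold() // 1.5
}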
// ForceEmergencyMode manually triggers emergency mode for a duration.
func (m *BadgerMonitor) ForceEmergencyMode(duration time.Duration) {
m.emergencyModeUntil.Store(time.Now().Add(duration).UnixNano())
m.inEmergencyMode.Store(true)
log.W.F("⚠️ emergency mode forced for %v", duration)
}
// TriggerCompaction initiates a Badger Flatten operation to compact all levels.
// This should be called when memory pressure is high and the database needs to
// reclaim space. It runs synchronously and may take significant time.
func (m *BadgerMonitor) TriggerCompaction() error {
if m.db == nil || m.db.IsClosed() {
return nil
}
if m.isCompacting.Load() {
log.D.Ln("compaction already in progress, skipping")
return nil
}
m.isCompacting.Store(true)
defer m.isCompacting.Store(false)
log.I.Ln("🗜️ triggering Badger compaction (Flatten)")
start := time.Now()
// Flatten with 4 workers (matches NumCompactors default)
err := m.db.Flatten(4)
if err != nil {
log.E.F("compaction failed: %v", err)
return err
}
// Also run value log GC to reclaim space
for {
err := m.db.RunValueLogGC(0.5)
if err != nil {
break // stops on ErrNoRewrite (nothing left to collect) or any other error
}
}
log.I.F("🗜️ compaction completed in %v", time.Since(start))
return nil
}
// IsCompacting returns true if a compaction is currently in progress.
func (m *BadgerMonitor) IsCompacting() bool {
return m.isCompacting.Load()
}
// GetMetrics returns the current load metrics.
func (m *BadgerMonitor) GetMetrics() loadmonitor.Metrics {
m.metricsLock.RLock()
@@ -140,7 +215,7 @@ func (m *BadgerMonitor) collectLoop() {
}
}
-// updateMetrics collects current metrics from Badger and runtime.
+// updateMetrics collects current metrics from Badger and actual process memory.
func (m *BadgerMonitor) updateMetrics() {
if m.db == nil || m.db.IsClosed() {
return
@@ -150,17 +225,40 @@ func (m *BadgerMonitor) updateMetrics() {
Timestamp: time.Now(),
}
-// Calculate memory pressure from Go runtime
-var memStats runtime.MemStats
-runtime.ReadMemStats(&memStats)
+// Use RSS-based memory pressure (actual physical memory, not Go runtime)
+procMem := ReadProcessMemoryStats()
+physicalMemBytes := procMem.PhysicalMemoryBytes()
+metrics.PhysicalMemoryMB = physicalMemBytes / (1024 * 1024)
targetBytes := m.targetMemoryBytes.Load()
if targetBytes > 0 {
-// Use HeapAlloc as primary memory metric
-// This represents the actual live heap objects
-metrics.MemoryPressure = float64(memStats.HeapAlloc) / float64(targetBytes)
+// Use actual physical memory (RSS - shared) for pressure calculation
+metrics.MemoryPressure = float64(physicalMemBytes) / float64(targetBytes)
}
// Check emergency mode
emergencyThreshold := float64(m.emergencyThreshold.Load()) / 1000.0
forcedUntil := m.emergencyModeUntil.Load()
now := time.Now().UnixNano()
if forcedUntil > now {
// Still in forced emergency mode
metrics.InEmergencyMode = true
} else if metrics.MemoryPressure >= emergencyThreshold {
// Memory pressure exceeds emergency threshold
metrics.InEmergencyMode = true
if !m.inEmergencyMode.Load() {
log.W.F("⚠️ entering emergency mode: memory pressure %.1f%% >= threshold %.1f%%",
metrics.MemoryPressure*100, emergencyThreshold*100)
}
} else {
if m.inEmergencyMode.Load() {
log.I.F("✅ exiting emergency mode: memory pressure %.1f%% < threshold %.1f%%",
metrics.MemoryPressure*100, emergencyThreshold*100)
}
}
m.inEmergencyMode.Store(metrics.InEmergencyMode)
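// Worked example (illustrative): with the default 1.5GB target and the default
// 1.5 emergency threshold, this block flips InEmergencyMode once RSS minus
// shared memory reaches about 2.25GB, or immediately while a forced emergency
// window from ForceEmergencyMode is still active.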
// Get Badger LSM tree information for write load
levels := m.db.Levels()
var l0Tables int
@@ -191,6 +289,9 @@ func (m *BadgerMonitor) updateMetrics() {
compactionLoad = 1.0
}
// Mark compaction as pending if score is high
metrics.CompactionPending = maxScore > 1.5 || l0Tables > 10
// Blend: 60% L0 (immediate backpressure), 40% compaction score
metrics.WriteLoad = 0.6*l0Load + 0.4*compactionLoad
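// Worked example (illustrative): with l0Load = 0.5 and compactionLoad = 1.0,
// WriteLoad = 0.6*0.5 + 0.4*1.0 = 0.7.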


@@ -6,6 +6,7 @@ import (
"sync/atomic"
"time"
"lol.mleku.dev/log"
"next.orly.dev/pkg/interfaces/loadmonitor"
pidif "next.orly.dev/pkg/interfaces/pid"
"next.orly.dev/pkg/pid"
@@ -74,25 +75,47 @@ type Config struct {
// The remaining weight is given to the load metric.
// Default: 0.7 (70% memory, 30% load)
MemoryWeight float64
// EmergencyThreshold is the memory pressure level (fraction of target) that triggers emergency mode.
// Default: 1.167 (116.7% = target + 1/6th)
// When exceeded, writes are aggressively throttled until memory drops below RecoveryThreshold.
EmergencyThreshold float64
// RecoveryThreshold is the memory pressure level below which we exit emergency mode.
// Default: 0.833 (83.3% = target - 1/6th)
// Hysteresis prevents rapid oscillation between normal and emergency modes.
RecoveryThreshold float64
// EmergencyMaxDelayMs is the maximum delay for writes during emergency mode.
// Default: 5000 (5 seconds) - much longer than normal MaxWriteDelayMs
EmergencyMaxDelayMs int
// CompactionCheckInterval controls how often to check if compaction should be triggered.
// Default: 10 seconds
CompactionCheckInterval time.Duration
}
// DefaultConfig returns a default configuration for the rate limiter.
func DefaultConfig() Config {
return Config{
Enabled: true,
TargetMemoryMB: 1500, // 1.5GB target
WriteSetpoint: 0.85,
ReadSetpoint: 0.90,
WriteKp: 0.5,
WriteKi: 0.1,
WriteKd: 0.05,
ReadKp: 0.3,
ReadKi: 0.05,
ReadKd: 0.02,
MaxWriteDelayMs: 1000, // 1 second max
MaxReadDelayMs: 500, // 500ms max
MetricUpdateInterval: 100 * time.Millisecond,
MemoryWeight: 0.7,
EmergencyThreshold: 1.167, // Target + 1/6th (~1.75GB for 1.5GB target)
RecoveryThreshold: 0.833, // Target - 1/6th (~1.25GB for 1.5GB target)
EmergencyMaxDelayMs: 5000, // 5 seconds max in emergency mode
CompactionCheckInterval: 10 * time.Second,
}
}
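// Illustrative sketch (not part of this change): callers can start from
// DefaultConfig and override selected fields; the function name and the
// values used here are hypothetical.
func exampleCustomConfig() Config {
	cfg := DefaultConfig()
	cfg.TargetMemoryMB = 2048     // a hypothetical 2GB target
	cfg.EmergencyThreshold = 1.25 // emergency at roughly 2.5GB for that target
	return cfg
}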
@@ -105,22 +128,39 @@ func NewConfigFromValues(
readKp, readKi, readKd float64,
maxWriteMs, maxReadMs int,
writeTarget, readTarget float64,
emergencyThreshold, recoveryThreshold float64,
emergencyMaxMs int,
) Config {
// Apply defaults for zero values
if emergencyThreshold == 0 {
emergencyThreshold = 1.167 // Target + 1/6th
}
if recoveryThreshold == 0 {
recoveryThreshold = 0.833 // Target - 1/6th
}
if emergencyMaxMs == 0 {
emergencyMaxMs = 5000 // 5 seconds
}
return Config{
Enabled: enabled,
TargetMemoryMB: targetMB,
WriteSetpoint: writeTarget,
ReadSetpoint: readTarget,
WriteKp: writeKp,
WriteKi: writeKi,
WriteKd: writeKd,
ReadKp: readKp,
ReadKi: readKi,
ReadKd: readKd,
MaxWriteDelayMs: maxWriteMs,
MaxReadDelayMs: maxReadMs,
MetricUpdateInterval: 100 * time.Millisecond,
MemoryWeight: 0.7,
EmergencyThreshold: emergencyThreshold,
RecoveryThreshold: recoveryThreshold,
EmergencyMaxDelayMs: emergencyMaxMs,
CompactionCheckInterval: 10 * time.Second,
}
}
@@ -139,11 +179,17 @@ type Limiter struct {
metricsLock sync.RWMutex
currentMetrics loadmonitor.Metrics
// Emergency mode tracking with hysteresis
inEmergencyMode atomic.Bool
lastEmergencyCheck atomic.Int64 // Unix nano timestamp
compactionTriggered atomic.Bool
// Statistics
totalWriteDelayMs atomic.Int64
totalReadDelayMs atomic.Int64
writeThrottles atomic.Int64
readThrottles atomic.Int64
emergencyEvents atomic.Int64
// Lifecycle
ctx context.Context
@@ -158,6 +204,20 @@ type Limiter struct {
func NewLimiter(config Config, monitor loadmonitor.Monitor) *Limiter {
ctx, cancel := context.WithCancel(context.Background())
// Apply defaults for zero values
if config.EmergencyThreshold == 0 {
config.EmergencyThreshold = 1.167 // Target + 1/6th
}
if config.RecoveryThreshold == 0 {
config.RecoveryThreshold = 0.833 // Target - 1/6th
}
if config.EmergencyMaxDelayMs == 0 {
config.EmergencyMaxDelayMs = 5000 // 5 seconds
}
if config.CompactionCheckInterval == 0 {
config.CompactionCheckInterval = 10 * time.Second
}
l := &Limiter{
config: config,
monitor: monitor,
@@ -196,6 +256,11 @@ func NewLimiter(config Config, monitor loadmonitor.Monitor) *Limiter {
monitor.SetMemoryTarget(uint64(config.TargetMemoryMB) * 1024 * 1024)
}
// Configure emergency threshold if monitor supports it
if emMon, ok := monitor.(loadmonitor.EmergencyModeMonitor); ok {
emMon.SetEmergencyThreshold(config.EmergencyThreshold)
}
return l
}
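// Illustrative wiring sketch (not part of this change): any loadmonitor.Monitor
// (e.g. the Badger or Neo4j monitors in this package) can back a Limiter. The
// function name is hypothetical; opType 1 = Write per the comment on Wait below.
func exampleLimiterSetup(ctx context.Context, mon loadmonitor.Monitor) time.Duration {
	lim := NewLimiter(DefaultConfig(), mon)
	return lim.Wait(ctx, 1) // returns the delay that was applied, 0 if none
}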
@@ -255,12 +320,13 @@ func (l *Limiter) Stopped() <-chan struct{} {
// Wait blocks until the rate limiter permits the operation to proceed.
// It returns the delay that was applied, or 0 if no delay was needed.
// If the context is cancelled, it returns immediately.
-func (l *Limiter) Wait(ctx context.Context, opType OperationType) time.Duration {
+// opType accepts int for interface compatibility (0=Read, 1=Write)
+func (l *Limiter) Wait(ctx context.Context, opType int) time.Duration {
if !l.config.Enabled || l.monitor == nil {
return 0
}
-delay := l.ComputeDelay(opType)
+delay := l.ComputeDelay(OperationType(opType))
if delay <= 0 {
return 0
}
@@ -286,6 +352,9 @@ func (l *Limiter) ComputeDelay(opType OperationType) time.Duration {
metrics := l.currentMetrics
l.metricsLock.RUnlock()
// Check emergency mode with hysteresis
inEmergency := l.checkEmergencyMode(metrics.MemoryPressure)
// Compute process variable as weighted combination of memory and load
var loadMetric float64
switch opType {
@@ -305,6 +374,34 @@ func (l *Limiter) ComputeDelay(opType OperationType) time.Duration {
case Write:
out := l.writePID.UpdateValue(pv)
delaySec = out.Value()
// In emergency mode, apply progressive throttling for writes
if inEmergency {
// Scale the write delay by how far memory pressure sits above the
// recovery threshold: the multiplier starts at 2x and doubles for each
// additional 10% of excess, up to the emergency delay cap below.
excessPressure := metrics.MemoryPressure - l.config.RecoveryThreshold
if excessPressure > 0 {
// Progressive multiplier: starts at 2x, doubles every 10% excess
multiplier := 2.0
for excess := excessPressure; excess > 0.1; excess -= 0.1 {
multiplier *= 2
}
emergencyDelaySec := delaySec * multiplier
maxEmergencySec := float64(l.config.EmergencyMaxDelayMs) / 1000.0
if emergencyDelaySec > maxEmergencySec {
emergencyDelaySec = maxEmergencySec
}
// Minimum emergency delay of 100ms to allow other operations
if emergencyDelaySec < 0.1 {
emergencyDelaySec = 0.1
}
delaySec = emergencyDelaySec
}
}
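// Worked example (illustrative): with the default RecoveryThreshold of 0.833
// and memory pressure 1.20, excessPressure is about 0.367; the loop doubles
// the starting 2x multiplier three times (0.367, 0.267 and 0.167 all exceed
// 0.1), giving 16x, and the result is then capped at EmergencyMaxDelayMs.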
if delaySec > 0 {
l.writeThrottles.Add(1)
l.totalWriteDelayMs.Add(int64(delaySec * 1000))
@@ -325,6 +422,68 @@ func (l *Limiter) ComputeDelay(opType OperationType) time.Duration {
return time.Duration(delaySec * float64(time.Second))
}
// checkEmergencyMode implements hysteresis-based emergency mode detection.
// Enters emergency mode when memory pressure >= EmergencyThreshold.
// Exits emergency mode when memory pressure <= RecoveryThreshold.
func (l *Limiter) checkEmergencyMode(memoryPressure float64) bool {
wasInEmergency := l.inEmergencyMode.Load()
if wasInEmergency {
// To exit, must drop below recovery threshold
if memoryPressure <= l.config.RecoveryThreshold {
l.inEmergencyMode.Store(false)
log.I.F("✅ exiting emergency mode: memory %.1f%% <= recovery threshold %.1f%%",
memoryPressure*100, l.config.RecoveryThreshold*100)
return false
}
return true
}
// To enter, must exceed emergency threshold
if memoryPressure >= l.config.EmergencyThreshold {
l.inEmergencyMode.Store(true)
l.emergencyEvents.Add(1)
log.W.F("⚠️ entering emergency mode: memory %.1f%% >= threshold %.1f%%",
memoryPressure*100, l.config.EmergencyThreshold*100)
// Trigger compaction if supported
l.triggerCompactionIfNeeded()
return true
}
return false
}
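// Worked example (illustrative) with the defaults (enter at 1.167, exit at
// 0.833): pressure 1.20 enters emergency mode, 0.90 stays in it (still above
// the recovery threshold), and 0.80 exits. The gap between the two thresholds
// is what prevents rapid oscillation.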
// triggerCompactionIfNeeded triggers database compaction if the monitor supports it
// and compaction isn't already in progress.
func (l *Limiter) triggerCompactionIfNeeded() {
if l.compactionTriggered.Load() {
return // Already triggered
}
compactMon, ok := l.monitor.(loadmonitor.CompactableMonitor)
if !ok {
return // Monitor doesn't support compaction
}
if compactMon.IsCompacting() {
return // Already compacting
}
l.compactionTriggered.Store(true)
go func() {
defer l.compactionTriggered.Store(false)
if err := compactMon.TriggerCompaction(); err != nil {
log.E.F("compaction failed: %v", err)
}
}()
}
// InEmergencyMode returns true if the limiter is currently in emergency mode.
func (l *Limiter) InEmergencyMode() bool {
return l.inEmergencyMode.Load()
}
// RecordLatency records an operation latency for the monitor.
func (l *Limiter) RecordLatency(opType OperationType, latency time.Duration) {
if l.monitor == nil {
@@ -345,6 +504,8 @@ type Stats struct {
ReadThrottles int64
TotalWriteDelayMs int64
TotalReadDelayMs int64
EmergencyEvents int64
InEmergencyMode bool
CurrentMetrics loadmonitor.Metrics
WritePIDState PIDState
ReadPIDState PIDState
@@ -368,6 +529,8 @@ func (l *Limiter) GetStats() Stats {
ReadThrottles: l.readThrottles.Load(),
TotalWriteDelayMs: l.totalWriteDelayMs.Load(),
TotalReadDelayMs: l.totalReadDelayMs.Load(),
EmergencyEvents: l.emergencyEvents.Load(),
InEmergencyMode: l.inEmergencyMode.Load(),
CurrentMetrics: metrics,
}
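// Illustrative sketch (not part of this change): surfacing the new emergency
// counters alongside the existing throttle statistics; the function name and
// log message are hypothetical.
func exampleLogStats(l *Limiter) {
	s := l.GetStats()
	log.I.F("write throttles=%d emergency events=%d in emergency=%v",
		s.WriteThrottles, s.EmergencyEvents, s.InEmergencyMode)
}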

pkg/ratelimit/memory.go (new file, 149 lines)

@@ -0,0 +1,149 @@
//go:build !(js && wasm)
package ratelimit
import (
"errors"
"runtime"
"github.com/pbnjay/memory"
)
// MinimumMemoryMB is the minimum memory required to run the relay with rate limiting.
const MinimumMemoryMB = 500
// AutoDetectMemoryFraction is the fraction of available memory to use when auto-detecting.
const AutoDetectMemoryFraction = 0.66
// DefaultMaxMemoryMB is the default maximum memory target when auto-detecting.
// This caps the auto-detected value to ensure optimal performance.
const DefaultMaxMemoryMB = 1500
// ErrInsufficientMemory is returned when there isn't enough memory to run the relay.
var ErrInsufficientMemory = errors.New("insufficient memory: relay requires at least 500MB of available memory")
// ProcessMemoryStats contains memory statistics for the current process.
// On Linux, these are read from /proc/self/status for accurate RSS values.
// On other platforms, these are approximated from Go runtime stats.
type ProcessMemoryStats struct {
// VmRSS is the resident set size (total physical memory in use) in bytes
VmRSS uint64
// RssShmem is the shared memory portion of RSS in bytes
RssShmem uint64
// RssAnon is the anonymous (non-shared) memory in bytes
RssAnon uint64
// VmHWM is the peak RSS (high water mark) in bytes
VmHWM uint64
}
// PhysicalMemoryBytes returns the actual physical memory usage (RSS - shared)
func (p ProcessMemoryStats) PhysicalMemoryBytes() uint64 {
if p.VmRSS > p.RssShmem {
return p.VmRSS - p.RssShmem
}
return p.VmRSS
}
// PhysicalMemoryMB returns the actual physical memory usage in MB
func (p ProcessMemoryStats) PhysicalMemoryMB() uint64 {
return p.PhysicalMemoryBytes() / (1024 * 1024)
}
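// Worked example (illustrative; the function name is hypothetical): with VmRSS
// of 1600 MiB and RssShmem of 100 MiB, PhysicalMemoryBytes reports 1500 MiB,
// so PhysicalMemoryMB is 1500.
func examplePhysicalMemory() uint64 {
	p := ProcessMemoryStats{VmRSS: 1600 << 20, RssShmem: 100 << 20}
	return p.PhysicalMemoryMB() // 1500
}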
// DetectAvailableMemoryMB returns the available system memory in megabytes.
// It uses the cross-platform pbnjay/memory package: free physical memory when
// it can be determined, falling back to total system memory otherwise.
func DetectAvailableMemoryMB() uint64 {
// Use pbnjay/memory for cross-platform memory detection
available := memory.FreeMemory()
if available == 0 {
// Fallback: use total memory
available = memory.TotalMemory()
}
return available / (1024 * 1024)
}
// DetectTotalMemoryMB returns the total system memory in megabytes.
func DetectTotalMemoryMB() uint64 {
return memory.TotalMemory() / (1024 * 1024)
}
// CalculateTargetMemoryMB calculates the target memory limit based on configuration.
// If configuredMB is 0, it auto-detects based on available memory (66% of available, capped at 1.5GB).
// If configuredMB is non-zero, it validates that it's achievable.
// Returns an error if there isn't enough memory.
func CalculateTargetMemoryMB(configuredMB int) (int, error) {
availableMB := int(DetectAvailableMemoryMB())
// If configured to auto-detect (0), calculate target
if configuredMB == 0 {
// First check if we have minimum available memory
if availableMB < MinimumMemoryMB {
return 0, ErrInsufficientMemory
}
// Calculate 66% of available
targetMB := int(float64(availableMB) * AutoDetectMemoryFraction)
// If 66% is less than minimum, use minimum (we've already verified we have enough)
if targetMB < MinimumMemoryMB {
targetMB = MinimumMemoryMB
}
// Cap at default maximum for optimal performance
if targetMB > DefaultMaxMemoryMB {
targetMB = DefaultMaxMemoryMB
}
return targetMB, nil
}
// If explicitly configured, validate it's achievable
if configuredMB < MinimumMemoryMB {
return 0, ErrInsufficientMemory
}
// Allow a configured target that exceeds currently available memory
// (the PID controller will throttle as needed)
return configuredMB, nil
}
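// Worked examples (illustrative): with 1000MB available and configuredMB == 0
// the target is 660MB (66%); with 4000MB available the 2640MB result is capped
// at DefaultMaxMemoryMB (1500); with only 400MB available the call returns
// ErrInsufficientMemory.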
// MemoryStats holds current memory statistics for logging.
type MemoryStats struct {
TotalMB uint64
AvailableMB uint64
TargetMB int
GoAllocatedMB uint64
GoSysMB uint64
}
// GetMemoryStats returns current memory statistics.
func GetMemoryStats(targetMB int) MemoryStats {
var m runtime.MemStats
runtime.ReadMemStats(&m)
return MemoryStats{
TotalMB: DetectTotalMemoryMB(),
AvailableMB: DetectAvailableMemoryMB(),
TargetMB: targetMB,
GoAllocatedMB: m.Alloc / (1024 * 1024),
GoSysMB: m.Sys / (1024 * 1024),
}
}
// readProcessMemoryStatsFallback returns memory stats using Go runtime.
// This is used on non-Linux platforms or when /proc is unavailable.
// The values are approximations and may not accurately reflect OS-level metrics.
func readProcessMemoryStatsFallback() ProcessMemoryStats {
var m runtime.MemStats
runtime.ReadMemStats(&m)
// Use Sys as an approximation of RSS (includes all memory from OS)
// HeapAlloc approximates anonymous memory (live heap objects)
// We cannot determine shared memory from Go runtime, so leave it at 0
return ProcessMemoryStats{
VmRSS: m.Sys,
RssAnon: m.HeapAlloc,
RssShmem: 0, // Cannot determine shared memory from Go runtime
VmHWM: 0, // Not available from Go runtime
}
}


@@ -0,0 +1,62 @@
//go:build linux && !(js && wasm)
package ratelimit
import (
"bufio"
"os"
"strconv"
"strings"
)
// ReadProcessMemoryStats reads memory statistics from /proc/self/status.
// This provides accurate RSS (Resident Set Size) information on Linux,
// including the breakdown between shared and anonymous memory.
func ReadProcessMemoryStats() ProcessMemoryStats {
stats := ProcessMemoryStats{}
file, err := os.Open("/proc/self/status")
if err != nil {
// Fallback to runtime stats if /proc is not available
return readProcessMemoryStatsFallback()
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
fields := strings.Fields(line)
if len(fields) < 2 {
continue
}
key := strings.TrimSuffix(fields[0], ":")
valueStr := fields[1]
value, err := strconv.ParseUint(valueStr, 10, 64)
if err != nil {
continue
}
// Values in /proc/self/status are in kB
valueBytes := value * 1024
switch key {
case "VmRSS":
stats.VmRSS = valueBytes
case "RssShmem":
stats.RssShmem = valueBytes
case "RssAnon":
stats.RssAnon = valueBytes
case "VmHWM":
stats.VmHWM = valueBytes
}
}
// If we didn't get VmRSS, fall back to runtime stats
if stats.VmRSS == 0 {
return readProcessMemoryStatsFallback()
}
return stats
}
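// Representative /proc/self/status lines this parser consumes (values are
// illustrative; the kernel reports them in kB):
//
//	VmHWM:    1843200 kB
//	VmRSS:    1572864 kB
//	RssAnon:  1468006 kB
//	RssShmem:  104858 kB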


@@ -0,0 +1,15 @@
//go:build !linux && !(js && wasm)
package ratelimit
// ReadProcessMemoryStats returns memory statistics using Go runtime stats.
// On non-Linux platforms, we cannot read /proc/self/status, so we approximate
// using the Go runtime's memory statistics.
//
// Note: This is less accurate than the Linux implementation because:
// - runtime.MemStats.Sys includes memory reserved but not necessarily resident
// - We cannot distinguish shared vs anonymous memory
// - The values may not match what the OS reports for the process
func ReadProcessMemoryStats() ProcessMemoryStats {
return readProcessMemoryStatsFallback()
}


@@ -2,20 +2,25 @@ package ratelimit
import (
"context"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
"lol.mleku.dev/log"
"next.orly.dev/pkg/interfaces/loadmonitor"
)
// Neo4jMonitor implements loadmonitor.Monitor for Neo4j database.
// Since Neo4j driver doesn't expose detailed metrics, we track:
-// - Memory pressure via Go runtime
+// - Memory pressure via actual RSS (not Go runtime)
// - Query concurrency via the semaphore
// - Latency via recording
//
// This monitor implements aggressive memory-based limiting:
// When memory exceeds the target, it applies 50% more aggressive throttling.
// It rechecks every 10 seconds and doubles the throttling multiplier until
// memory returns under target.
type Neo4jMonitor struct {
driver neo4j.DriverWithContext
querySem chan struct{} // Reference to the query semaphore
@@ -23,14 +28,24 @@ type Neo4jMonitor struct {
// Target memory for pressure calculation
targetMemoryBytes atomic.Uint64
// Emergency mode configuration
emergencyThreshold atomic.Uint64 // stored as threshold * 1000 (e.g., 1500 = 1.5)
emergencyModeUntil atomic.Int64 // Unix nano when forced emergency mode ends
inEmergencyMode atomic.Bool
// Aggressive throttling multiplier for Neo4j
// Starts at 1.5 (50% more aggressive), doubles every 10 seconds while over limit
throttleMultiplier atomic.Uint64 // stored as multiplier * 100 (e.g., 150 = 1.5x)
lastThrottleCheck atomic.Int64 // Unix nano timestamp
// Latency tracking with exponential moving average
queryLatencyNs atomic.Int64
writeLatencyNs atomic.Int64
latencyAlpha float64 // EMA coefficient (default 0.1)
// Concurrency tracking
activeReads atomic.Int32
activeWrites atomic.Int32
maxConcurrency int
// Cached metrics (updated by background goroutine)
@@ -43,8 +58,12 @@ type Neo4jMonitor struct {
interval time.Duration
}
-// Compile-time check that Neo4jMonitor implements loadmonitor.Monitor
+// Compile-time checks for interface implementation
var _ loadmonitor.Monitor = (*Neo4jMonitor)(nil)
var _ loadmonitor.EmergencyModeMonitor = (*Neo4jMonitor)(nil)
// ThrottleCheckInterval is how often to recheck memory and adjust throttling
const ThrottleCheckInterval = 10 * time.Second
// NewNeo4jMonitor creates a new Neo4j load monitor.
// The querySem should be the same semaphore used for limiting concurrent queries.
@@ -75,9 +94,40 @@ func NewNeo4jMonitor(
// Set a default target (1.5GB)
m.targetMemoryBytes.Store(1500 * 1024 * 1024)
// Default emergency threshold: 100% of target (same as target for Neo4j)
m.emergencyThreshold.Store(1000)
// Start with 1.0x multiplier (no throttling)
m.throttleMultiplier.Store(100)
return m
}
// SetEmergencyThreshold sets the memory threshold above which emergency mode is triggered.
// threshold is a fraction, e.g., 1.0 = 100% of target memory.
func (m *Neo4jMonitor) SetEmergencyThreshold(threshold float64) {
m.emergencyThreshold.Store(uint64(threshold * 1000))
}
// GetEmergencyThreshold returns the current emergency threshold as a fraction.
func (m *Neo4jMonitor) GetEmergencyThreshold() float64 {
return float64(m.emergencyThreshold.Load()) / 1000.0
}
// ForceEmergencyMode manually triggers emergency mode for a duration.
func (m *Neo4jMonitor) ForceEmergencyMode(duration time.Duration) {
m.emergencyModeUntil.Store(time.Now().Add(duration).UnixNano())
m.inEmergencyMode.Store(true)
m.throttleMultiplier.Store(150) // Start at 1.5x
log.W.F("⚠️ Neo4j emergency mode forced for %v", duration)
}
// GetThrottleMultiplier returns the current throttle multiplier.
// Returns a value >= 1.0, where 1.0 = no extra throttling, 1.5 = 50% more aggressive, etc.
func (m *Neo4jMonitor) GetThrottleMultiplier() float64 {
return float64(m.throttleMultiplier.Load()) / 100.0
}
// GetMetrics returns the current load metrics.
func (m *Neo4jMonitor) GetMetrics() loadmonitor.Metrics {
m.metricsLock.RLock()
@@ -157,22 +207,27 @@ func (m *Neo4jMonitor) collectLoop() {
}
}
-// updateMetrics collects current metrics.
+// updateMetrics collects current metrics and manages aggressive throttling.
func (m *Neo4jMonitor) updateMetrics() {
metrics := loadmonitor.Metrics{
Timestamp: time.Now(),
}
-// Calculate memory pressure from Go runtime
-var memStats runtime.MemStats
-runtime.ReadMemStats(&memStats)
+// Use RSS-based memory pressure (actual physical memory, not Go runtime)
+procMem := ReadProcessMemoryStats()
+physicalMemBytes := procMem.PhysicalMemoryBytes()
+metrics.PhysicalMemoryMB = physicalMemBytes / (1024 * 1024)
targetBytes := m.targetMemoryBytes.Load()
if targetBytes > 0 {
-// Use HeapAlloc as primary memory metric
-metrics.MemoryPressure = float64(memStats.HeapAlloc) / float64(targetBytes)
+// Use actual physical memory (RSS - shared) for pressure calculation
+metrics.MemoryPressure = float64(physicalMemBytes) / float64(targetBytes)
}
// Check and update emergency mode with aggressive throttling
m.updateEmergencyMode(metrics.MemoryPressure)
metrics.InEmergencyMode = m.inEmergencyMode.Load()
// Calculate load from semaphore usage
// querySem is a buffered channel - count how many slots are taken
if m.querySem != nil {
@@ -186,6 +241,20 @@ func (m *Neo4jMonitor) updateMetrics() {
metrics.ReadLoad = concurrencyLoad
}
// Apply throttle multiplier to loads when in emergency mode
// This makes the PID controller think load is higher, causing more throttling
if metrics.InEmergencyMode {
multiplier := m.GetThrottleMultiplier()
metrics.WriteLoad = metrics.WriteLoad * multiplier
if metrics.WriteLoad > 1.0 {
metrics.WriteLoad = 1.0
}
metrics.ReadLoad = metrics.ReadLoad * multiplier
if metrics.ReadLoad > 1.0 {
metrics.ReadLoad = 1.0
}
}
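// Worked example (illustrative): with ReadLoad = 0.4 and a 3x throttle
// multiplier, the reported ReadLoad becomes 1.0 (1.2 clamped to the maximum),
// pushing the PID controller toward its strongest throttling response.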
// Add latency-based load adjustment
// High latency indicates the database is struggling
queryLatencyNs := m.queryLatencyNs.Load()
@@ -221,6 +290,60 @@ func (m *Neo4jMonitor) updateMetrics() {
m.metricsLock.Unlock()
}
// updateEmergencyMode manages the emergency mode state and throttle multiplier.
// When memory exceeds the target:
// - Enters emergency mode with 1.5x throttle multiplier (50% more aggressive)
// - Every 10 seconds while still over limit, doubles the multiplier
// - When memory returns under target, resets to normal
func (m *Neo4jMonitor) updateEmergencyMode(memoryPressure float64) {
threshold := float64(m.emergencyThreshold.Load()) / 1000.0
forcedUntil := m.emergencyModeUntil.Load()
now := time.Now().UnixNano()
// Check if in forced emergency mode
if forcedUntil > now {
return // Stay in forced mode
}
// Check if memory exceeds threshold
if memoryPressure >= threshold {
if !m.inEmergencyMode.Load() {
// Entering emergency mode - start at 1.5x (50% more aggressive)
m.inEmergencyMode.Store(true)
m.throttleMultiplier.Store(150)
m.lastThrottleCheck.Store(now)
log.W.F("⚠️ Neo4j entering emergency mode: memory %.1f%% >= threshold %.1f%%, throttle 1.5x",
memoryPressure*100, threshold*100)
return
}
// Already in emergency mode - check if it's time to double throttling
lastCheck := m.lastThrottleCheck.Load()
elapsed := time.Duration(now - lastCheck)
if elapsed >= ThrottleCheckInterval {
// Double the throttle multiplier
currentMult := m.throttleMultiplier.Load()
newMult := currentMult * 2
if newMult > 1600 { // Cap at 16x to keep the backoff bounded
newMult = 1600
}
m.throttleMultiplier.Store(newMult)
m.lastThrottleCheck.Store(now)
log.W.F("⚠️ Neo4j still over memory limit: %.1f%%, doubling throttle to %.1fx",
memoryPressure*100, float64(newMult)/100.0)
}
} else {
// Memory is under threshold
if m.inEmergencyMode.Load() {
m.inEmergencyMode.Store(false)
m.throttleMultiplier.Store(100) // Reset to 1.0x
log.I.F("✅ Neo4j exiting emergency mode: memory %.1f%% < threshold %.1f%%",
memoryPressure*100, threshold*100)
}
}
}
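// Worked example (illustrative): if memory stays above target for 40 seconds,
// the multiplier progresses 1.5x on entry, 3x after 10s, 6x after 20s, 12x
// after 30s, then is clamped at the 16x cap; it resets to 1.0x as soon as
// pressure drops back below the threshold.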
// IncrementActiveReads tracks an active read operation.
// Call this when starting a read, and call the returned function when done.
func (m *Neo4jMonitor) IncrementActiveReads() func() {