add first draft graph query implementation
Some checks failed
Go / build-and-release (push) Has been cancelled
Some checks failed
Go / build-and-release (push) Has been cancelled
This commit is contained in:
282
pkg/protocol/graph/ratelimit.go
Normal file
282
pkg/protocol/graph/ratelimit.go
Normal file
@@ -0,0 +1,282 @@
|
||||
package graph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RateLimiter implements a token bucket rate limiter with adaptive throttling
// based on graph query complexity. It allows cooperative scheduling by inserting
// pauses between operations to allow other work to proceed.
//
// All methods are safe for concurrent use: mu guards every field below.
// The zero value is not usable; construct with NewRateLimiter.
type RateLimiter struct {
	mu sync.Mutex

	// Token bucket parameters
	tokens     float64   // Current available tokens (fractional; consumed by Acquire/TryAcquire)
	maxTokens  float64   // Maximum token capacity; refills are capped here
	refillRate float64   // Tokens per second to add
	lastRefill time.Time // Last time tokens were refilled (advanced by refillTokens)

	// Throttling parameters
	baseDelay   time.Duration // Minimum delay between operations
	maxDelay    time.Duration // Maximum delay for complex queries
	depthFactor float64       // Multiplier per depth level (cost grows as depthFactor^depth)
	limitFactor float64       // Multiplier based on result limit
	// NOTE(review): limitFactor is validated and stored but never read by any
	// method in this file (QueryCost ignores it) — confirm intended use.
}
|
||||
|
||||
// RateLimiterConfig configures the rate limiter behavior.
//
// Any field left at a non-positive value is replaced with the corresponding
// default from DefaultRateLimiterConfig by NewRateLimiter, so the zero value
// of this struct is a valid configuration.
type RateLimiterConfig struct {
	// MaxTokens is the maximum number of tokens in the bucket (default: 100)
	MaxTokens float64

	// RefillRate is tokens added per second (default: 10)
	RefillRate float64

	// BaseDelay is the minimum delay between operations (default: 1ms)
	BaseDelay time.Duration

	// MaxDelay is the maximum delay for complex queries (default: 100ms)
	MaxDelay time.Duration

	// DepthFactor is the cost multiplier per depth level (default: 2.0)
	// A depth-3 query costs 2^3 = 8x more tokens than depth-1
	DepthFactor float64

	// LimitFactor is additional cost per 100 results requested (default: 0.1)
	LimitFactor float64
}
|
||||
|
||||
// DefaultRateLimiterConfig returns sensible defaults for the rate limiter.
|
||||
func DefaultRateLimiterConfig() RateLimiterConfig {
|
||||
return RateLimiterConfig{
|
||||
MaxTokens: 100.0,
|
||||
RefillRate: 10.0, // Refills fully in 10 seconds
|
||||
BaseDelay: 1 * time.Millisecond,
|
||||
MaxDelay: 100 * time.Millisecond,
|
||||
DepthFactor: 2.0,
|
||||
LimitFactor: 0.1,
|
||||
}
|
||||
}
|
||||
|
||||
// NewRateLimiter creates a new rate limiter with the given configuration.
|
||||
func NewRateLimiter(cfg RateLimiterConfig) *RateLimiter {
|
||||
if cfg.MaxTokens <= 0 {
|
||||
cfg.MaxTokens = DefaultRateLimiterConfig().MaxTokens
|
||||
}
|
||||
if cfg.RefillRate <= 0 {
|
||||
cfg.RefillRate = DefaultRateLimiterConfig().RefillRate
|
||||
}
|
||||
if cfg.BaseDelay <= 0 {
|
||||
cfg.BaseDelay = DefaultRateLimiterConfig().BaseDelay
|
||||
}
|
||||
if cfg.MaxDelay <= 0 {
|
||||
cfg.MaxDelay = DefaultRateLimiterConfig().MaxDelay
|
||||
}
|
||||
if cfg.DepthFactor <= 0 {
|
||||
cfg.DepthFactor = DefaultRateLimiterConfig().DepthFactor
|
||||
}
|
||||
if cfg.LimitFactor <= 0 {
|
||||
cfg.LimitFactor = DefaultRateLimiterConfig().LimitFactor
|
||||
}
|
||||
|
||||
return &RateLimiter{
|
||||
tokens: cfg.MaxTokens,
|
||||
maxTokens: cfg.MaxTokens,
|
||||
refillRate: cfg.RefillRate,
|
||||
lastRefill: time.Now(),
|
||||
baseDelay: cfg.BaseDelay,
|
||||
maxDelay: cfg.MaxDelay,
|
||||
depthFactor: cfg.DepthFactor,
|
||||
limitFactor: cfg.LimitFactor,
|
||||
}
|
||||
}
|
||||
|
||||
// QueryCost calculates the token cost for a graph query based on its complexity.
|
||||
// Higher depths and larger limits cost exponentially more tokens.
|
||||
func (rl *RateLimiter) QueryCost(q *Query) float64 {
|
||||
if q == nil {
|
||||
return 1.0
|
||||
}
|
||||
|
||||
// Base cost is exponential in depth: depthFactor^depth
|
||||
// This models the exponential growth of traversal work
|
||||
cost := 1.0
|
||||
for i := 0; i < q.Depth; i++ {
|
||||
cost *= rl.depthFactor
|
||||
}
|
||||
|
||||
// Add cost for reference collection (adds ~50% per ref spec)
|
||||
refCost := float64(len(q.InboundRefs)+len(q.OutboundRefs)) * 0.5
|
||||
cost += refCost
|
||||
|
||||
return cost
|
||||
}
|
||||
|
||||
// OperationCost calculates the token cost for a single traversal operation.
|
||||
// This is used during query execution for per-operation throttling.
|
||||
func (rl *RateLimiter) OperationCost(depth int, nodesAtDepth int) float64 {
|
||||
// Cost increases with depth and number of nodes to process
|
||||
depthMultiplier := 1.0
|
||||
for i := 0; i < depth; i++ {
|
||||
depthMultiplier *= rl.depthFactor
|
||||
}
|
||||
|
||||
// More nodes at this depth = more work
|
||||
nodeFactor := 1.0 + float64(nodesAtDepth)*0.01
|
||||
|
||||
return depthMultiplier * nodeFactor
|
||||
}
|
||||
|
||||
// refillTokens adds tokens based on elapsed time since last refill.
|
||||
func (rl *RateLimiter) refillTokens() {
|
||||
now := time.Now()
|
||||
elapsed := now.Sub(rl.lastRefill).Seconds()
|
||||
rl.lastRefill = now
|
||||
|
||||
rl.tokens += elapsed * rl.refillRate
|
||||
if rl.tokens > rl.maxTokens {
|
||||
rl.tokens = rl.maxTokens
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire tries to acquire tokens for a query. If not enough tokens are available,
|
||||
// it waits until they become available or the context is cancelled.
|
||||
// Returns the delay that was applied, or an error if context was cancelled.
|
||||
func (rl *RateLimiter) Acquire(ctx context.Context, cost float64) (time.Duration, error) {
|
||||
rl.mu.Lock()
|
||||
defer rl.mu.Unlock()
|
||||
|
||||
rl.refillTokens()
|
||||
|
||||
var totalDelay time.Duration
|
||||
|
||||
// Wait until we have enough tokens
|
||||
for rl.tokens < cost {
|
||||
// Calculate how long we need to wait for tokens to refill
|
||||
tokensNeeded := cost - rl.tokens
|
||||
waitTime := time.Duration(tokensNeeded/rl.refillRate*1000) * time.Millisecond
|
||||
|
||||
// Clamp to max delay
|
||||
if waitTime > rl.maxDelay {
|
||||
waitTime = rl.maxDelay
|
||||
}
|
||||
if waitTime < rl.baseDelay {
|
||||
waitTime = rl.baseDelay
|
||||
}
|
||||
|
||||
// Release lock while waiting
|
||||
rl.mu.Unlock()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
rl.mu.Lock()
|
||||
return totalDelay, ctx.Err()
|
||||
case <-time.After(waitTime):
|
||||
}
|
||||
|
||||
totalDelay += waitTime
|
||||
rl.mu.Lock()
|
||||
rl.refillTokens()
|
||||
}
|
||||
|
||||
// Consume tokens
|
||||
rl.tokens -= cost
|
||||
return totalDelay, nil
|
||||
}
|
||||
|
||||
// TryAcquire attempts to acquire tokens without waiting.
|
||||
// Returns true if successful, false if insufficient tokens.
|
||||
func (rl *RateLimiter) TryAcquire(cost float64) bool {
|
||||
rl.mu.Lock()
|
||||
defer rl.mu.Unlock()
|
||||
|
||||
rl.refillTokens()
|
||||
|
||||
if rl.tokens >= cost {
|
||||
rl.tokens -= cost
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Pause inserts a cooperative delay to allow other work to proceed.
|
||||
// The delay is proportional to the current depth and load.
|
||||
// This should be called periodically during long-running traversals.
|
||||
func (rl *RateLimiter) Pause(ctx context.Context, depth int, itemsProcessed int) error {
|
||||
// Calculate adaptive delay based on depth and progress
|
||||
// Deeper traversals and more processed items = longer pauses
|
||||
delay := rl.baseDelay
|
||||
|
||||
// Increase delay with depth
|
||||
for i := 0; i < depth; i++ {
|
||||
delay += rl.baseDelay
|
||||
}
|
||||
|
||||
// Add extra delay every N items to allow other work
|
||||
if itemsProcessed > 0 && itemsProcessed%100 == 0 {
|
||||
delay += rl.baseDelay * 5
|
||||
}
|
||||
|
||||
// Cap at max delay
|
||||
if delay > rl.maxDelay {
|
||||
delay = rl.maxDelay
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(delay):
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// AvailableTokens returns the current number of available tokens.
|
||||
func (rl *RateLimiter) AvailableTokens() float64 {
|
||||
rl.mu.Lock()
|
||||
defer rl.mu.Unlock()
|
||||
rl.refillTokens()
|
||||
return rl.tokens
|
||||
}
|
||||
|
||||
// Throttler provides a simple interface for cooperative scheduling during traversal.
// It wraps the rate limiter and provides depth-aware throttling.
//
// A Throttler tracks progress for one traversal; itemsProcessed is mutated
// without synchronization in Tick, so a single Throttler should not be
// shared across goroutines.
type Throttler struct {
	rl             *RateLimiter // Shared limiter that performs the actual pausing
	depth          int          // Traversal depth this throttler was created for
	itemsProcessed int          // Items processed so far; incremented by Tick
}
|
||||
|
||||
// NewThrottler creates a throttler for a specific traversal operation.
|
||||
func NewThrottler(rl *RateLimiter, depth int) *Throttler {
|
||||
return &Throttler{
|
||||
rl: rl,
|
||||
depth: depth,
|
||||
}
|
||||
}
|
||||
|
||||
// Tick should be called after processing each item.
|
||||
// It tracks progress and inserts pauses as needed.
|
||||
func (t *Throttler) Tick(ctx context.Context) error {
|
||||
t.itemsProcessed++
|
||||
|
||||
// Insert cooperative pause periodically
|
||||
// More frequent pauses at higher depths
|
||||
interval := 50
|
||||
if t.depth >= 2 {
|
||||
interval = 25
|
||||
}
|
||||
if t.depth >= 4 {
|
||||
interval = 10
|
||||
}
|
||||
|
||||
if t.itemsProcessed%interval == 0 {
|
||||
return t.rl.Pause(ctx, t.depth, t.itemsProcessed)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Complete marks the throttler as complete and returns stats.
|
||||
func (t *Throttler) Complete() (itemsProcessed int) {
|
||||
return t.itemsProcessed
|
||||
}
|
||||
Reference in New Issue
Block a user