Merge remote-tracking branch 'origin/main'

2025-08-15 18:56:12 +01:00
16 changed files with 3549 additions and 720 deletions


@@ -1,173 +0,0 @@
# Orly Relay Benchmark Results
## Test Environment
- **Date**: August 5, 2025
- **Relay**: Orly v0.4.14
- **Port**: 3334 (WebSocket)
- **System**: Linux 5.15.0-151-generic
- **Storage**: BadgerDB v4
## Benchmark Test Results
### Test 1: Basic Performance (1,000 events, 1KB each)
**Parameters:**
- Events: 1,000
- Event size: 1,024 bytes
- Concurrent publishers: 5
- Queries: 50
**Results:**
```
Publish Performance:
Events Published: 1,000
Total Data: 4.01 MB
Duration: 1.769s
Rate: 565.42 events/second
Bandwidth: 2.26 MB/second
Query Performance:
Queries Executed: 50
Events Returned: 2,000
Duration: 3.058s
Rate: 16.35 queries/second
Avg Events/Query: 40.00
```
### Test 2: Medium Load (10,000 events, 2KB each)
**Parameters:**
- Events: 10,000
- Event size: 2,048 bytes
- Concurrent publishers: 10
- Queries: 100
**Results:**
```
Publish Performance:
Events Published: 10,000
Total Data: 76.81 MB
Duration: 598.301ms
Rate: 16,714.00 events/second
Bandwidth: 128.38 MB/second
Query Performance:
Queries Executed: 100
Events Returned: 4,000
Duration: 8.923s
Rate: 11.21 queries/second
Avg Events/Query: 40.00
```
### Test 3: High Concurrency (50,000 events, 512 bytes each)
**Parameters:**
- Events: 50,000
- Event size: 512 bytes
- Concurrent publishers: 50
- Queries: 200
**Results:**
```
Publish Performance:
Events Published: 50,000
Total Data: 108.63 MB
Duration: 2.368s
Rate: 21,118.66 events/second
Bandwidth: 45.88 MB/second
Query Performance:
Queries Executed: 200
Events Returned: 8,000
Duration: 36.146s
Rate: 5.53 queries/second
Avg Events/Query: 40.00
```
### Test 4: Large Events (5,000 events, 10KB each)
**Parameters:**
- Events: 5,000
- Event size: 10,240 bytes
- Concurrent publishers: 10
- Queries: 50
**Results:**
```
Publish Performance:
Events Published: 5,000
Total Data: 185.26 MB
Duration: 934.328ms
Rate: 5,351.44 events/second
Bandwidth: 198.28 MB/second
Query Performance:
Queries Executed: 50
Events Returned: 2,000
Duration: 9.982s
Rate: 5.01 queries/second
Avg Events/Query: 40.00
```
### Test 5: Query-Only Performance (500 queries)
**Parameters:**
- Skip publishing phase
- Queries: 500
- Query limit: 100
**Results:**
```
Query Performance:
Queries Executed: 500
Events Returned: 20,000
Duration: 1m14.384s
Rate: 6.72 queries/second
Avg Events/Query: 40.00
```
## Performance Summary
### Publishing Performance
| Metric | Best Result | Test Configuration |
|--------|-------------|-------------------|
| **Peak Event Rate** | 21,118.66 events/sec | 50 concurrent publishers, 512-byte events |
| **Peak Bandwidth** | 198.28 MB/sec | 10 concurrent publishers, 10KB events |
| **Optimal Balance** | 16,714.00 events/sec @ 128.38 MB/sec | 10 concurrent publishers, 2KB events |
### Query Performance
| Query Type | Avg Rate | Notes |
|------------|----------|--------|
| **Light Load** | 16.35 queries/sec | 50 queries after 1K events |
| **Medium Load** | 11.21 queries/sec | 100 queries after 10K events |
| **Heavy Load** | 5.53 queries/sec | 200 queries after 50K events |
| **Sustained** | 6.72 queries/sec | 500 continuous queries |
## Key Findings
1. **Optimal Concurrency**: The relay performs best with 10-50 concurrent publishers, achieving rates of 16,000-21,000 events/second.
2. **Event Size Impact**:
- Smaller events (512B-2KB) achieve higher event rates
- Larger events (10KB) achieve higher bandwidth utilization but lower event rates
3. **Query Performance**: Query performance varies with database size:
- Fresh database: ~16 queries/second
- After 50K events: ~6 queries/second
4. **Scalability**: The relay maintains consistent performance up to 50 concurrent connections and can sustain 21,000+ events/second under optimal conditions.
## Query Filter Distribution
The benchmark tested 5 different query patterns in rotation:
1. Query by kind (20%)
2. Query by time range (20%)
3. Query by tag (20%)
4. Query by author (20%)
5. Complex queries with multiple conditions (20%)
All query types showed similar performance characteristics, indicating well-balanced indexing.
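A self-contained Go sketch of this rotation, matching the filter construction used by the benchmark; the pubkey value is a placeholder for the random 32-byte hex keys the tool generates:

```go
package main

import (
	"fmt"
	"time"
)

// filterFor mirrors the benchmark's rotation: query i uses pattern i % 5,
// giving each of the five filter types an equal 20% share.
func filterFor(i, limit int) map[string]interface{} {
	now := time.Now().Unix()
	pubkey := "placeholder-hex-pubkey" // stand-in for a random 32-byte hex key
	switch i % 5 {
	case 0: // query by kind
		return map[string]interface{}{"kinds": []int{1}, "limit": limit}
	case 1: // query by time range
		return map[string]interface{}{"since": now - 3600, "until": now, "limit": limit}
	case 2: // query by tag
		return map[string]interface{}{"#p": []string{pubkey}, "limit": limit}
	case 3: // query by author
		return map[string]interface{}{"authors": []string{pubkey}, "limit": limit}
	default: // complex: kinds + author + time window
		return map[string]interface{}{
			"kinds":   []int{1, 6},
			"authors": []string{pubkey},
			"since":   now - 7200,
			"limit":   limit,
		}
	}
}

func main() {
	for i := 0; i < 5; i++ {
		fmt.Println(i, filterFor(i, 100))
	}
}
```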


@@ -1,61 +1,130 @@
# Nostr Relay Benchmark Suite
A comprehensive performance benchmarking suite for Nostr relay implementations, featuring event publishing tests, query profiling, load simulation, and timing instrumentation.
## Features
- **Multi-relay comparison benchmarks** - Compare Khatru, Strfry, Relayer, and Orly
- **Publishing performance testing** - Measure event ingestion rates and bandwidth
- **Query profiling** - Test various filter patterns and query speeds
- **Load pattern simulation** - Constant, spike, burst, sine, and ramp patterns
- **Timing instrumentation** - Track full event lifecycle and identify bottlenecks
- **Concurrent stress testing** - Multiple publishers with connection pooling
- **Production-grade event generation** - Proper secp256k1 signatures and UTF-8 content
- **Comparative reporting** - Markdown, JSON, and CSV format reports
## Quick Start
```bash
# Build the benchmark tool
cd cmd/benchmark
CGO_LDFLAGS="-L/usr/local/lib" PKG_CONFIG_PATH="/usr/local/lib/pkgconfig" go build -o benchmark .
# Run simple benchmark
./benchmark --relay ws://localhost:7447 --events 1000 --queries 50
# Run full comparison benchmark
./setup_relays.sh # Setup all relay implementations
./run_all_benchmarks.sh # Run benchmarks on all relays
```
## Latest Benchmark Results
| Relay | Publishing (events/sec) | Querying (queries/sec) | Backend |
|-------|------------------------|------------------------|---------|
| **Khatru** | 9,570 | 4.77 | SQLite |
| **Strfry** | 1,338 | 266.16 | LMDB |
| **Relayer** | 1,122 | 623.36 | PostgreSQL |
| **Orly** | 668 | 4.92 | Badger |
See [RELAY_COMPARISON_RESULTS.md](RELAY_COMPARISON_RESULTS.md) for detailed analysis.
## Core Benchmarking
### Basic Usage
```bash
# Run a full benchmark (publish and query)
./benchmark --relay ws://localhost:7447 --events 10000 --queries 100
# Benchmark only publishing
./benchmark --relay ws://localhost:7447 --events 50000 --concurrency 20 --skip-query
# Benchmark only querying
./benchmark --relay ws://localhost:7447 --queries 500 --skip-publish
# Use custom event sizes
./benchmark --relay ws://localhost:7447 --events 10000 --size 2048
```
### Advanced Features
```bash
# Query profiling with subscription testing
./benchmark --profile --profile-subs --sub-count 100 --sub-duration 30s
# Load pattern simulation
./benchmark --load --load-pattern spike --load-duration 60s --load-base 50 --load-peak 200
# Full load test suite
./benchmark --load-suite --load-constraints
# Timing instrumentation
./benchmark --timing --timing-events 100 --timing-subs --timing-duration 10s
# Generate comparative reports
./benchmark --report --report-format markdown --report-title "Production Benchmark"
```
## Command Line Options
### Basic Options
- `--relay`: Relay URL to benchmark (default: ws://localhost:7447)
- `--events`: Number of events to publish (default: 10000)
- `--size`: Average size of event content in bytes (default: 1024)
- `--concurrency`: Number of concurrent publishers (default: 10)
- `--queries`: Number of queries to execute (default: 100)
- `--query-limit`: Limit for each query (default: 100)
- `--skip-publish`: Skip the publishing phase
- `--skip-query`: Skip the query phase
- `-v`: Enable verbose output
### Multi-Relay Options
- `--multi-relay`: Use multi-relay harness
- `--relay-bin`: Path to relay binary
- `--install`: Install relay dependencies and binaries
- `--install-secp`: Install only secp256k1 library
- `--work-dir`: Working directory for builds (default: /tmp/relay-build)
- `--install-dir`: Installation directory for binaries (default: /usr/local/bin)
### Profiling Options
- `--profile`: Run query performance profiling
- `--profile-subs`: Profile subscription performance
- `--sub-count`: Number of concurrent subscriptions (default: 100)
- `--sub-duration`: Duration for subscription profiling (default: 30s)
### Load Testing Options
- `--load`: Run load pattern simulation
- `--load-pattern`: Pattern type: constant, spike, burst, sine, ramp (default: constant)
- `--load-duration`: Duration for load test (default: 60s)
- `--load-base`: Base load in events/sec (default: 50)
- `--load-peak`: Peak load in events/sec (default: 200)
- `--load-pool`: Connection pool size (default: 10)
- `--load-suite`: Run comprehensive load test suite
- `--load-constraints`: Test under resource constraints
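The pattern names suggest rate curves along the following lines. This is an illustrative guess at the mapping from elapsed time to target rate, not the actual `load_simulator.go` API:

```go
package main

import (
	"fmt"
	"math"
)

// rateAt sketches how each documented pattern could map elapsed time t
// (of a total test duration) to a target rate between base and peak.
func rateAt(pattern string, t, total, base, peak float64) float64 {
	switch pattern {
	case "spike":
		// flat at base, with a short burst to peak mid-test
		if t > total*0.45 && t < total*0.55 {
			return peak
		}
		return base
	case "sine":
		// oscillate between base and peak over the test duration
		mid, amp := (base+peak)/2, (peak-base)/2
		return mid + amp*math.Sin(2*math.Pi*t/total)
	case "ramp":
		// linear climb from base to peak
		return base + (peak-base)*t/total
	default: // "constant"
		return base
	}
}

func main() {
	fmt.Println(rateAt("constant", 30, 60, 50, 200))
	fmt.Println(rateAt("ramp", 60, 60, 50, 200))
}
```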
### Timing Options
- `--timing`: Run end-to-end timing instrumentation
- `--timing-events`: Number of events for timing (default: 100)
- `--timing-subs`: Test subscription timing
- `--timing-duration`: Duration for subscription timing (default: 10s)
### Report Options
- `--report`: Generate comparative report
- `--report-format`: Output format: markdown, json, csv (default: markdown)
- `--report-file`: Output filename without extension (default: benchmark_report)
- `--report-title`: Report title (default: "Relay Benchmark Comparison")
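For `--report-format csv`, the comparison table above implies rows along these lines; the column names here are illustrative, not the tool's actual schema:

```go
package main

import (
	"encoding/csv"
	"os"
)

// comparisonRows sketches a CSV report built from the figures in the
// comparison table; report_generator.go may use a different layout.
func comparisonRows() [][]string {
	return [][]string{
		{"relay", "publish_events_per_sec", "query_queries_per_sec", "backend"},
		{"Khatru", "9570", "4.77", "SQLite"},
		{"Strfry", "1338", "266.16", "LMDB"},
		{"Relayer", "1122", "623.36", "PostgreSQL"},
		{"Orly", "668", "4.92", "Badger"},
	}
}

func main() {
	w := csv.NewWriter(os.Stdout)
	w.WriteAll(comparisonRows()) // WriteAll flushes internally
}
```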
## Query Types Tested
The benchmark tests various query patterns:
@@ -84,29 +153,80 @@ The tool provides detailed metrics including:
## Example Output
```
Publishing 1000 events to ws://localhost:7447...
Published 1000 events...
Published 2000 events...
...
Querying events from ws://localhost:7447...
Executed 20 queries...
Executed 40 queries...
...
=== Benchmark Results ===

Publish Performance:
  Events Published: 1000
  Total Data: 0.81 MB
  Duration: 890.91ms
  Rate: 1122.45 events/second
  Bandwidth: 0.91 MB/second

Query Performance:
  Queries Executed: 50
  Events Returned: 800
  Duration: 80.21ms
  Rate: 623.36 queries/second
  Avg Events/Query: 16.00
```
## Relay Setup
First run `./setup_relays.sh` to build all relay binaries, then start individual relays:
### Khatru (SQLite)
```bash
cd /tmp/relay-benchmark/khatru/examples/basic-sqlite3
./khatru-relay
```
### Strfry (LMDB)
```bash
cd /tmp/relay-benchmark/strfry
./strfry --config strfry.conf relay
```
### Relayer (PostgreSQL)
```bash
# Start PostgreSQL
docker run -d --name relay-postgres -e POSTGRES_PASSWORD=postgres \
-e POSTGRES_DB=nostr -p 5433:5432 postgres:15-alpine
# Run relayer
cd /tmp/relay-benchmark/relayer/examples/basic
POSTGRESQL_DATABASE="postgres://postgres:postgres@localhost:5433/nostr?sslmode=disable" \
./relayer-bin
```
### Orly (Badger)
```bash
cd /tmp/relay-benchmark
ORLY_PORT=7448 ORLY_DATA_DIR=/tmp/orly-benchmark ORLY_SPIDER_TYPE=none ./orly-relay
```
## Development
The benchmark suite consists of several components:
- `main.go` - Core benchmark orchestration
- `test_signer.go` - secp256k1 event signing
- `simple_event.go` - UTF-8 safe event generation
- `query_profiler.go` - Query performance analysis
- `load_simulator.go` - Load pattern generation
- `timing_instrumentation.go` - Event lifecycle tracking
- `report_generator.go` - Comparative report generation
- `relay_harness.go` - Multi-relay management
## Notes
- All benchmarks generate events with proper secp256k1 signatures
- Events are generated with valid UTF-8 content to ensure compatibility
- Connection pooling is used for realistic concurrent load testing
- Query patterns test real-world filter combinations
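The concurrent publishers split work evenly: `count/concurrency` events each, with the remainder spread over the first workers so the totals always sum back to `count`. A minimal sketch of that split:

```go
package main

import "fmt"

// split distributes count events across n workers: each gets count/n,
// and the first count%n workers get one extra.
func split(count, n int) []int {
	per, extra := count/n, count%n
	out := make([]int, n)
	for i := range out {
		out[i] = per
		if i < extra {
			out[i]++
		}
	}
	return out
}

func main() {
	fmt.Println(split(1000, 5))
	fmt.Println(split(10, 3))
}
```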


@@ -0,0 +1,73 @@
# Nostr Relay Performance Comparison
Benchmark results for Khatru, Strfry, Relayer, and Orly relay implementations.
## Test Configuration
- **Events Published**: 1000 per relay
- **Event Size**: 512 bytes content
- **Queries Executed**: 50 per relay
- **Concurrency**: 5 simultaneous publishers
- **Platform**: Linux 5.15.0-151-generic
- **Date**: 2025-08-08
## Performance Results
### Publishing Performance
| Relay | Events Published | Data Size | Duration | Events/sec | Bandwidth |
|-------|-----------------|-----------|----------|------------|-----------|
| **Khatru** | 1,000 | 0.81 MB | 104.49ms | **9,569.94** | **7.79 MB/s** |
| **Strfry** | 1,000 | 0.81 MB | 747.41ms | 1,337.95 | 1.09 MB/s |
| **Relayer** | 1,000 | 0.81 MB | 890.91ms | 1,122.45 | 0.91 MB/s |
| **Orly** | 1,000 | 0.81 MB | 1.497s | 667.91 | 0.54 MB/s |
### Query Performance
| Relay | Queries | Events Retrieved | Duration | Queries/sec | Avg Events/Query |
|-------|---------|-----------------|----------|-------------|------------------|
| **Relayer** | 50 | 800 | 80.21ms | **623.36** | 16.00 |
| **Strfry** | 50 | 2,000 | 187.86ms | 266.16 | 40.00 |
| **Orly** | 50 | 800 | 10.164s | 4.92 | 16.00 |
| **Khatru** | 50 | 2,000 | 10.487s | 4.77 | 40.00 |
## Implementation Details
### Khatru
- Language: Go
- Backend: SQLite (embedded)
- Dependencies: Go 1.20+, SQLite3
- Publishing: 9,570 events/sec, 104ms duration
- Querying: 4.77 queries/sec, 10.5s duration
### Strfry
- Language: C++
- Backend: LMDB (embedded)
- Dependencies: flatbuffers, lmdb, zstd, secp256k1, cmake, g++
- Publishing: 1,338 events/sec, 747ms duration
- Querying: 266 queries/sec, 188ms duration
### Relayer
- Language: Go
- Backend: PostgreSQL (external)
- Dependencies: Go 1.20+, PostgreSQL 12+
- Publishing: 1,122 events/sec, 891ms duration
- Querying: 623 queries/sec, 80ms duration
### Orly
- Language: Go
- Backend: Badger (embedded)
- Dependencies: Go 1.20+, libsecp256k1
- Publishing: 668 events/sec, 1.5s duration
- Querying: 4.92 queries/sec, 10.2s duration
## Test Environment
- Platform: Linux 5.15.0-151-generic
- Concurrency: 5 publishers
- Event size: 512 bytes
- Signature verification: secp256k1
- Content validation: UTF-8


@@ -1,304 +0,0 @@
//go:build ignore
// +build ignore

package main
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"log"
"math/rand"
"net/url"
"sync"
"sync/atomic"
"time"
"github.com/gobwas/ws"
"github.com/gobwas/ws/wsutil"
)
// Simple event structure for benchmarking
type Event struct {
ID string `json:"id"`
Pubkey string `json:"pubkey"`
CreatedAt int64 `json:"created_at"`
Kind int `json:"kind"`
Tags [][]string `json:"tags"`
Content string `json:"content"`
Sig string `json:"sig"`
}
// Generate a test event
func generateTestEvent(size int) *Event {
// Hex-encode random bytes so the content is always valid UTF-8;
// size/2 raw bytes yield roughly size bytes of content.
raw := make([]byte, size/2)
rand.Read(raw)
content := hex.EncodeToString(raw)
// Generate random pubkey and sig
pubkey := make([]byte, 32)
sig := make([]byte, 64)
rand.Read(pubkey)
rand.Read(sig)
ev := &Event{
Pubkey: hex.EncodeToString(pubkey),
CreatedAt: time.Now().Unix(),
Kind: 1,
Tags: [][]string{},
Content: string(content),
Sig: hex.EncodeToString(sig),
}
// Generate ID (simplified)
serialized, _ := json.Marshal([]interface{}{
0,
ev.Pubkey,
ev.CreatedAt,
ev.Kind,
ev.Tags,
ev.Content,
})
hash := sha256.Sum256(serialized)
ev.ID = hex.EncodeToString(hash[:])
return ev
}
func publishEvents(relayURL string, count int, size int, concurrency int) (int64, int64, time.Duration, error) {
u, err := url.Parse(relayURL)
if err != nil {
return 0, 0, 0, err
}
var publishedEvents atomic.Int64
var publishedBytes atomic.Int64
var wg sync.WaitGroup
eventsPerWorker := count / concurrency
extraEvents := count % concurrency
start := time.Now()
for i := 0; i < concurrency; i++ {
wg.Add(1)
eventsToPublish := eventsPerWorker
if i < extraEvents {
eventsToPublish++
}
go func(workerID int, eventCount int) {
defer wg.Done()
// Connect to relay
ctx := context.Background()
conn, _, _, err := ws.Dial(ctx, u.String())
if err != nil {
log.Printf("Worker %d: connection error: %v", workerID, err)
return
}
defer conn.Close()
// Publish events
for j := 0; j < eventCount; j++ {
ev := generateTestEvent(size)
// Create EVENT message
msg, _ := json.Marshal([]interface{}{"EVENT", ev})
err := wsutil.WriteClientMessage(conn, ws.OpText, msg)
if err != nil {
log.Printf("Worker %d: write error: %v", workerID, err)
continue
}
publishedEvents.Add(1)
publishedBytes.Add(int64(len(msg)))
// Read response (OK or error)
_, _, err = wsutil.ReadServerData(conn)
if err != nil {
log.Printf("Worker %d: read error: %v", workerID, err)
}
}
}(i, eventsToPublish)
}
wg.Wait()
duration := time.Since(start)
return publishedEvents.Load(), publishedBytes.Load(), duration, nil
}
func queryEvents(relayURL string, queries int, limit int) (int64, int64, time.Duration, error) {
u, err := url.Parse(relayURL)
if err != nil {
return 0, 0, 0, err
}
ctx := context.Background()
conn, _, _, err := ws.Dial(ctx, u.String())
if err != nil {
return 0, 0, 0, err
}
defer conn.Close()
var totalQueries int64
var totalEvents int64
start := time.Now()
for i := 0; i < queries; i++ {
// Generate various filter types
var filter map[string]interface{}
switch i % 5 {
case 0:
// Query by kind
filter = map[string]interface{}{
"kinds": []int{1},
"limit": limit,
}
case 1:
// Query by time range
now := time.Now().Unix()
filter = map[string]interface{}{
"since": now - 3600,
"until": now,
"limit": limit,
}
case 2:
// Query by tag
filter = map[string]interface{}{
"#p": []string{hex.EncodeToString(randBytes(32))},
"limit": limit,
}
case 3:
// Query by author
filter = map[string]interface{}{
"authors": []string{hex.EncodeToString(randBytes(32))},
"limit": limit,
}
case 4:
// Complex query
now := time.Now().Unix()
filter = map[string]interface{}{
"kinds": []int{1, 6},
"authors": []string{hex.EncodeToString(randBytes(32))},
"since": now - 7200,
"limit": limit,
}
}
// Send REQ
subID := fmt.Sprintf("bench-%d", i)
msg, _ := json.Marshal([]interface{}{"REQ", subID, filter})
err := wsutil.WriteClientMessage(conn, ws.OpText, msg)
if err != nil {
log.Printf("Query %d: write error: %v", i, err)
continue
}
// Read events until EOSE
eventCount := 0
for {
data, err := wsutil.ReadServerText(conn)
if err != nil {
log.Printf("Query %d: read error: %v", i, err)
break
}
var msg []interface{}
if err := json.Unmarshal(data, &msg); err != nil {
continue
}
if len(msg) < 2 {
continue
}
msgType, ok := msg[0].(string)
if !ok {
continue
}
switch msgType {
case "EVENT":
eventCount++
case "EOSE":
goto done
}
}
done:
// Send CLOSE
closeMsg, _ := json.Marshal([]interface{}{"CLOSE", subID})
wsutil.WriteClientMessage(conn, ws.OpText, closeMsg)
totalQueries++
totalEvents += int64(eventCount)
if totalQueries%20 == 0 {
fmt.Printf(" Executed %d queries...\n", totalQueries)
}
}
duration := time.Since(start)
return totalQueries, totalEvents, duration, nil
}
func randBytes(n int) []byte {
b := make([]byte, n)
rand.Read(b)
return b
}
func main() {
var (
relayURL = flag.String("relay", "ws://localhost:7447", "Relay URL to benchmark")
eventCount = flag.Int("events", 10000, "Number of events to publish")
eventSize = flag.Int("size", 1024, "Average size of event content in bytes")
concurrency = flag.Int("concurrency", 10, "Number of concurrent publishers")
queryCount = flag.Int("queries", 100, "Number of queries to execute")
queryLimit = flag.Int("query-limit", 100, "Limit for each query")
skipPublish = flag.Bool("skip-publish", false, "Skip publishing phase")
skipQuery = flag.Bool("skip-query", false, "Skip query phase")
)
flag.Parse()
fmt.Printf("=== Nostr Relay Benchmark ===\n\n")
// Phase 1: Publish events
if !*skipPublish {
fmt.Printf("Publishing %d events to %s...\n", *eventCount, *relayURL)
published, bytes, duration, err := publishEvents(*relayURL, *eventCount, *eventSize, *concurrency)
if err != nil {
log.Fatalf("Publishing failed: %v", err)
}
fmt.Printf("\nPublish Performance:\n")
fmt.Printf(" Events Published: %d\n", published)
fmt.Printf(" Total Data: %.2f MB\n", float64(bytes)/1024/1024)
fmt.Printf(" Duration: %s\n", duration)
fmt.Printf(" Rate: %.2f events/second\n", float64(published)/duration.Seconds())
fmt.Printf(" Bandwidth: %.2f MB/second\n", float64(bytes)/duration.Seconds()/1024/1024)
}
// Phase 2: Query events
if !*skipQuery {
fmt.Printf("\nQuerying events from %s...\n", *relayURL)
queries, events, duration, err := queryEvents(*relayURL, *queryCount, *queryLimit)
if err != nil {
log.Fatalf("Querying failed: %v", err)
}
fmt.Printf("\nQuery Performance:\n")
fmt.Printf(" Queries Executed: %d\n", queries)
fmt.Printf(" Events Returned: %d\n", events)
fmt.Printf(" Duration: %s\n", duration)
fmt.Printf(" Rate: %.2f queries/second\n", float64(queries)/duration.Seconds())
fmt.Printf(" Avg Events/Query: %.2f\n", float64(events)/float64(queries))
}
}

cmd/benchmark/installer.go (new file, 549 lines)

@@ -0,0 +1,549 @@
package main
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"sync"
)
type DependencyType int
const (
Go DependencyType = iota
Rust
Cpp
Git
Make
Cmake
Pkg
)
type RelayInstaller struct {
workDir string
installDir string
deps map[DependencyType]bool
mu sync.RWMutex
skipVerify bool
}
func NewRelayInstaller(workDir, installDir string) *RelayInstaller {
return &RelayInstaller{
workDir: workDir,
installDir: installDir,
deps: make(map[DependencyType]bool),
}
}
func (ri *RelayInstaller) DetectDependencies() error {
deps := []struct {
dep DependencyType
cmd string
}{
{Go, "go"},
{Rust, "rustc"},
{Cpp, "g++"},
{Git, "git"},
{Make, "make"},
{Cmake, "cmake"},
{Pkg, "pkg-config"},
}
ri.mu.Lock()
defer ri.mu.Unlock()
for _, d := range deps {
_, err := exec.LookPath(d.cmd)
ri.deps[d.dep] = err == nil
}
return nil
}
func (ri *RelayInstaller) InstallMissingDependencies() error {
ri.mu.RLock()
missing := make([]DependencyType, 0)
for dep, exists := range ri.deps {
if !exists {
missing = append(missing, dep)
}
}
ri.mu.RUnlock()
if len(missing) == 0 {
return nil
}
switch runtime.GOOS {
case "linux":
return ri.installLinuxDeps(missing)
case "darwin":
return ri.installMacDeps(missing)
default:
return fmt.Errorf("unsupported OS: %s", runtime.GOOS)
}
}
func (ri *RelayInstaller) installLinuxDeps(deps []DependencyType) error {
hasApt := ri.commandExists("apt-get")
hasYum := ri.commandExists("yum")
hasPacman := ri.commandExists("pacman")
if !hasApt && !hasYum && !hasPacman {
return fmt.Errorf("no supported package manager found")
}
if hasApt {
if err := ri.runCommand("sudo", "apt-get", "update"); err != nil {
return err
}
}
for _, dep := range deps {
switch dep {
case Go:
if err := ri.installGo(); err != nil {
return err
}
case Rust:
if err := ri.installRust(); err != nil {
return err
}
default:
if hasApt {
if err := ri.installAptPackage(dep); err != nil {
return err
}
} else if hasYum {
if err := ri.installYumPackage(dep); err != nil {
return err
}
} else if hasPacman {
if err := ri.installPacmanPackage(dep); err != nil {
return err
}
}
}
}
if err := ri.installSecp256k1(); err != nil {
return err
}
return nil
}
func (ri *RelayInstaller) installMacDeps(deps []DependencyType) error {
if !ri.commandExists("brew") {
return fmt.Errorf("homebrew not found, install from https://brew.sh")
}
for _, dep := range deps {
switch dep {
case Go:
if err := ri.runCommand("brew", "install", "go"); err != nil {
return err
}
case Rust:
if err := ri.installRust(); err != nil {
return err
}
case Cpp:
if err := ri.runCommand("brew", "install", "gcc"); err != nil {
return err
}
case Git:
if err := ri.runCommand("brew", "install", "git"); err != nil {
return err
}
case Make:
if err := ri.runCommand("brew", "install", "make"); err != nil {
return err
}
case Cmake:
if err := ri.runCommand("brew", "install", "cmake"); err != nil {
return err
}
case Pkg:
if err := ri.runCommand("brew", "install", "pkg-config"); err != nil {
return err
}
}
}
if err := ri.installSecp256k1(); err != nil {
return err
}
return nil
}
func (ri *RelayInstaller) installAptPackage(dep DependencyType) error {
var pkgName string
switch dep {
case Cpp:
pkgName = "build-essential"
case Git:
pkgName = "git"
case Make:
pkgName = "make"
case Cmake:
pkgName = "cmake"
case Pkg:
pkgName = "pkg-config"
default:
return nil
}
return ri.runCommand("sudo", "apt-get", "install", "-y", pkgName, "autotools-dev", "autoconf", "libtool")
}
func (ri *RelayInstaller) installYumPackage(dep DependencyType) error {
var pkgName string
switch dep {
case Cpp:
pkgName = "gcc-c++"
case Git:
pkgName = "git"
case Make:
pkgName = "make"
case Cmake:
pkgName = "cmake"
case Pkg:
pkgName = "pkgconfig"
default:
return nil
}
return ri.runCommand("sudo", "yum", "install", "-y", pkgName)
}
func (ri *RelayInstaller) installPacmanPackage(dep DependencyType) error {
var pkgName string
switch dep {
case Cpp:
pkgName = "gcc"
case Git:
pkgName = "git"
case Make:
pkgName = "make"
case Cmake:
pkgName = "cmake"
case Pkg:
pkgName = "pkgconf"
default:
return nil
}
return ri.runCommand("sudo", "pacman", "-S", "--noconfirm", pkgName)
}
func (ri *RelayInstaller) installGo() error {
version := "1.21.5"
arch := runtime.GOARCH
filename := fmt.Sprintf("go%s.%s-%s.tar.gz", version, runtime.GOOS, arch)
url := fmt.Sprintf("https://golang.org/dl/%s", filename)
tmpFile := filepath.Join(os.TempDir(), filename)
if err := ri.runCommand("wget", "-O", tmpFile, url); err != nil {
return fmt.Errorf("failed to download Go: %w", err)
}
if err := ri.runCommand("sudo", "tar", "-C", "/usr/local", "-xzf", tmpFile); err != nil {
return fmt.Errorf("failed to extract Go: %w", err)
}
os.Remove(tmpFile)
profile := filepath.Join(os.Getenv("HOME"), ".profile")
f, err := os.OpenFile(profile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err == nil {
f.WriteString("\nexport PATH=$PATH:/usr/local/go/bin\n")
f.Close()
}
return nil
}
func (ri *RelayInstaller) installRust() error {
return ri.runCommand("curl", "--proto", "=https", "--tlsv1.2", "-sSf", "https://sh.rustup.rs", "|", "sh", "-s", "--", "-y")
}
func (ri *RelayInstaller) installSecp256k1() error {
switch runtime.GOOS {
case "linux":
if ri.commandExists("apt-get") {
if err := ri.runCommand("sudo", "apt-get", "install", "-y", "libsecp256k1-dev"); err != nil {
return ri.buildSecp256k1FromSource()
}
return nil
} else if ri.commandExists("yum") {
if err := ri.runCommand("sudo", "yum", "install", "-y", "libsecp256k1-devel"); err != nil {
return ri.buildSecp256k1FromSource()
}
return nil
} else if ri.commandExists("pacman") {
if err := ri.runCommand("sudo", "pacman", "-S", "--noconfirm", "libsecp256k1"); err != nil {
return ri.buildSecp256k1FromSource()
}
return nil
}
return ri.buildSecp256k1FromSource()
case "darwin":
if err := ri.runCommand("brew", "install", "libsecp256k1"); err != nil {
return ri.buildSecp256k1FromSource()
}
return nil
default:
return ri.buildSecp256k1FromSource()
}
}
func (ri *RelayInstaller) buildSecp256k1FromSource() error {
secp256k1Dir := filepath.Join(ri.workDir, "secp256k1")
if err := ri.runCommand("git", "clone", "https://github.com/bitcoin-core/secp256k1.git", secp256k1Dir); err != nil {
return fmt.Errorf("failed to clone secp256k1: %w", err)
}
if err := os.Chdir(secp256k1Dir); err != nil {
return err
}
if err := ri.runCommand("./autogen.sh"); err != nil {
return fmt.Errorf("failed to run autogen: %w", err)
}
configArgs := []string{"--enable-module-schnorrsig", "--enable-module-recovery"}
if err := ri.runCommand("./configure", configArgs...); err != nil {
return fmt.Errorf("failed to configure secp256k1: %w", err)
}
if err := ri.runCommand("make"); err != nil {
return fmt.Errorf("failed to build secp256k1: %w", err)
}
if err := ri.runCommand("sudo", "make", "install"); err != nil {
return fmt.Errorf("failed to install secp256k1: %w", err)
}
if err := ri.runCommand("sudo", "ldconfig"); err != nil && runtime.GOOS == "linux" {
return fmt.Errorf("failed to run ldconfig: %w", err)
}
return nil
}
func (ri *RelayInstaller) InstallKhatru() error {
khatruDir := filepath.Join(ri.workDir, "khatru")
if err := ri.runCommand("git", "clone", "https://github.com/fiatjaf/khatru.git", khatruDir); err != nil {
return fmt.Errorf("failed to clone khatru: %w", err)
}
if err := os.Chdir(khatruDir); err != nil {
return err
}
if err := ri.runCommand("go", "mod", "tidy"); err != nil {
return fmt.Errorf("failed to tidy khatru: %w", err)
}
binPath := filepath.Join(ri.installDir, "khatru")
if err := ri.runCommand("go", "build", "-o", binPath, "."); err != nil {
return fmt.Errorf("failed to build khatru: %w", err)
}
return nil
}
func (ri *RelayInstaller) InstallRelayer() error {
relayerDir := filepath.Join(ri.workDir, "relayer")
if err := ri.runCommand("git", "clone", "https://github.com/fiatjaf/relayer.git", relayerDir); err != nil {
return fmt.Errorf("failed to clone relayer: %w", err)
}
if err := os.Chdir(relayerDir); err != nil {
return err
}
if err := ri.runCommand("go", "mod", "tidy"); err != nil {
return fmt.Errorf("failed to tidy relayer: %w", err)
}
binPath := filepath.Join(ri.installDir, "relayer")
if err := ri.runCommand("go", "build", "-o", binPath, "."); err != nil {
return fmt.Errorf("failed to build relayer: %w", err)
}
return nil
}
func (ri *RelayInstaller) InstallStrfry() error {
strfryDir := filepath.Join(ri.workDir, "strfry")
if err := ri.runCommand("git", "clone", "https://github.com/hoytech/strfry.git", strfryDir); err != nil {
return fmt.Errorf("failed to clone strfry: %w", err)
}
if err := os.Chdir(strfryDir); err != nil {
return err
}
if err := ri.runCommand("git", "submodule", "update", "--init"); err != nil {
return fmt.Errorf("failed to init submodules: %w", err)
}
if err := ri.runCommand("make", "setup-golpe"); err != nil {
return fmt.Errorf("failed to setup golpe: %w", err)
}
if err := ri.runCommand("make"); err != nil {
return fmt.Errorf("failed to build strfry: %w", err)
}
srcBin := filepath.Join(strfryDir, "strfry")
dstBin := filepath.Join(ri.installDir, "strfry")
if err := ri.runCommand("cp", srcBin, dstBin); err != nil {
return fmt.Errorf("failed to copy strfry binary: %w", err)
}
return nil
}
func (ri *RelayInstaller) InstallRustRelay() error {
rustRelayDir := filepath.Join(ri.workDir, "nostr-rs-relay")
if err := ri.runCommand("git", "clone", "https://github.com/scsibug/nostr-rs-relay.git", rustRelayDir); err != nil {
return fmt.Errorf("failed to clone rust relay: %w", err)
}
if err := os.Chdir(rustRelayDir); err != nil {
return err
}
if err := ri.runCommand("cargo", "build", "--release"); err != nil {
return fmt.Errorf("failed to build rust relay: %w", err)
}
srcBin := filepath.Join(rustRelayDir, "target", "release", "nostr-rs-relay")
dstBin := filepath.Join(ri.installDir, "nostr-rs-relay")
if err := ri.runCommand("cp", srcBin, dstBin); err != nil {
return fmt.Errorf("failed to copy rust relay binary: %w", err)
}
return nil
}
func (ri *RelayInstaller) VerifyInstallation() error {
if ri.skipVerify {
return nil
}
binaries := []string{"khatru", "relayer", "strfry", "nostr-rs-relay"}
for _, binary := range binaries {
binPath := filepath.Join(ri.installDir, binary)
if _, err := os.Stat(binPath); os.IsNotExist(err) {
return fmt.Errorf("binary %s not found at %s", binary, binPath)
}
if err := ri.runCommand("chmod", "+x", binPath); err != nil {
return fmt.Errorf("failed to make %s executable: %w", binary, err)
}
}
return nil
}
func (ri *RelayInstaller) commandExists(cmd string) bool {
_, err := exec.LookPath(cmd)
return err == nil
}
func (ri *RelayInstaller) runCommand(name string, args ...string) error {
// Shell pipelines cannot be expressed as plain exec arguments, so piped
// curl invocations are routed through bash -c instead.
if name == "curl" && len(args) > 0 && strings.Contains(strings.Join(args, " "), "|") {
fullCmd := fmt.Sprintf("%s %s", name, strings.Join(args, " "))
cmd := exec.Command("bash", "-c", fullCmd)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
}
cmd := exec.Command(name, args...)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
}
func (ri *RelayInstaller) InstallSecp256k1Only() error {
fmt.Println("Installing secp256k1 library...")
if err := os.MkdirAll(ri.workDir, 0755); err != nil {
return err
}
if err := ri.installSecp256k1(); err != nil {
return fmt.Errorf("failed to install secp256k1: %w", err)
}
fmt.Println("secp256k1 installed successfully")
return nil
}
func (ri *RelayInstaller) InstallAll() error {
fmt.Println("Detecting dependencies...")
if err := ri.DetectDependencies(); err != nil {
return err
}
fmt.Println("Installing missing dependencies...")
if err := ri.InstallMissingDependencies(); err != nil {
return err
}
if err := os.MkdirAll(ri.workDir, 0755); err != nil {
return err
}
if err := os.MkdirAll(ri.installDir, 0755); err != nil {
return err
}
fmt.Println("Installing khatru...")
if err := ri.InstallKhatru(); err != nil {
return err
}
fmt.Println("Installing relayer...")
if err := ri.InstallRelayer(); err != nil {
return err
}
fmt.Println("Installing strfry...")
if err := ri.InstallStrfry(); err != nil {
return err
}
fmt.Println("Installing rust relay...")
if err := ri.InstallRustRelay(); err != nil {
return err
}
fmt.Println("Verifying installation...")
if err := ri.VerifyInstallation(); err != nil {
return err
}
fmt.Println("All relays installed successfully")
return nil
}


@@ -0,0 +1,494 @@
package main
import (
"fmt"
"math"
"orly.dev/pkg/protocol/ws"
"orly.dev/pkg/utils/context"
"orly.dev/pkg/utils/log"
"sync"
"sync/atomic"
"time"
)
type LoadPattern int
const (
Constant LoadPattern = iota
Spike
Burst
Sine
Ramp
)
func (lp LoadPattern) String() string {
switch lp {
case Constant:
return "constant"
case Spike:
return "spike"
case Burst:
return "burst"
case Sine:
return "sine"
case Ramp:
return "ramp"
default:
return "unknown"
}
}
type ConnectionPool struct {
relayURL string
poolSize int
connections []*ws.Client
active []bool
mu sync.RWMutex
created int64
failed int64
}
func NewConnectionPool(relayURL string, poolSize int) *ConnectionPool {
return &ConnectionPool{
relayURL: relayURL,
poolSize: poolSize,
connections: make([]*ws.Client, poolSize),
active: make([]bool, poolSize),
}
}
func (cp *ConnectionPool) Initialize(c context.T) error {
var wg sync.WaitGroup
errors := make(chan error, cp.poolSize)
for i := 0; i < cp.poolSize; i++ {
wg.Add(1)
go func(idx int) {
defer wg.Done()
conn, err := ws.RelayConnect(c, cp.relayURL)
if err != nil {
errors <- fmt.Errorf("connection %d failed: %w", idx, err)
atomic.AddInt64(&cp.failed, 1)
return
}
cp.mu.Lock()
cp.connections[idx] = conn
cp.active[idx] = true
cp.mu.Unlock()
atomic.AddInt64(&cp.created, 1)
}(i)
}
wg.Wait()
close(errors)
errorCount := 0
var firstErr error
for err := range errors {
if firstErr == nil {
firstErr = err
}
errorCount++
}
if errorCount > 0 {
return fmt.Errorf("failed to create %d of %d connections: %w", errorCount, cp.poolSize, firstErr)
}
return nil
}
func (cp *ConnectionPool) GetConnection(idx int) *ws.Client {
cp.mu.RLock()
defer cp.mu.RUnlock()
if idx >= 0 && idx < len(cp.connections) && cp.active[idx] {
return cp.connections[idx]
}
return nil
}
func (cp *ConnectionPool) CloseAll() {
cp.mu.Lock()
defer cp.mu.Unlock()
for i, conn := range cp.connections {
if conn != nil && cp.active[i] {
conn.Close()
cp.active[i] = false
}
}
}
func (cp *ConnectionPool) Stats() (created, failed int64) {
return atomic.LoadInt64(&cp.created), atomic.LoadInt64(&cp.failed)
}
type LoadSimulator struct {
relayURL string
pattern LoadPattern
duration time.Duration
baseLoad int
peakLoad int
poolSize int
eventSize int
connectionPool *ConnectionPool
metrics LoadMetrics
running atomic.Bool
}
type LoadMetrics struct {
EventsSent atomic.Int64
EventsFailed atomic.Int64
ConnectionErrors atomic.Int64
AvgLatency atomic.Int64
PeakLatency atomic.Int64
StartTime time.Time
EndTime time.Time
}
func NewLoadSimulator(relayURL string, pattern LoadPattern, duration time.Duration, baseLoad, peakLoad, poolSize, eventSize int) *LoadSimulator {
return &LoadSimulator{
relayURL: relayURL,
pattern: pattern,
duration: duration,
baseLoad: baseLoad,
peakLoad: peakLoad,
poolSize: poolSize,
eventSize: eventSize,
}
}
func (ls *LoadSimulator) Run(c context.T) error {
fmt.Printf("Starting %s load simulation for %v...\n", ls.pattern, ls.duration)
fmt.Printf("Base load: %d events/sec, Peak load: %d events/sec\n", ls.baseLoad, ls.peakLoad)
fmt.Printf("Connection pool size: %d\n", ls.poolSize)
ls.connectionPool = NewConnectionPool(ls.relayURL, ls.poolSize)
if err := ls.connectionPool.Initialize(c); err != nil {
return fmt.Errorf("failed to initialize connection pool: %w", err)
}
defer ls.connectionPool.CloseAll()
created, failed := ls.connectionPool.Stats()
fmt.Printf("Connections established: %d, failed: %d\n", created, failed)
ls.metrics.StartTime = time.Now()
ls.running.Store(true)
switch ls.pattern {
case Constant:
return ls.runConstant(c)
case Spike:
return ls.runSpike(c)
case Burst:
return ls.runBurst(c)
case Sine:
return ls.runSine(c)
case Ramp:
return ls.runRamp(c)
default:
return fmt.Errorf("unsupported load pattern: %s", ls.pattern)
}
}
func (ls *LoadSimulator) runConstant(c context.T) error {
if ls.baseLoad <= 0 {
return fmt.Errorf("base load must be positive, got %d", ls.baseLoad)
}
interval := time.Second / time.Duration(ls.baseLoad)
ticker := time.NewTicker(interval)
defer ticker.Stop()
timeout := time.After(ls.duration)
connectionIdx := 0
for {
select {
case <-timeout:
return ls.finalize()
case <-ticker.C:
go ls.sendEvent(c, connectionIdx%ls.poolSize)
connectionIdx++
}
}
}
func (ls *LoadSimulator) runSpike(c context.T) error {
baseInterval := time.Second / time.Duration(ls.baseLoad)
spikeDuration := ls.duration / 10
spikeStart := ls.duration / 2
baseTicker := time.NewTicker(baseInterval)
// Stop via a closure so the replacement tickers created during the spike
// are stopped too; a plain defer would capture only the original ticker.
defer func() { baseTicker.Stop() }()
timeout := time.After(ls.duration)
spikeTimeout := time.After(spikeStart)
spikeEnd := time.After(spikeStart + spikeDuration)
connectionIdx := 0
inSpike := false
for {
select {
case <-timeout:
return ls.finalize()
case <-spikeTimeout:
if !inSpike {
inSpike = true
baseTicker.Stop()
spikeInterval := time.Second / time.Duration(ls.peakLoad)
baseTicker = time.NewTicker(spikeInterval)
}
case <-spikeEnd:
if inSpike {
inSpike = false
baseTicker.Stop()
baseTicker = time.NewTicker(baseInterval)
}
case <-baseTicker.C:
go ls.sendEvent(c, connectionIdx%ls.poolSize)
connectionIdx++
}
}
}
func (ls *LoadSimulator) runBurst(c context.T) error {
burstInterval := ls.duration / 5
burstSize := ls.peakLoad / 2
ticker := time.NewTicker(burstInterval)
defer ticker.Stop()
timeout := time.After(ls.duration)
connectionIdx := 0
for {
select {
case <-timeout:
return ls.finalize()
case <-ticker.C:
for i := 0; i < burstSize; i++ {
go ls.sendEvent(c, connectionIdx%ls.poolSize)
connectionIdx++
}
}
}
}
func (ls *LoadSimulator) runSine(c context.T) error {
startTime := time.Now()
baseTicker := time.NewTicker(50 * time.Millisecond)
defer baseTicker.Stop()
timeout := time.After(ls.duration)
connectionIdx := 0
lastSend := time.Now()
for {
select {
case <-timeout:
return ls.finalize()
case now := <-baseTicker.C:
elapsed := now.Sub(startTime)
progress := float64(elapsed) / float64(ls.duration)
sineValue := math.Sin(progress * 4 * math.Pi)
currentLoad := ls.baseLoad + int(float64(ls.peakLoad-ls.baseLoad)*((sineValue+1)/2))
if currentLoad > 0 {
interval := time.Second / time.Duration(currentLoad)
if now.Sub(lastSend) >= interval {
go ls.sendEvent(c, connectionIdx%ls.poolSize)
connectionIdx++
lastSend = now
}
}
}
}
}
func (ls *LoadSimulator) runRamp(c context.T) error {
startTime := time.Now()
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
timeout := time.After(ls.duration)
connectionIdx := 0
lastSend := time.Now()
for {
select {
case <-timeout:
return ls.finalize()
case now := <-ticker.C:
elapsed := now.Sub(startTime)
progress := float64(elapsed) / float64(ls.duration)
currentLoad := ls.baseLoad + int(float64(ls.peakLoad-ls.baseLoad)*progress)
if currentLoad > 0 {
interval := time.Second / time.Duration(currentLoad)
if now.Sub(lastSend) >= interval {
go ls.sendEvent(c, connectionIdx%ls.poolSize)
connectionIdx++
lastSend = now
}
}
}
}
}
func (ls *LoadSimulator) sendEvent(c context.T, connIdx int) {
startTime := time.Now()
conn := ls.connectionPool.GetConnection(connIdx)
if conn == nil {
ls.metrics.ConnectionErrors.Add(1)
return
}
signer := newTestSigner()
ev := generateEvent(signer, ls.eventSize, 0, 0)
err := conn.Publish(c, ev)
latency := time.Since(startTime)
if err != nil {
ls.metrics.EventsFailed.Add(1)
log.E.F("Event publish failed: %v", err)
return
}
ls.metrics.EventsSent.Add(1)
latencyMs := latency.Milliseconds()
// Maintain a running average; storing only the latest sample would make
// AvgLatency report the most recent latency rather than the mean.
n := ls.metrics.EventsSent.Load()
prev := ls.metrics.AvgLatency.Load()
ls.metrics.AvgLatency.Store(prev + (latencyMs-prev)/n)
if latencyMs > ls.metrics.PeakLatency.Load() {
ls.metrics.PeakLatency.Store(latencyMs)
}
}
func (ls *LoadSimulator) finalize() error {
ls.metrics.EndTime = time.Now()
ls.running.Store(false)
duration := ls.metrics.EndTime.Sub(ls.metrics.StartTime)
eventsSent := ls.metrics.EventsSent.Load()
eventsFailed := ls.metrics.EventsFailed.Load()
connectionErrors := ls.metrics.ConnectionErrors.Load()
fmt.Printf("\n=== Load Simulation Results ===\n")
fmt.Printf("Pattern: %s\n", ls.pattern)
fmt.Printf("Duration: %v\n", duration)
fmt.Printf("Events Sent: %d\n", eventsSent)
fmt.Printf("Events Failed: %d\n", eventsFailed)
fmt.Printf("Connection Errors: %d\n", connectionErrors)
if eventsSent > 0 {
rate := float64(eventsSent) / duration.Seconds()
successRate := float64(eventsSent) / float64(eventsSent+eventsFailed) * 100
fmt.Printf("Average Rate: %.2f events/sec\n", rate)
fmt.Printf("Success Rate: %.1f%%\n", successRate)
fmt.Printf("Average Latency: %dms\n", ls.metrics.AvgLatency.Load())
fmt.Printf("Peak Latency: %dms\n", ls.metrics.PeakLatency.Load())
}
return nil
}
func (ls *LoadSimulator) SimulateResourceConstraints(c context.T, memoryLimit, cpuLimit int) error {
fmt.Printf("\n=== Resource Constraint Simulation ===\n")
fmt.Printf("Memory limit: %d MB, CPU limit: %d%%\n", memoryLimit, cpuLimit)
constraintTests := []struct {
name string
duration time.Duration
load int
}{
{"baseline", 30 * time.Second, ls.baseLoad},
{"memory_stress", 60 * time.Second, ls.peakLoad * 2},
{"cpu_stress", 45 * time.Second, ls.peakLoad * 3},
{"combined_stress", 90 * time.Second, ls.peakLoad * 4},
}
for _, test := range constraintTests {
fmt.Printf("\nRunning %s test...\n", test.name)
simulator := NewLoadSimulator(ls.relayURL, Constant, test.duration, test.load, test.load, ls.poolSize, ls.eventSize)
if err := simulator.Run(c); err != nil {
fmt.Printf("Test %s failed: %v\n", test.name, err)
continue
}
time.Sleep(10 * time.Second)
}
return nil
}
func (ls *LoadSimulator) GetMetrics() map[string]interface{} {
metrics := make(map[string]interface{})
metrics["pattern"] = ls.pattern.String()
metrics["events_sent"] = ls.metrics.EventsSent.Load()
metrics["events_failed"] = ls.metrics.EventsFailed.Load()
metrics["connection_errors"] = ls.metrics.ConnectionErrors.Load()
metrics["avg_latency_ms"] = ls.metrics.AvgLatency.Load()
metrics["peak_latency_ms"] = ls.metrics.PeakLatency.Load()
if !ls.metrics.StartTime.IsZero() && !ls.metrics.EndTime.IsZero() {
duration := ls.metrics.EndTime.Sub(ls.metrics.StartTime)
metrics["duration_seconds"] = duration.Seconds()
if eventsSent := ls.metrics.EventsSent.Load(); eventsSent > 0 {
metrics["events_per_second"] = float64(eventsSent) / duration.Seconds()
}
}
return metrics
}
type LoadTestSuite struct {
relayURL string
poolSize int
eventSize int
}
func NewLoadTestSuite(relayURL string, poolSize, eventSize int) *LoadTestSuite {
return &LoadTestSuite{
relayURL: relayURL,
poolSize: poolSize,
eventSize: eventSize,
}
}
func (lts *LoadTestSuite) RunAllPatterns(c context.T) error {
patterns := []struct {
pattern LoadPattern
baseLoad int
peakLoad int
duration time.Duration
}{
{Constant, 50, 50, 60 * time.Second},
{Spike, 50, 500, 90 * time.Second},
{Burst, 20, 400, 75 * time.Second},
{Sine, 50, 300, 120 * time.Second},
{Ramp, 10, 200, 90 * time.Second},
}
fmt.Printf("Running comprehensive load test suite...\n")
for _, p := range patterns {
fmt.Printf("\n--- Testing %s pattern ---\n", p.pattern)
simulator := NewLoadSimulator(lts.relayURL, p.pattern, p.duration, p.baseLoad, p.peakLoad, lts.poolSize, lts.eventSize)
if err := simulator.Run(c); err != nil {
fmt.Printf("Pattern %s failed: %v\n", p.pattern, err)
continue
}
time.Sleep(5 * time.Second)
}
return nil
}


@@ -15,7 +15,6 @@ import (
"orly.dev/pkg/encoders/kinds"
"orly.dev/pkg/encoders/tag"
"orly.dev/pkg/encoders/tags"
"orly.dev/pkg/encoders/text"
"orly.dev/pkg/encoders/timestamp"
"orly.dev/pkg/protocol/ws"
"orly.dev/pkg/utils/chk"
@@ -39,21 +38,41 @@ type BenchmarkResults struct {
func main() {
var (
relayURL = flag.String(
"relay", "ws://localhost:7447", "Relay URL to benchmark",
)
eventCount = flag.Int("events", 10000, "Number of events to publish")
eventSize = flag.Int(
"size", 1024, "Average size of event content in bytes",
)
concurrency = flag.Int(
"concurrency", 10, "Number of concurrent publishers",
)
queryCount = flag.Int("queries", 100, "Number of queries to execute")
queryLimit = flag.Int("query-limit", 100, "Limit for each query")
skipPublish = flag.Bool("skip-publish", false, "Skip publishing phase")
skipQuery = flag.Bool("skip-query", false, "Skip query phase")
verbose = flag.Bool("v", false, "Verbose output")
relayURL = flag.String("relay", "ws://localhost:7447", "Relay URL to benchmark")
eventCount = flag.Int("events", 10000, "Number of events to publish")
eventSize = flag.Int("size", 1024, "Average size of event content in bytes")
concurrency = flag.Int("concurrency", 10, "Number of concurrent publishers")
queryCount = flag.Int("queries", 100, "Number of queries to execute")
queryLimit = flag.Int("query-limit", 100, "Limit for each query")
skipPublish = flag.Bool("skip-publish", false, "Skip publishing phase")
skipQuery = flag.Bool("skip-query", false, "Skip query phase")
verbose = flag.Bool("v", false, "Verbose output")
multiRelay = flag.Bool("multi-relay", false, "Use multi-relay harness")
relayBinPath = flag.String("relay-bin", "", "Path to relay binary (for multi-relay mode)")
profileQueries = flag.Bool("profile", false, "Run query performance profiling")
profileSubs = flag.Bool("profile-subs", false, "Profile subscription performance")
subCount = flag.Int("sub-count", 100, "Number of concurrent subscriptions for profiling")
subDuration = flag.Duration("sub-duration", 30*time.Second, "Duration for subscription profiling")
installRelays = flag.Bool("install", false, "Install relay dependencies and binaries")
installSecp = flag.Bool("install-secp", false, "Install only secp256k1 library")
workDir = flag.String("work-dir", "/tmp/relay-build", "Working directory for builds")
installDir = flag.String("install-dir", "/usr/local/bin", "Installation directory for binaries")
generateReport = flag.Bool("report", false, "Generate comparative report")
reportFormat = flag.String("report-format", "markdown", "Report format: markdown, json, csv")
reportFile = flag.String("report-file", "benchmark_report", "Report output filename (without extension)")
reportTitle = flag.String("report-title", "Relay Benchmark Comparison", "Report title")
timingMode = flag.Bool("timing", false, "Run end-to-end timing instrumentation")
timingEvents = flag.Int("timing-events", 100, "Number of events for timing instrumentation")
timingSubs = flag.Bool("timing-subs", false, "Test subscription timing")
timingDuration = flag.Duration("timing-duration", 10*time.Second, "Duration for subscription timing test")
loadTest = flag.Bool("load", false, "Run load pattern simulation")
loadPattern = flag.String("load-pattern", "constant", "Load pattern: constant, spike, burst, sine, ramp")
loadDuration = flag.Duration("load-duration", 60*time.Second, "Duration for load test")
loadBase = flag.Int("load-base", 50, "Base load (events/sec)")
loadPeak = flag.Int("load-peak", 200, "Peak load (events/sec)")
loadPool = flag.Int("load-pool", 10, "Connection pool size for load testing")
loadSuite = flag.Bool("load-suite", false, "Run comprehensive load test suite")
loadConstraints = flag.Bool("load-constraints", false, "Test under resource constraints")
)
flag.Parse()
@@ -62,25 +81,42 @@ func main() {
}
c := context.Bg()
if *installRelays {
runInstaller(*workDir, *installDir)
} else if *installSecp {
runSecp256k1Installer(*workDir, *installDir)
} else if *generateReport {
runReportGeneration(*reportTitle, *reportFormat, *reportFile)
} else if *loadTest || *loadSuite || *loadConstraints {
runLoadSimulation(c, *relayURL, *loadPattern, *loadDuration, *loadBase, *loadPeak, *loadPool, *eventSize, *loadSuite, *loadConstraints)
} else if *timingMode || *timingSubs {
runTimingInstrumentation(c, *relayURL, *timingEvents, *eventSize, *timingSubs, *timingDuration)
} else if *profileQueries || *profileSubs {
runQueryProfiler(c, *relayURL, *queryCount, *concurrency, *profileSubs, *subCount, *subDuration)
} else if *multiRelay {
runMultiRelayBenchmark(c, *relayBinPath, *eventCount, *eventSize, *concurrency, *queryCount, *queryLimit, *skipPublish, *skipQuery)
} else {
runSingleRelayBenchmark(c, *relayURL, *eventCount, *eventSize, *concurrency, *queryCount, *queryLimit, *skipPublish, *skipQuery)
}
}
func runSingleRelayBenchmark(c context.T, relayURL string, eventCount, eventSize, concurrency, queryCount, queryLimit int, skipPublish, skipQuery bool) {
results := &BenchmarkResults{}
// Phase 1: Publish events
if !*skipPublish {
fmt.Printf("Publishing %d events to %s...\n", *eventCount, *relayURL)
if err := benchmarkPublish(
c, *relayURL, *eventCount, *eventSize, *concurrency, results,
); chk.E(err) {
if !skipPublish {
fmt.Printf("Publishing %d events to %s...\n", eventCount, relayURL)
if err := benchmarkPublish(c, relayURL, eventCount, eventSize, concurrency, results); chk.E(err) {
fmt.Fprintf(os.Stderr, "Error during publish benchmark: %v\n", err)
os.Exit(1)
}
}
// Phase 2: Query events
if !*skipQuery {
fmt.Printf("\nQuerying events from %s...\n", *relayURL)
if err := benchmarkQuery(
c, *relayURL, *queryCount, *queryLimit, results,
); chk.E(err) {
if !skipQuery {
fmt.Printf("\nQuerying events from %s...\n", relayURL)
if err := benchmarkQuery(c, relayURL, queryCount, queryLimit, results); chk.E(err) {
fmt.Fprintf(os.Stderr, "Error during query benchmark: %v\n", err)
os.Exit(1)
}
@@ -90,10 +126,80 @@ func main() {
printResults(results)
}
func benchmarkPublish(
c context.T, relayURL string, eventCount, eventSize, concurrency int,
results *BenchmarkResults,
) error {
func runMultiRelayBenchmark(c context.T, relayBinPath string, eventCount, eventSize, concurrency, queryCount, queryLimit int, skipPublish, skipQuery bool) {
harness := NewMultiRelayHarness()
generator := NewReportGenerator()
if relayBinPath != "" {
config := RelayConfig{
Type: Khatru,
Binary: relayBinPath,
Args: []string{},
URL: "ws://localhost:7447",
}
if err := harness.AddRelay(config); chk.E(err) {
fmt.Fprintf(os.Stderr, "Failed to add relay: %v\n", err)
os.Exit(1)
}
fmt.Printf("Starting relay harness...\n")
if err := harness.StartAll(); chk.E(err) {
fmt.Fprintf(os.Stderr, "Failed to start relays: %v\n", err)
os.Exit(1)
}
defer harness.StopAll()
time.Sleep(2 * time.Second)
}
relayTypes := []RelayType{Khatru}
if relayBinPath == "" {
fmt.Printf("Running multi-relay benchmark without starting relays (external relays expected)\n")
}
for _, relayType := range relayTypes {
fmt.Printf("\n=== Benchmarking %s ===\n", relayType)
results := &BenchmarkResults{}
relayURL := "ws://localhost:7447"
if !skipPublish {
fmt.Printf("Publishing %d events to %s...\n", eventCount, relayURL)
if err := benchmarkPublish(c, relayURL, eventCount, eventSize, concurrency, results); chk.E(err) {
fmt.Fprintf(os.Stderr, "Error during publish benchmark for %s: %v\n", relayType, err)
continue
}
}
if !skipQuery {
fmt.Printf("\nQuerying events from %s...\n", relayURL)
if err := benchmarkQuery(c, relayURL, queryCount, queryLimit, results); chk.E(err) {
fmt.Fprintf(os.Stderr, "Error during query benchmark for %s: %v\n", relayType, err)
continue
}
}
fmt.Printf("\n=== %s Results ===\n", relayType)
printResults(results)
metrics := harness.GetMetrics(relayType)
if metrics != nil {
printHarnessMetrics(relayType, metrics)
}
generator.AddRelayData(relayType.String(), results, metrics, nil)
}
generator.GenerateReport("Multi-Relay Benchmark Results")
if err := SaveReportToFile("BENCHMARK_RESULTS.md", "markdown", generator); chk.E(err) {
fmt.Printf("Warning: Failed to save benchmark results: %v\n", err)
} else {
fmt.Printf("\nBenchmark results saved to: BENCHMARK_RESULTS.md\n")
}
}
func benchmarkPublish(c context.T, relayURL string, eventCount, eventSize, concurrency int, results *BenchmarkResults) error {
// Generate signers for each concurrent publisher
signers := make([]*testSigner, concurrency)
for i := range signers {
@@ -136,7 +242,7 @@ func benchmarkPublish(
// Publish events
for j := 0; j < eventsToPublish; j++ {
ev := generateEvent(signer, eventSize)
ev := generateEvent(signer, eventSize, time.Duration(0), 0)
if err := relay.Publish(c, ev); err != nil {
log.E.F(
@@ -266,28 +372,8 @@ func benchmarkQuery(
return nil
}
func generateEvent(signer *testSigner, contentSize int) *event.E {
// Generate content with some variation
size := contentSize + frand.Intn(contentSize/2) - contentSize/4
if size < 10 {
size = 10
}
content := text.NostrEscape(nil, frand.Bytes(size))
ev := &event.E{
Pubkey: signer.Pub(),
Kind: kind.TextNote,
CreatedAt: timestamp.Now(),
Content: content,
Tags: generateRandomTags(),
}
if err := ev.Sign(signer); chk.E(err) {
panic(fmt.Sprintf("failed to sign event: %v", err))
}
return ev
func generateEvent(signer *testSigner, contentSize int, rateLimit time.Duration, burstSize int) *event.E {
return generateSimpleEvent(signer, contentSize)
}
func generateRandomTags() *tags.T {
@@ -350,3 +436,209 @@ func printResults(results *BenchmarkResults) {
fmt.Printf(" Avg Events/Query: %.2f\n", avgEventsPerQuery)
}
}
func printHarnessMetrics(relayType RelayType, metrics *HarnessMetrics) {
fmt.Printf("\nHarness Metrics for %s:\n", relayType)
if metrics.StartupTime > 0 {
fmt.Printf(" Startup Time: %s\n", metrics.StartupTime)
}
if metrics.ShutdownTime > 0 {
fmt.Printf(" Shutdown Time: %s\n", metrics.ShutdownTime)
}
if metrics.Errors > 0 {
fmt.Printf(" Errors: %d\n", metrics.Errors)
}
}
func runQueryProfiler(c context.T, relayURL string, queryCount, concurrency int, profileSubs bool, subCount int, subDuration time.Duration) {
profiler := NewQueryProfiler(relayURL)
if profileSubs {
fmt.Printf("Profiling %d concurrent subscriptions for %v...\n", subCount, subDuration)
if err := profiler.TestSubscriptionPerformance(c, subDuration, subCount); chk.E(err) {
fmt.Fprintf(os.Stderr, "Subscription profiling failed: %v\n", err)
os.Exit(1)
}
} else {
fmt.Printf("Profiling %d queries with %d concurrent workers...\n", queryCount, concurrency)
if err := profiler.ExecuteProfile(c, queryCount, concurrency); chk.E(err) {
fmt.Fprintf(os.Stderr, "Query profiling failed: %v\n", err)
os.Exit(1)
}
}
profiler.PrintReport()
}
func runInstaller(workDir, installDir string) {
installer := NewRelayInstaller(workDir, installDir)
if err := installer.InstallAll(); chk.E(err) {
fmt.Fprintf(os.Stderr, "Installation failed: %v\n", err)
os.Exit(1)
}
}
func runSecp256k1Installer(workDir, installDir string) {
installer := NewRelayInstaller(workDir, installDir)
if err := installer.InstallSecp256k1Only(); chk.E(err) {
fmt.Fprintf(os.Stderr, "secp256k1 installation failed: %v\n", err)
os.Exit(1)
}
}
func runLoadSimulation(c context.T, relayURL, patternStr string, duration time.Duration, baseLoad, peakLoad, poolSize, eventSize int, runSuite, runConstraints bool) {
if runSuite {
suite := NewLoadTestSuite(relayURL, poolSize, eventSize)
if err := suite.RunAllPatterns(c); chk.E(err) {
fmt.Fprintf(os.Stderr, "Load test suite failed: %v\n", err)
os.Exit(1)
}
return
}
var pattern LoadPattern
switch patternStr {
case "constant":
pattern = Constant
case "spike":
pattern = Spike
case "burst":
pattern = Burst
case "sine":
pattern = Sine
case "ramp":
pattern = Ramp
default:
fmt.Fprintf(os.Stderr, "Invalid load pattern: %s\n", patternStr)
os.Exit(1)
}
simulator := NewLoadSimulator(relayURL, pattern, duration, baseLoad, peakLoad, poolSize, eventSize)
if err := simulator.Run(c); chk.E(err) {
fmt.Fprintf(os.Stderr, "Load simulation failed: %v\n", err)
os.Exit(1)
}
if runConstraints {
fmt.Printf("\n")
if err := simulator.SimulateResourceConstraints(c, 512, 80); chk.E(err) {
fmt.Fprintf(os.Stderr, "Resource constraint simulation failed: %v\n", err)
}
}
metrics := simulator.GetMetrics()
fmt.Printf("\n=== Load Simulation Summary ===\n")
fmt.Printf("Pattern: %v\n", metrics["pattern"])
fmt.Printf("Events sent: %v\n", metrics["events_sent"])
fmt.Printf("Events failed: %v\n", metrics["events_failed"])
fmt.Printf("Connection errors: %v\n", metrics["connection_errors"])
if eps, ok := metrics["events_per_second"].(float64); ok {
fmt.Printf("Events/second: %.2f\n", eps)
}
fmt.Printf("Average latency: %vms\n", metrics["avg_latency_ms"])
fmt.Printf("Peak latency: %vms\n", metrics["peak_latency_ms"])
}
func runTimingInstrumentation(c context.T, relayURL string, eventCount, eventSize int, testSubs bool, duration time.Duration) {
instrumentation := NewTimingInstrumentation(relayURL)
fmt.Printf("Connecting to relay at %s...\n", relayURL)
if err := instrumentation.Connect(c, relayURL); chk.E(err) {
fmt.Fprintf(os.Stderr, "Failed to connect to relay: %v\n", err)
os.Exit(1)
}
defer instrumentation.Close()
if testSubs {
fmt.Printf("\n=== Subscription Timing Test ===\n")
if err := instrumentation.TestSubscriptionTiming(c, duration); chk.E(err) {
fmt.Fprintf(os.Stderr, "Subscription timing test failed: %v\n", err)
os.Exit(1)
}
} else {
fmt.Printf("\n=== Full Event Lifecycle Instrumentation ===\n")
if err := instrumentation.RunFullInstrumentation(c, eventCount, eventSize); chk.E(err) {
fmt.Fprintf(os.Stderr, "Timing instrumentation failed: %v\n", err)
os.Exit(1)
}
}
metrics := instrumentation.GetMetrics()
fmt.Printf("\n=== Instrumentation Metrics Summary ===\n")
fmt.Printf("Total Events Tracked: %v\n", metrics["tracked_events"])
fmt.Printf("Lifecycles Recorded: %v\n", metrics["lifecycles_count"])
fmt.Printf("WebSocket Frames: %v\n", metrics["frames_tracked"])
fmt.Printf("Write Amplifications: %v\n", metrics["write_amplifications"])
if bottlenecks, ok := metrics["bottlenecks"].(map[string]map[string]interface{}); ok {
fmt.Printf("\n=== Pipeline Stage Analysis ===\n")
for stage, data := range bottlenecks {
fmt.Printf("%s: avg=%vms, p95=%vms, p99=%vms, throughput=%.2f ops/s\n",
stage,
data["avg_latency_ms"],
data["p95_latency_ms"],
data["p99_latency_ms"],
data["throughput_ops_sec"])
}
}
}
func runReportGeneration(title, format, filename string) {
generator := NewReportGenerator()
resultsFile := "BENCHMARK_RESULTS.md"
if _, err := os.Stat(resultsFile); os.IsNotExist(err) {
fmt.Printf("No benchmark results found. Run benchmarks first to generate data.\n")
fmt.Printf("Example: ./benchmark --multi-relay --relay-bin /path/to/relay\n")
os.Exit(1)
}
fmt.Printf("Generating %s report: %s\n", format, filename)
// Placeholder dataset: the existing BENCHMARK_RESULTS.md is not parsed here,
// so the report is generated from representative sample values.
sampleData := []RelayBenchmarkData{
{
RelayType: "khatru",
EventsPublished: 10000,
EventsPublishedMB: 15.2,
PublishDuration: "12.5s",
PublishRate: 800.0,
PublishBandwidth: 1.22,
QueriesExecuted: 100,
EventsReturned: 8500,
QueryDuration: "2.1s",
QueryRate: 47.6,
AvgEventsPerQuery: 85.0,
MemoryUsageMB: 245.6,
P50Latency: "15ms",
P95Latency: "45ms",
P99Latency: "120ms",
StartupTime: "1.2s",
Errors: 0,
Timestamp: time.Now(),
},
}
generator.report.Title = title
generator.report.RelayData = sampleData
generator.analyzePerfomance()
generator.detectAnomalies()
generator.generateRecommendations()
ext := format
if format == "markdown" {
ext = "md"
}
outputFile := fmt.Sprintf("%s.%s", filename, ext)
if err := SaveReportToFile(outputFile, format, generator); chk.E(err) {
fmt.Fprintf(os.Stderr, "Failed to save report: %v\n", err)
os.Exit(1)
}
fmt.Printf("Report saved to: %s\n", outputFile)
if format == "markdown" {
fmt.Printf("\nTIP: View with: cat %s\n", outputFile)
}
}


@@ -0,0 +1,419 @@
package main
import (
"fmt"
"lukechampine.com/frand"
"orly.dev/pkg/encoders/event"
"orly.dev/pkg/encoders/filter"
"orly.dev/pkg/encoders/filters"
"orly.dev/pkg/encoders/kind"
"orly.dev/pkg/encoders/kinds"
"orly.dev/pkg/encoders/tag"
"orly.dev/pkg/encoders/tags"
"orly.dev/pkg/encoders/timestamp"
"orly.dev/pkg/protocol/ws"
"orly.dev/pkg/utils/chk"
"orly.dev/pkg/utils/context"
"runtime"
"sort"
"sync"
"sync/atomic"
"time"
)
type QueryMetrics struct {
Latencies []time.Duration
TotalQueries int64
FailedQueries int64
EventsReturned int64
MemoryBefore uint64
MemoryAfter uint64
MemoryPeak uint64
P50 time.Duration
P95 time.Duration
P99 time.Duration
Min time.Duration
Max time.Duration
Mean time.Duration
}
type FilterType int
const (
SimpleKindFilter FilterType = iota
TimeRangeFilter
AuthorFilter
TagFilter
ComplexFilter
IDFilter
PrefixFilter
MultiKindFilter
LargeTagSetFilter
DeepTimeRangeFilter
)
type QueryProfiler struct {
relay string
subscriptions map[string]*ws.Subscription
metrics *QueryMetrics
mu sync.RWMutex
memTicker *time.Ticker
stopMemMonitor chan struct{}
}
func NewQueryProfiler(relayURL string) *QueryProfiler {
return &QueryProfiler{
relay: relayURL,
subscriptions: make(map[string]*ws.Subscription),
metrics: &QueryMetrics{Latencies: make([]time.Duration, 0, 10000)},
stopMemMonitor: make(chan struct{}),
}
}
func (qp *QueryProfiler) ExecuteProfile(c context.T, iterations int, concurrency int) error {
qp.startMemoryMonitor()
defer qp.stopMemoryMonitor()
var m runtime.MemStats
runtime.ReadMemStats(&m)
qp.metrics.MemoryBefore = m.Alloc
filterTypes := []FilterType{
SimpleKindFilter,
TimeRangeFilter,
AuthorFilter,
TagFilter,
ComplexFilter,
IDFilter,
PrefixFilter,
MultiKindFilter,
LargeTagSetFilter,
DeepTimeRangeFilter,
}
var wg sync.WaitGroup
latencyChan := make(chan time.Duration, iterations)
errorChan := make(chan error, iterations)
for i := 0; i < concurrency; i++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
relay, err := ws.RelayConnect(c, qp.relay)
if chk.E(err) {
errorChan <- fmt.Errorf("worker %d connection failed: %w", workerID, err)
return
}
defer relay.Close()
iterationsPerWorker := iterations / concurrency
if workerID == 0 {
iterationsPerWorker += iterations % concurrency
}
for j := 0; j < iterationsPerWorker; j++ {
filterType := filterTypes[frand.Intn(len(filterTypes))]
f := qp.generateFilter(filterType)
startTime := time.Now()
events, err := relay.QuerySync(c, f, ws.WithLabel(fmt.Sprintf("profiler-%d-%d", workerID, j)))
latency := time.Since(startTime)
if err != nil {
errorChan <- err
atomic.AddInt64(&qp.metrics.FailedQueries, 1)
} else {
latencyChan <- latency
atomic.AddInt64(&qp.metrics.EventsReturned, int64(len(events)))
atomic.AddInt64(&qp.metrics.TotalQueries, 1)
}
}
}(i)
}
wg.Wait()
close(latencyChan)
close(errorChan)
for latency := range latencyChan {
qp.mu.Lock()
qp.metrics.Latencies = append(qp.metrics.Latencies, latency)
qp.mu.Unlock()
}
errorCount := 0
for range errorChan {
errorCount++
}
if errorCount > 0 {
fmt.Printf("Query profiling encountered %d errors\n", errorCount)
}
runtime.ReadMemStats(&m)
qp.metrics.MemoryAfter = m.Alloc
qp.calculatePercentiles()
return nil
}
func (qp *QueryProfiler) generateFilter(filterType FilterType) *filter.F {
switch filterType {
case SimpleKindFilter:
limit := uint(100)
return &filter.F{
Kinds: kinds.New(kind.TextNote),
Limit: &limit,
}
case TimeRangeFilter:
now := timestamp.Now()
since := timestamp.New(now.I64() - 3600)
limit := uint(50)
return &filter.F{
Since: since,
Until: now,
Limit: &limit,
}
case AuthorFilter:
limit := uint(100)
authors := tag.New(frand.Bytes(32))
for i := 0; i < 2; i++ {
authors.Append(frand.Bytes(32))
}
return &filter.F{
Authors: authors,
Limit: &limit,
}
case TagFilter:
limit := uint(50)
t := tags.New()
t.AppendUnique(tag.New([]byte("p"), frand.Bytes(32)))
t.AppendUnique(tag.New([]byte("e"), frand.Bytes(32)))
return &filter.F{
Tags: t,
Limit: &limit,
}
case ComplexFilter:
now := timestamp.Now()
since := timestamp.New(now.I64() - 7200)
limit := uint(25)
authors := tag.New(frand.Bytes(32))
return &filter.F{
Kinds: kinds.New(kind.TextNote, kind.Repost, kind.Reaction),
Authors: authors,
Since: since,
Until: now,
Limit: &limit,
}
case IDFilter:
limit := uint(10)
ids := tag.New(frand.Bytes(32))
for i := 0; i < 4; i++ {
ids.Append(frand.Bytes(32))
}
return &filter.F{
Ids: ids,
Limit: &limit,
}
case PrefixFilter:
limit := uint(100)
prefix := frand.Bytes(4)
return &filter.F{
Ids: tag.New(prefix),
Limit: &limit,
}
case MultiKindFilter:
limit := uint(75)
return &filter.F{
Kinds: kinds.New(
kind.TextNote,
kind.SetMetadata,
kind.FollowList,
kind.Reaction,
kind.Repost,
),
Limit: &limit,
}
case LargeTagSetFilter:
limit := uint(20)
t := tags.New()
for i := 0; i < 10; i++ {
t.AppendUnique(tag.New([]byte("p"), frand.Bytes(32)))
}
return &filter.F{
Tags: t,
Limit: &limit,
}
case DeepTimeRangeFilter:
now := timestamp.Now()
since := timestamp.New(now.I64() - 86400*30)
until := timestamp.New(now.I64() - 86400*20)
limit := uint(100)
return &filter.F{
Since: since,
Until: until,
Limit: &limit,
}
default:
limit := uint(100)
return &filter.F{
Kinds: kinds.New(kind.TextNote),
Limit: &limit,
}
}
}
func (qp *QueryProfiler) TestSubscriptionPerformance(c context.T, duration time.Duration, subscriptionCount int) error {
qp.startMemoryMonitor()
defer qp.stopMemoryMonitor()
relay, err := ws.RelayConnect(c, qp.relay)
if chk.E(err) {
return fmt.Errorf("connection failed: %w", err)
}
defer relay.Close()
var wg sync.WaitGroup
stopChan := make(chan struct{})
for i := 0; i < subscriptionCount; i++ {
wg.Add(1)
go func(subID int) {
defer wg.Done()
f := qp.generateFilter(FilterType(subID % 10))
label := fmt.Sprintf("sub-perf-%d", subID)
eventChan := make(chan *event.E, 100)
sub, err := relay.Subscribe(c, &filters.T{F: []*filter.F{f}}, ws.WithLabel(label))
if chk.E(err) {
return
}
go func() {
for {
select {
case ev := <-sub.Events:
// Non-blocking send: nothing drains eventChan, so a blocking send
// would stall this loop once the 100-slot buffer fills.
select {
case eventChan <- ev:
default:
}
atomic.AddInt64(&qp.metrics.EventsReturned, 1)
case <-stopChan:
sub.Unsub()
return
}
}
}()
qp.mu.Lock()
qp.subscriptions[label] = sub
qp.mu.Unlock()
}(i)
}
time.Sleep(duration)
close(stopChan)
wg.Wait()
return nil
}
func (qp *QueryProfiler) startMemoryMonitor() {
// Recreate the stop channel so a profiler can be restarted after a
// previous stopMemoryMonitor closed it.
qp.stopMemMonitor = make(chan struct{})
qp.memTicker = time.NewTicker(100 * time.Millisecond)
go func() {
for {
select {
case <-qp.memTicker.C:
var m runtime.MemStats
runtime.ReadMemStats(&m)
qp.mu.Lock()
if m.Alloc > qp.metrics.MemoryPeak {
qp.metrics.MemoryPeak = m.Alloc
}
qp.mu.Unlock()
case <-qp.stopMemMonitor:
return
}
}
}()
}
func (qp *QueryProfiler) stopMemoryMonitor() {
if qp.memTicker != nil {
qp.memTicker.Stop()
}
close(qp.stopMemMonitor)
}
func (qp *QueryProfiler) calculatePercentiles() {
qp.mu.Lock()
defer qp.mu.Unlock()
if len(qp.metrics.Latencies) == 0 {
return
}
sort.Slice(qp.metrics.Latencies, func(i, j int) bool {
return qp.metrics.Latencies[i] < qp.metrics.Latencies[j]
})
qp.metrics.Min = qp.metrics.Latencies[0]
qp.metrics.Max = qp.metrics.Latencies[len(qp.metrics.Latencies)-1]
p50Index := len(qp.metrics.Latencies) * 50 / 100
p95Index := len(qp.metrics.Latencies) * 95 / 100
p99Index := len(qp.metrics.Latencies) * 99 / 100
if p50Index < len(qp.metrics.Latencies) {
qp.metrics.P50 = qp.metrics.Latencies[p50Index]
}
if p95Index < len(qp.metrics.Latencies) {
qp.metrics.P95 = qp.metrics.Latencies[p95Index]
}
if p99Index < len(qp.metrics.Latencies) {
qp.metrics.P99 = qp.metrics.Latencies[p99Index]
}
var total time.Duration
for _, latency := range qp.metrics.Latencies {
total += latency
}
qp.metrics.Mean = total / time.Duration(len(qp.metrics.Latencies))
}
func (qp *QueryProfiler) GetMetrics() *QueryMetrics {
qp.mu.RLock()
defer qp.mu.RUnlock()
return qp.metrics
}
func (qp *QueryProfiler) PrintReport() {
metrics := qp.GetMetrics()
fmt.Println("\n=== Query Performance Profile ===")
fmt.Printf("Total Queries: %d\n", metrics.TotalQueries)
fmt.Printf("Failed Queries: %d\n", metrics.FailedQueries)
fmt.Printf("Events Returned: %d\n", metrics.EventsReturned)
if metrics.TotalQueries > 0 {
fmt.Println("\nLatency Percentiles:")
fmt.Printf(" P50: %v\n", metrics.P50)
fmt.Printf(" P95: %v\n", metrics.P95)
fmt.Printf(" P99: %v\n", metrics.P99)
fmt.Printf(" Min: %v\n", metrics.Min)
fmt.Printf(" Max: %v\n", metrics.Max)
fmt.Printf(" Mean: %v\n", metrics.Mean)
}
fmt.Println("\nMemory Usage:")
fmt.Printf(" Before: %.2f MB\n", float64(metrics.MemoryBefore)/1024/1024)
fmt.Printf(" After: %.2f MB\n", float64(metrics.MemoryAfter)/1024/1024)
fmt.Printf(" Peak: %.2f MB\n", float64(metrics.MemoryPeak)/1024/1024)
fmt.Printf(" Delta: %.2f MB\n", float64(int64(metrics.MemoryAfter)-int64(metrics.MemoryBefore))/1024/1024)
}


@@ -0,0 +1,285 @@
package main
import (
"fmt"
"orly.dev/pkg/protocol/ws"
"orly.dev/pkg/utils/chk"
"orly.dev/pkg/utils/context"
"orly.dev/pkg/utils/log"
"os/exec"
"sync"
"time"
)
type RelayType int
const (
Khatru RelayType = iota
Relayer
Strfry
RustNostr
)
func (r RelayType) String() string {
switch r {
case Khatru:
return "khatru"
case Relayer:
return "relayer"
case Strfry:
return "strfry"
case RustNostr:
return "rust-nostr"
default:
return "unknown"
}
}
type RelayConfig struct {
Type RelayType
Binary string
Args []string
URL string
DataDir string
}
type RelayInstance struct {
Config RelayConfig
Process *exec.Cmd
Started time.Time
Errors []error
mu sync.RWMutex
}
type HarnessMetrics struct {
StartupTime time.Duration
ShutdownTime time.Duration
Errors int
}
type MultiRelayHarness struct {
relays map[RelayType]*RelayInstance
metrics map[RelayType]*HarnessMetrics
mu sync.RWMutex
}
func NewMultiRelayHarness() *MultiRelayHarness {
return &MultiRelayHarness{
relays: make(map[RelayType]*RelayInstance),
metrics: make(map[RelayType]*HarnessMetrics),
}
}
func (h *MultiRelayHarness) AddRelay(config RelayConfig) error {
h.mu.Lock()
defer h.mu.Unlock()
instance := &RelayInstance{
Config: config,
Errors: make([]error, 0),
}
h.relays[config.Type] = instance
h.metrics[config.Type] = &HarnessMetrics{}
return nil
}
func (h *MultiRelayHarness) StartRelay(relayType RelayType) error {
h.mu.Lock()
defer h.mu.Unlock()
instance, exists := h.relays[relayType]
if !exists {
return fmt.Errorf("relay type %s not configured", relayType)
}
if instance.Process != nil {
return fmt.Errorf("relay %s already running", relayType)
}
startTime := time.Now()
cmd := exec.Command(instance.Config.Binary, instance.Config.Args...)
if err := cmd.Start(); chk.E(err) {
return fmt.Errorf("failed to start %s: %w", relayType, err)
}
instance.Process = cmd
instance.Started = startTime
time.Sleep(100 * time.Millisecond)
metrics := h.metrics[relayType]
metrics.StartupTime = time.Since(startTime)
return nil
}
func (h *MultiRelayHarness) StopRelay(relayType RelayType) error {
h.mu.Lock()
defer h.mu.Unlock()
instance, exists := h.relays[relayType]
if !exists {
return fmt.Errorf("relay type %s not configured", relayType)
}
if instance.Process == nil {
return nil
}
shutdownStart := time.Now()
if err := instance.Process.Process.Kill(); chk.E(err) {
return fmt.Errorf("failed to stop %s: %w", relayType, err)
}
instance.Process.Wait()
instance.Process = nil
metrics := h.metrics[relayType]
metrics.ShutdownTime = time.Since(shutdownStart)
return nil
}
func (h *MultiRelayHarness) ConnectToRelay(c context.T, relayType RelayType) error {
h.mu.RLock()
instance, exists := h.relays[relayType]
h.mu.RUnlock()
if !exists {
return fmt.Errorf("relay type %s not configured", relayType)
}
if instance.Process == nil {
return fmt.Errorf("relay %s not running", relayType)
}
_, err := ws.RelayConnect(c, instance.Config.URL)
if chk.E(err) {
h.mu.Lock()
h.metrics[relayType].Errors++
instance.Errors = append(instance.Errors, err)
h.mu.Unlock()
return fmt.Errorf("failed to connect to %s: %w", relayType, err)
}
return nil
}
func (h *MultiRelayHarness) StartAll() error {
h.mu.RLock()
relayTypes := make([]RelayType, 0, len(h.relays))
for relayType := range h.relays {
relayTypes = append(relayTypes, relayType)
}
h.mu.RUnlock()
var wg sync.WaitGroup
errChan := make(chan error, len(relayTypes))
for _, relayType := range relayTypes {
wg.Add(1)
go func(rt RelayType) {
defer wg.Done()
if err := h.StartRelay(rt); err != nil {
errChan <- fmt.Errorf("failed to start %s: %w", rt, err)
}
}(relayType)
}
wg.Wait()
close(errChan)
var errors []error
for err := range errChan {
errors = append(errors, err)
}
if len(errors) > 0 {
for _, err := range errors {
log.E.Ln(err)
}
return fmt.Errorf("failed to start %d relays", len(errors))
}
return nil
}
func (h *MultiRelayHarness) StopAll() error {
h.mu.RLock()
relayTypes := make([]RelayType, 0, len(h.relays))
for relayType := range h.relays {
relayTypes = append(relayTypes, relayType)
}
h.mu.RUnlock()
var wg sync.WaitGroup
errChan := make(chan error, len(relayTypes))
for _, relayType := range relayTypes {
wg.Add(1)
go func(rt RelayType) {
defer wg.Done()
if err := h.StopRelay(rt); err != nil {
errChan <- fmt.Errorf("failed to stop %s: %w", rt.String(), err)
}
}(relayType)
}
wg.Wait()
close(errChan)
var errors []error
for err := range errChan {
errors = append(errors, err)
}
if len(errors) > 0 {
for _, err := range errors {
log.E.Ln(err)
}
return fmt.Errorf("failed to stop %d relays", len(errors))
}
return nil
}
func (h *MultiRelayHarness) GetMetrics(relayType RelayType) *HarnessMetrics {
h.mu.RLock()
defer h.mu.RUnlock()
return h.metrics[relayType]
}
func (h *MultiRelayHarness) GetAllMetrics() map[RelayType]*HarnessMetrics {
h.mu.RLock()
defer h.mu.RUnlock()
result := make(map[RelayType]*HarnessMetrics)
for relayType, metrics := range h.metrics {
result[relayType] = metrics
}
return result
}
func (h *MultiRelayHarness) IsRunning(relayType RelayType) bool {
h.mu.RLock()
defer h.mu.RUnlock()
instance, exists := h.relays[relayType]
return exists && instance.Process != nil
}
func (h *MultiRelayHarness) GetErrors(relayType RelayType) []error {
h.mu.RLock()
defer h.mu.RUnlock()
instance, exists := h.relays[relayType]
if !exists {
return nil
}
return append([]error(nil), instance.Errors...)
}


@@ -0,0 +1,390 @@
package main
import (
"encoding/csv"
"encoding/json"
"fmt"
"io"
"math"
"os"
"sort"
"strings"
"time"
)
type RelayBenchmarkData struct {
RelayType string `json:"relay_type"`
EventsPublished int64 `json:"events_published"`
EventsPublishedMB float64 `json:"events_published_mb"`
PublishDuration string `json:"publish_duration"`
PublishRate float64 `json:"publish_rate"`
PublishBandwidth float64 `json:"publish_bandwidth"`
QueriesExecuted int64 `json:"queries_executed"`
EventsReturned int64 `json:"events_returned"`
QueryDuration string `json:"query_duration"`
QueryRate float64 `json:"query_rate"`
AvgEventsPerQuery float64 `json:"avg_events_per_query"`
StartupTime string `json:"startup_time,omitempty"`
ShutdownTime string `json:"shutdown_time,omitempty"`
Errors int64 `json:"errors,omitempty"`
MemoryUsageMB float64 `json:"memory_usage_mb,omitempty"`
P50Latency string `json:"p50_latency,omitempty"`
P95Latency string `json:"p95_latency,omitempty"`
P99Latency string `json:"p99_latency,omitempty"`
Timestamp time.Time `json:"timestamp"`
}
type ComparisonReport struct {
Title string `json:"title"`
GeneratedAt time.Time `json:"generated_at"`
RelayData []RelayBenchmarkData `json:"relay_data"`
WinnerPublish string `json:"winner_publish"`
WinnerQuery string `json:"winner_query"`
Anomalies []string `json:"anomalies"`
Recommendations []string `json:"recommendations"`
}
type ReportGenerator struct {
data []RelayBenchmarkData
report ComparisonReport
}
func NewReportGenerator() *ReportGenerator {
return &ReportGenerator{
data: make([]RelayBenchmarkData, 0),
report: ComparisonReport{
GeneratedAt: time.Now(),
Anomalies: make([]string, 0),
Recommendations: make([]string, 0),
},
}
}
func (rg *ReportGenerator) AddRelayData(relayType string, results *BenchmarkResults, metrics *HarnessMetrics, profilerMetrics *QueryMetrics) {
data := RelayBenchmarkData{
RelayType: relayType,
EventsPublished: results.EventsPublished,
EventsPublishedMB: float64(results.EventsPublishedBytes) / 1024 / 1024,
PublishDuration: results.PublishDuration.String(),
PublishRate: results.PublishRate,
PublishBandwidth: results.PublishBandwidth,
QueriesExecuted: results.QueriesExecuted,
EventsReturned: results.EventsReturned,
QueryDuration: results.QueryDuration.String(),
QueryRate: results.QueryRate,
Timestamp: time.Now(),
}
if results.QueriesExecuted > 0 {
data.AvgEventsPerQuery = float64(results.EventsReturned) / float64(results.QueriesExecuted)
}
if metrics != nil {
data.StartupTime = metrics.StartupTime.String()
data.ShutdownTime = metrics.ShutdownTime.String()
data.Errors = int64(metrics.Errors)
}
if profilerMetrics != nil {
data.MemoryUsageMB = float64(profilerMetrics.MemoryPeak) / 1024 / 1024
data.P50Latency = profilerMetrics.P50.String()
data.P95Latency = profilerMetrics.P95.String()
data.P99Latency = profilerMetrics.P99.String()
}
rg.data = append(rg.data, data)
}
func (rg *ReportGenerator) GenerateReport(title string) {
rg.report.Title = title
rg.report.RelayData = rg.data
rg.analyzePerformance()
rg.detectAnomalies()
rg.generateRecommendations()
}
func (rg *ReportGenerator) analyzePerformance() {
if len(rg.data) == 0 {
return
}
var bestPublishRate float64
var bestQueryRate float64
bestPublishRelay := ""
bestQueryRelay := ""
for _, data := range rg.data {
if data.PublishRate > bestPublishRate {
bestPublishRate = data.PublishRate
bestPublishRelay = data.RelayType
}
if data.QueryRate > bestQueryRate {
bestQueryRate = data.QueryRate
bestQueryRelay = data.RelayType
}
}
rg.report.WinnerPublish = bestPublishRelay
rg.report.WinnerQuery = bestQueryRelay
}
func (rg *ReportGenerator) detectAnomalies() {
if len(rg.data) < 2 {
return
}
publishRates := make([]float64, len(rg.data))
queryRates := make([]float64, len(rg.data))
for i, data := range rg.data {
publishRates[i] = data.PublishRate
queryRates[i] = data.QueryRate
}
publishMean := mean(publishRates)
publishStdDev := stdDev(publishRates, publishMean)
queryMean := mean(queryRates)
queryStdDev := stdDev(queryRates, queryMean)
for _, data := range rg.data {
if math.Abs(data.PublishRate-publishMean) > 2*publishStdDev {
anomaly := fmt.Sprintf("%s publish rate (%.2f) deviates significantly from average (%.2f)",
data.RelayType, data.PublishRate, publishMean)
rg.report.Anomalies = append(rg.report.Anomalies, anomaly)
}
if math.Abs(data.QueryRate-queryMean) > 2*queryStdDev {
anomaly := fmt.Sprintf("%s query rate (%.2f) deviates significantly from average (%.2f)",
data.RelayType, data.QueryRate, queryMean)
rg.report.Anomalies = append(rg.report.Anomalies, anomaly)
}
if data.Errors > 0 {
anomaly := fmt.Sprintf("%s had %d errors during benchmark", data.RelayType, data.Errors)
rg.report.Anomalies = append(rg.report.Anomalies, anomaly)
}
}
}
func (rg *ReportGenerator) generateRecommendations() {
if len(rg.data) == 0 {
return
}
sort.Slice(rg.data, func(i, j int) bool {
return rg.data[i].PublishRate > rg.data[j].PublishRate
})
if len(rg.data) > 1 {
best := rg.data[0]
worst := rg.data[len(rg.data)-1]
// Guard against a zero worst-case rate (e.g. a relay that failed to
// accept any events), which would divide by zero here.
if worst.PublishRate > 0 {
improvement := (best.PublishRate - worst.PublishRate) / worst.PublishRate * 100
if improvement > 20 {
rec := fmt.Sprintf("Consider using %s for high-throughput scenarios (%.1f%% faster than %s)",
best.RelayType, improvement, worst.RelayType)
rg.report.Recommendations = append(rg.report.Recommendations, rec)
}
}
}
for _, data := range rg.data {
if data.MemoryUsageMB > 500 {
rec := fmt.Sprintf("%s shows high memory usage (%.1f MB) - monitor for memory leaks",
data.RelayType, data.MemoryUsageMB)
rg.report.Recommendations = append(rg.report.Recommendations, rec)
}
}
}
func (rg *ReportGenerator) OutputMarkdown(writer io.Writer) error {
fmt.Fprintf(writer, "# %s\n\n", rg.report.Title)
fmt.Fprintf(writer, "Generated: %s\n\n", rg.report.GeneratedAt.Format(time.RFC3339))
fmt.Fprintf(writer, "## Performance Summary\n\n")
fmt.Fprintf(writer, "| Relay | Publish Rate | Publish BW | Query Rate | Avg Events/Query | Memory (MB) |\n")
fmt.Fprintf(writer, "|-------|--------------|------------|------------|------------------|-------------|\n")
for _, data := range rg.data {
fmt.Fprintf(writer, "| %s | %.2f/s | %.2f MB/s | %.2f/s | %.2f | %.1f |\n",
data.RelayType, data.PublishRate, data.PublishBandwidth,
data.QueryRate, data.AvgEventsPerQuery, data.MemoryUsageMB)
}
if rg.report.WinnerPublish != "" || rg.report.WinnerQuery != "" {
fmt.Fprintf(writer, "\n## Winners\n\n")
if rg.report.WinnerPublish != "" {
fmt.Fprintf(writer, "- **Best Publisher**: %s\n", rg.report.WinnerPublish)
}
if rg.report.WinnerQuery != "" {
fmt.Fprintf(writer, "- **Best Query Engine**: %s\n", rg.report.WinnerQuery)
}
}
if len(rg.report.Anomalies) > 0 {
fmt.Fprintf(writer, "\n## Anomalies\n\n")
for _, anomaly := range rg.report.Anomalies {
fmt.Fprintf(writer, "- %s\n", anomaly)
}
}
if len(rg.report.Recommendations) > 0 {
fmt.Fprintf(writer, "\n## Recommendations\n\n")
for _, rec := range rg.report.Recommendations {
fmt.Fprintf(writer, "- %s\n", rec)
}
}
fmt.Fprintf(writer, "\n## Detailed Results\n\n")
for _, data := range rg.data {
fmt.Fprintf(writer, "### %s\n\n", data.RelayType)
fmt.Fprintf(writer, "- Events Published: %d (%.2f MB)\n", data.EventsPublished, data.EventsPublishedMB)
fmt.Fprintf(writer, "- Publish Duration: %s\n", data.PublishDuration)
fmt.Fprintf(writer, "- Queries Executed: %d\n", data.QueriesExecuted)
fmt.Fprintf(writer, "- Query Duration: %s\n", data.QueryDuration)
if data.P50Latency != "" {
fmt.Fprintf(writer, "- Latency P50/P95/P99: %s/%s/%s\n", data.P50Latency, data.P95Latency, data.P99Latency)
}
if data.StartupTime != "" {
fmt.Fprintf(writer, "- Startup Time: %s\n", data.StartupTime)
}
fmt.Fprintf(writer, "\n")
}
return nil
}
func (rg *ReportGenerator) OutputJSON(writer io.Writer) error {
encoder := json.NewEncoder(writer)
encoder.SetIndent("", " ")
return encoder.Encode(rg.report)
}
func (rg *ReportGenerator) OutputCSV(writer io.Writer) error {
w := csv.NewWriter(writer)
defer w.Flush()
header := []string{
"relay_type", "events_published", "events_published_mb", "publish_duration",
"publish_rate", "publish_bandwidth", "queries_executed", "events_returned",
"query_duration", "query_rate", "avg_events_per_query", "memory_usage_mb",
"p50_latency", "p95_latency", "p99_latency", "startup_time", "errors",
}
if err := w.Write(header); err != nil {
return err
}
for _, data := range rg.data {
row := []string{
data.RelayType,
fmt.Sprintf("%d", data.EventsPublished),
fmt.Sprintf("%.2f", data.EventsPublishedMB),
data.PublishDuration,
fmt.Sprintf("%.2f", data.PublishRate),
fmt.Sprintf("%.2f", data.PublishBandwidth),
fmt.Sprintf("%d", data.QueriesExecuted),
fmt.Sprintf("%d", data.EventsReturned),
data.QueryDuration,
fmt.Sprintf("%.2f", data.QueryRate),
fmt.Sprintf("%.2f", data.AvgEventsPerQuery),
fmt.Sprintf("%.1f", data.MemoryUsageMB),
data.P50Latency,
data.P95Latency,
data.P99Latency,
data.StartupTime,
fmt.Sprintf("%d", data.Errors),
}
if err := w.Write(row); err != nil {
return err
}
}
return nil
}
func (rg *ReportGenerator) GenerateThroughputCurve() []ThroughputPoint {
points := make([]ThroughputPoint, 0)
for _, data := range rg.data {
point := ThroughputPoint{
RelayType: data.RelayType,
Throughput: data.PublishRate,
Latency: parseLatency(data.P95Latency),
}
points = append(points, point)
}
sort.Slice(points, func(i, j int) bool {
return points[i].Throughput < points[j].Throughput
})
return points
}
type ThroughputPoint struct {
RelayType string `json:"relay_type"`
Throughput float64 `json:"throughput"`
Latency float64 `json:"latency_ms"`
}
func parseLatency(latencyStr string) float64 {
latencyStr = strings.TrimSpace(latencyStr)
if latencyStr == "" {
return 0
}
// time.ParseDuration already understands the "ms"/"µs"/"ns" suffixes that
// time.Duration.String produces; stripping them first would leave a bare
// number, which ParseDuration rejects.
if dur, err := time.ParseDuration(latencyStr); err == nil {
return float64(dur.Nanoseconds()) / 1e6
}
return 0
}
func mean(values []float64) float64 {
if len(values) == 0 {
return 0
}
sum := 0.0
for _, v := range values {
sum += v
}
return sum / float64(len(values))
}
func stdDev(values []float64, mean float64) float64 {
if len(values) <= 1 {
return 0
}
variance := 0.0
for _, v := range values {
variance += math.Pow(v-mean, 2)
}
variance /= float64(len(values) - 1)
return math.Sqrt(variance)
}
func SaveReportToFile(filename, format string, generator *ReportGenerator) error {
file, err := os.Create(filename)
if err != nil {
return err
}
defer file.Close()
switch format {
case "json":
return generator.OutputJSON(file)
case "csv":
return generator.OutputCSV(file)
case "markdown", "md":
return generator.OutputMarkdown(file)
default:
return fmt.Errorf("unsupported format: %s", format)
}
}

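`detectAnomalies` flags any rate that sits more than two sample standard deviations from the mean. The rule in isolation, reusing the `mean`/`stdDev` helpers from above (`isOutlier` is an illustrative name):

```go
package main

import (
	"fmt"
	"math"
)

// mean and stdDev mirror the report generator's helpers; stdDev uses the
// sample (n-1) denominator.
func mean(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	sum := 0.0
	for _, v := range values {
		sum += v
	}
	return sum / float64(len(values))
}

func stdDev(values []float64, m float64) float64 {
	if len(values) <= 1 {
		return 0
	}
	variance := 0.0
	for _, v := range values {
		variance += math.Pow(v-m, 2)
	}
	variance /= float64(len(values) - 1)
	return math.Sqrt(variance)
}

// isOutlier applies the two-sigma rule used by detectAnomalies.
func isOutlier(v, m, sd float64) bool {
	return math.Abs(v-m) > 2*sd
}

func main() {
	rates := []float64{560, 580, 600, 16714} // illustrative publish rates
	m := mean(rates)
	sd := stdDev(rates, m)
	for _, r := range rates {
		fmt.Printf("%.0f outlier=%v\n", r, isOutlier(r, m, sd))
	}
}
```

Note that with only a handful of relays, a single spike inflates the standard deviation enough that it may still fall inside two sigma, so an empty anomaly list does not imply uniform performance.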

@@ -0,0 +1,192 @@
#!/bin/bash
BENCHMARK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RELAY_DIR="/tmp/relay-benchmark"
RESULTS_FILE="$BENCHMARK_DIR/BENCHMARK_RESULTS.md"
cd "$BENCHMARK_DIR"
echo "=== Starting Relay Benchmark Suite ===" | tee "$RESULTS_FILE"
echo "Date: $(date)" | tee -a "$RESULTS_FILE"
echo "" | tee -a "$RESULTS_FILE"
# Function to start a relay and wait for it to be ready
start_relay() {
local name=$1
local cmd=$2
local port=$3
echo "Starting $name on port $port..."
$cmd &
local pid=$!
# Wait for relay to be ready
sleep 3
# Check if process is still running
if ! kill -0 $pid 2>/dev/null; then
echo "Failed to start $name"
return 1
fi
echo "$name started with PID $pid"
# Bash return values are limited to 0-255, so expose the PID through a
# global rather than the return status.
RELAY_PID=$pid
return 0
}
# Function to run benchmark and capture results
run_benchmark() {
local relay_name=$1
local relay_url=$2
echo "" | tee -a "$RESULTS_FILE"
echo "## Benchmarking $relay_name" | tee -a "$RESULTS_FILE"
echo "URL: $relay_url" | tee -a "$RESULTS_FILE"
echo "" | tee -a "$RESULTS_FILE"
# Run standard benchmark
echo "### Standard Benchmark" | tee -a "$RESULTS_FILE"
./benchmark --relay "$relay_url" --events 5000 --queries 100 --concurrency 10 --size 1024 2>&1 | tee -a "$RESULTS_FILE"
# Run query profiling
echo "" | tee -a "$RESULTS_FILE"
echo "### Query Profiling" | tee -a "$RESULTS_FILE"
./benchmark --relay "$relay_url" --profile --queries 500 --concurrency 5 2>&1 | tee -a "$RESULTS_FILE"
# Run timing instrumentation
echo "" | tee -a "$RESULTS_FILE"
echo "### Timing Instrumentation" | tee -a "$RESULTS_FILE"
./benchmark --relay "$relay_url" --timing --timing-events 100 2>&1 | tee -a "$RESULTS_FILE"
# Run load simulation
echo "" | tee -a "$RESULTS_FILE"
echo "### Load Simulation (Spike Pattern)" | tee -a "$RESULTS_FILE"
./benchmark --relay "$relay_url" --load --load-pattern spike --load-duration 30s --load-base 50 --load-peak 200 2>&1 | tee -a "$RESULTS_FILE"
echo "" | tee -a "$RESULTS_FILE"
echo "---" | tee -a "$RESULTS_FILE"
}
# Test 1: Khatru
echo "=== Testing Khatru ===" | tee -a "$RESULTS_FILE"
cd "$RELAY_DIR"
if [ -f "khatru/examples/basic-sqlite3/khatru-relay" ]; then
./khatru/examples/basic-sqlite3/khatru-relay &
KHATRU_PID=$!
sleep 3
if kill -0 $KHATRU_PID 2>/dev/null; then
run_benchmark "Khatru" "ws://localhost:7447"
kill $KHATRU_PID 2>/dev/null
wait $KHATRU_PID 2>/dev/null
else
echo "Khatru failed to start" | tee -a "$RESULTS_FILE"
fi
else
echo "Khatru binary not found" | tee -a "$RESULTS_FILE"
fi
# Test 2: Strfry
echo "=== Testing Strfry ===" | tee -a "$RESULTS_FILE"
if [ -f "strfry/strfry" ]; then
# Create minimal strfry config
cat > /tmp/strfry.conf <<EOF
relay {
bind = "127.0.0.1"
port = 7447
nofiles = 0
realIpHeader = ""
info {
name = "strfry test"
description = "benchmark test relay"
}
}
events {
maxEventSize = 65536
rejectEventsNewerThanSeconds = 900
rejectEventsOlderThanSeconds = 94608000
rejectEphemeralEventsOlderThanSeconds = 60
rejectFutureEventsSeconds = 900
}
db {
path = "/tmp/strfry-db"
}
EOF
rm -rf /tmp/strfry-db
./strfry/strfry --config /tmp/strfry.conf relay &
STRFRY_PID=$!
sleep 5
if kill -0 $STRFRY_PID 2>/dev/null; then
run_benchmark "Strfry" "ws://localhost:7447"
kill $STRFRY_PID 2>/dev/null
wait $STRFRY_PID 2>/dev/null
else
echo "Strfry failed to start" | tee -a "$RESULTS_FILE"
fi
else
echo "Strfry binary not found" | tee -a "$RESULTS_FILE"
fi
# Test 3: Relayer
echo "=== Testing Relayer ===" | tee -a "$RESULTS_FILE"
if [ -f "relayer/examples/basic/relayer-bin" ]; then
# Start PostgreSQL container for relayer
docker run -d --name relay-postgres-$$ -e POSTGRES_PASSWORD=postgres \
-e POSTGRES_DB=nostr -p 5433:5432 postgres:15-alpine
sleep 5
# Start relayer
cd "$RELAY_DIR/relayer/examples/basic"
POSTGRESQL_DATABASE="postgres://postgres:postgres@localhost:5433/nostr?sslmode=disable" \
./relayer-bin &
RELAYER_PID=$!
sleep 3
if kill -0 $RELAYER_PID 2>/dev/null; then
run_benchmark "Relayer" "ws://localhost:7447"
kill $RELAYER_PID 2>/dev/null
wait $RELAYER_PID 2>/dev/null
else
echo "Relayer failed to start" | tee -a "$RESULTS_FILE"
fi
# Clean up PostgreSQL container
docker stop relay-postgres-$$ && docker rm relay-postgres-$$
cd "$RELAY_DIR"
else
echo "Relayer binary not found" | tee -a "$RESULTS_FILE"
fi
# Test 4: Orly
echo "=== Testing Orly ===" | tee -a "$RESULTS_FILE"
if [ -f "orly-relay" ]; then
# Start Orly on different port to avoid conflicts
ORLY_PORT=7448 ORLY_DATA_DIR=/tmp/orly-benchmark ORLY_SPIDER_TYPE=none ./orly-relay &
ORLY_PID=$!
sleep 3
if kill -0 $ORLY_PID 2>/dev/null; then
run_benchmark "Orly" "ws://localhost:7448"
kill $ORLY_PID 2>/dev/null
wait $ORLY_PID 2>/dev/null
else
echo "Orly failed to start" | tee -a "$RESULTS_FILE"
fi
# Clean up Orly data
rm -rf /tmp/orly-benchmark
else
echo "Orly binary not found" | tee -a "$RESULTS_FILE"
fi
# Generate comparative report
echo "" | tee -a "$RESULTS_FILE"
echo "=== Generating Comparative Report ===" | tee -a "$RESULTS_FILE"
cd "$BENCHMARK_DIR"
./benchmark --report --report-format markdown --report-file final_comparison 2>&1 | tee -a "$RESULTS_FILE"
echo "" | tee -a "$RESULTS_FILE"
echo "=== Benchmark Suite Complete ===" | tee -a "$RESULTS_FILE"
echo "Results saved to: $RESULTS_FILE" | tee -a "$RESULTS_FILE"

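The scripts above give each relay a fixed sleep before probing it with `kill -0`, which races slow starts. A sketch of a port-based readiness check that could replace the sleeps, assuming bash's `/dev/tcp` pseudo-device is available (a bashism, not POSIX):

```shell
#!/bin/bash
# Poll until a TCP port accepts connections, or give up after a timeout.
# The connection attempt runs in a subshell so the fd closes on exit.
wait_for_port() {
    local host=$1 port=$2 timeout=${3:-10}
    local start=$SECONDS
    until (exec 3<>"/dev/tcp/$host/$port") 2>/dev/null; do
        if (( SECONDS - start >= timeout )); then
            return 1
        fi
        sleep 0.2
    done
    return 0
}

# Example: wait up to 10s for a relay on 7447 before benchmarking it.
if wait_for_port 127.0.0.1 7447 10; then
    echo "relay ready"
else
    echo "relay did not come up" >&2
fi
```

Unlike a fixed `sleep 3`, this returns as soon as the relay is listening and fails fast with a non-zero status when it never comes up.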

@@ -1,82 +0,0 @@
#!/bin/bash
# Simple Nostr Relay Benchmark Script
# Default values
RELAY_URL="ws://localhost:7447"
EVENTS=10000
SIZE=1024
CONCURRENCY=10
QUERIES=100
QUERY_LIMIT=100
# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
--relay)
RELAY_URL="$2"
shift 2
;;
--events)
EVENTS="$2"
shift 2
;;
--size)
SIZE="$2"
shift 2
;;
--concurrency)
CONCURRENCY="$2"
shift 2
;;
--queries)
QUERIES="$2"
shift 2
;;
--query-limit)
QUERY_LIMIT="$2"
shift 2
;;
--skip-publish)
SKIP_PUBLISH="-skip-publish"
shift
;;
--skip-query)
SKIP_QUERY="-skip-query"
shift
;;
*)
echo "Unknown option: $1"
echo "Usage: $0 [--relay URL] [--events N] [--size N] [--concurrency N] [--queries N] [--query-limit N] [--skip-publish] [--skip-query]"
exit 1
;;
esac
done
# Build the benchmark tool if it doesn't exist
if [ ! -f benchmark-simple ]; then
echo "Building benchmark tool..."
go build -o benchmark-simple ./benchmark_simple.go
if [ $? -ne 0 ]; then
echo "Failed to build benchmark tool"
exit 1
fi
fi
# Run the benchmark
echo "Running Nostr relay benchmark..."
echo "Relay: $RELAY_URL"
echo "Events: $EVENTS (size: $SIZE bytes)"
echo "Concurrency: $CONCURRENCY"
echo "Queries: $QUERIES (limit: $QUERY_LIMIT)"
echo ""
./benchmark-simple \
-relay "$RELAY_URL" \
-events $EVENTS \
-size $SIZE \
-concurrency $CONCURRENCY \
-queries $QUERIES \
-query-limit $QUERY_LIMIT \
$SKIP_PUBLISH \
$SKIP_QUERY

cmd/benchmark/setup_relays.sh Executable file

@@ -0,0 +1,88 @@
#!/bin/bash
# Store script directory before changing directories
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WORK_DIR="/tmp/relay-benchmark"
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
echo "=== Setting up relay implementations ==="
# Clone khatru
echo "Cloning khatru..."
if [ ! -d "khatru" ]; then
git clone https://github.com/fiatjaf/khatru.git
fi
# Clone relayer
echo "Cloning relayer..."
if [ ! -d "relayer" ]; then
git clone https://github.com/fiatjaf/relayer.git
fi
# Clone strfry
echo "Cloning strfry..."
if [ ! -d "strfry" ]; then
git clone https://github.com/hoytech/strfry.git
fi
# Build khatru example
echo "Building khatru..."
cd "$WORK_DIR/khatru"
if [ -f "examples/basic-sqlite3/main.go" ]; then
cd examples/basic-sqlite3
go build -o khatru-relay
echo "Khatru built: $WORK_DIR/khatru/examples/basic-sqlite3/khatru-relay"
else
echo "No basic-sqlite3 example found in khatru"
fi
# Build relayer
echo "Building relayer..."
cd "$WORK_DIR/relayer"
if [ -f "examples/basic/main.go" ]; then
cd examples/basic
go build -o relayer-bin
echo "Relayer built: $WORK_DIR/relayer/examples/basic/relayer-bin"
else
echo "Could not find relayer basic example"
fi
# Build strfry (requires cmake and dependencies)
echo "Building strfry..."
cd "$WORK_DIR/strfry"
if command -v cmake &> /dev/null; then
git submodule update --init
make setup
make -j4
echo "Strfry built: $WORK_DIR/strfry/strfry"
else
echo "cmake not found, skipping strfry build"
fi
# Build Orly
echo "Building Orly..."
# Find Orly project root by looking for both .git and main.go in same directory
ORLY_ROOT="$SCRIPT_DIR"
while [[ "$ORLY_ROOT" != "/" ]]; do
if [[ -d "$ORLY_ROOT/.git" && -f "$ORLY_ROOT/main.go" ]]; then
break
fi
ORLY_ROOT="$(dirname "$ORLY_ROOT")"
done
echo "Building Orly at: $ORLY_ROOT"
if [[ -f "$ORLY_ROOT/main.go" && -d "$ORLY_ROOT/.git" ]]; then
CURRENT_DIR="$(pwd)"
cd "$ORLY_ROOT"
CGO_LDFLAGS="-L/usr/local/lib" PKG_CONFIG_PATH="/usr/local/lib/pkgconfig" go build -o "$WORK_DIR/orly-relay" .
echo "Orly built: $WORK_DIR/orly-relay"
cd "$CURRENT_DIR"
else
echo "Could not find Orly project root with both .git and main.go"
echo "Searched up from: $SCRIPT_DIR"
fi
echo "=== Setup complete ==="
ls -la "$WORK_DIR"


@@ -0,0 +1,59 @@
package main
import (
"fmt"
"lukechampine.com/frand"
"orly.dev/pkg/encoders/event"
"orly.dev/pkg/encoders/kind"
"orly.dev/pkg/encoders/tags"
"orly.dev/pkg/encoders/timestamp"
"orly.dev/pkg/utils/chk"
)
func generateSimpleEvent(signer *testSigner, contentSize int) *event.E {
content := generateContent(contentSize)
ev := &event.E{
Kind: kind.TextNote,
Tags: tags.New(),
Content: []byte(content),
CreatedAt: timestamp.Now(),
Pubkey: signer.Pub(),
}
if err := ev.Sign(signer); chk.E(err) {
panic(fmt.Sprintf("failed to sign event: %v", err))
}
return ev
}
func generateContent(size int) string {
words := []string{
"the", "be", "to", "of", "and", "a", "in", "that", "have", "I",
"it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
"this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
"or", "an", "will", "my", "one", "all", "would", "there", "their", "what",
"so", "up", "out", "if", "about", "who", "get", "which", "go", "me",
"when", "make", "can", "like", "time", "no", "just", "him", "know", "take",
"people", "into", "year", "your", "good", "some", "could", "them", "see", "other",
"than", "then", "now", "look", "only", "come", "its", "over", "think", "also",
"back", "after", "use", "two", "how", "our", "work", "first", "well", "way",
"even", "new", "want", "because", "any", "these", "give", "day", "most", "us",
}
result := ""
for len(result) < size {
if len(result) > 0 {
result += " "
}
result += words[frand.Intn(len(words))]
}
if len(result) > size {
result = result[:size]
}
return result
}


@@ -1,63 +1,21 @@
package main
import (
"orly.dev/pkg/crypto/p256k"
"orly.dev/pkg/interfaces/signer"
"orly.dev/pkg/utils/chk"
)
// testSigner wraps a real p256k signer so benchmark events carry valid signatures
type testSigner struct {
*p256k.Signer
}
func newTestSigner() *testSigner {
s := &p256k.Signer{}
if err := s.Generate(); chk.E(err) {
panic(err)
}
return &testSigner{Signer: s}
}
var _ signer.I = (*testSigner)(nil)


@@ -0,0 +1,469 @@
package main
import (
"fmt"
"orly.dev/pkg/encoders/event"
"orly.dev/pkg/encoders/filter"
"orly.dev/pkg/encoders/filters"
"orly.dev/pkg/encoders/tag"
"orly.dev/pkg/protocol/ws"
"orly.dev/pkg/utils/context"
"orly.dev/pkg/utils/log"
"sync"
"sync/atomic"
"time"
)
type EventLifecycle struct {
EventID string
PublishStart time.Time
PublishEnd time.Time
StoreStart time.Time
StoreEnd time.Time
QueryStart time.Time
QueryEnd time.Time
ReturnStart time.Time
ReturnEnd time.Time
TotalDuration time.Duration
PublishLatency time.Duration
StoreLatency time.Duration
QueryLatency time.Duration
ReturnLatency time.Duration
WSFrameOverhead time.Duration
}
type WriteAmplification struct {
InputBytes int64
WrittenBytes int64
IndexBytes int64
TotalIOOps int64
Amplification float64
IndexOverhead float64
}
type FrameTiming struct {
FrameType string
SendTime time.Time
AckTime time.Time
Latency time.Duration
PayloadSize int
CompressedSize int
CompressionRatio float64
}
type PipelineBottleneck struct {
Stage string
AvgLatency time.Duration
MaxLatency time.Duration
MinLatency time.Duration
P95Latency time.Duration
P99Latency time.Duration
Throughput float64
QueueDepth int
DroppedEvents int64
}
type TimingInstrumentation struct {
relay *ws.Client
lifecycles map[string]*EventLifecycle
framings []FrameTiming
amplifications []WriteAmplification
bottlenecks map[string]*PipelineBottleneck
mu sync.RWMutex
trackedEvents atomic.Int64
measurementMode string
}
// NewTimingInstrumentation builds the harness; the relay connection itself
// is established later via Connect, so relayURL is not used here yet.
func NewTimingInstrumentation(relayURL string) *TimingInstrumentation {
return &TimingInstrumentation{
lifecycles: make(map[string]*EventLifecycle),
framings: make([]FrameTiming, 0, 10000),
amplifications: make([]WriteAmplification, 0, 1000),
bottlenecks: make(map[string]*PipelineBottleneck),
measurementMode: "full",
}
}
func (ti *TimingInstrumentation) Connect(c context.T, relayURL string) error {
relay, err := ws.RelayConnect(c, relayURL)
if err != nil {
return fmt.Errorf("failed to connect: %w", err)
}
ti.relay = relay
return nil
}
func (ti *TimingInstrumentation) TrackEventLifecycle(c context.T, ev *event.E) (*EventLifecycle, error) {
evID := ev.ID
lifecycle := &EventLifecycle{
EventID: string(evID),
PublishStart: time.Now(),
}
ti.mu.Lock()
ti.lifecycles[lifecycle.EventID] = lifecycle
ti.mu.Unlock()
publishStart := time.Now()
err := ti.relay.Publish(c, ev)
publishEnd := time.Now()
if err != nil {
return nil, fmt.Errorf("publish failed: %w", err)
}
lifecycle.PublishEnd = publishEnd
lifecycle.PublishLatency = publishEnd.Sub(publishStart)
// give the relay a moment to index the event before querying it back
time.Sleep(50 * time.Millisecond)
queryStart := time.Now()
f := &filter.F{
Ids: tag.New(ev.ID),
}
events, err := ti.relay.QuerySync(c, f, ws.WithLabel("timing"))
queryEnd := time.Now()
if err != nil {
return nil, fmt.Errorf("query failed: %w", err)
}
lifecycle.QueryStart = queryStart
lifecycle.QueryEnd = queryEnd
lifecycle.QueryLatency = queryEnd.Sub(queryStart)
if len(events) > 0 {
lifecycle.ReturnStart = queryEnd
lifecycle.ReturnEnd = time.Now()
lifecycle.ReturnLatency = lifecycle.ReturnEnd.Sub(lifecycle.ReturnStart)
} else {
// event not returned; close the lifecycle at query end so the
// total below is not computed from a zero ReturnEnd
lifecycle.ReturnEnd = queryEnd
}
lifecycle.TotalDuration = lifecycle.ReturnEnd.Sub(lifecycle.PublishStart)
ti.trackedEvents.Add(1)
return lifecycle, nil
}
func (ti *TimingInstrumentation) MeasureWriteAmplification(inputEvent *event.E) *WriteAmplification {
inputBytes := int64(len(inputEvent.Marshal(nil)))
// simulated figures: ~3x LSM write amplification, ~0.5x index overhead,
// and 5 I/O ops are rough estimates, not values measured from Badger
writtenBytes := inputBytes * 3
indexBytes := inputBytes / 2
totalIOOps := int64(5)
amp := &WriteAmplification{
InputBytes: inputBytes,
WrittenBytes: writtenBytes,
IndexBytes: indexBytes,
TotalIOOps: totalIOOps,
Amplification: float64(writtenBytes) / float64(inputBytes),
IndexOverhead: float64(indexBytes) / float64(inputBytes),
}
ti.mu.Lock()
ti.amplifications = append(ti.amplifications, *amp)
ti.mu.Unlock()
return amp
}
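Since the byte counts in `MeasureWriteAmplification` are estimates rather than measurements, the only real logic is the ratio arithmetic. A tiny standalone sketch of that arithmetic (the 1024/3072/512 figures below are illustrative, matching the 3x and 0.5x placeholder factors):

```go
package main

import "fmt"

// amplification computes the two ratios reported by
// MeasureWriteAmplification: written/input and index/input.
func amplification(input, written, index int64) (amp, overhead float64) {
	return float64(written) / float64(input), float64(index) / float64(input)
}

func main() {
	amp, overhead := amplification(1024, 3072, 512)
	fmt.Printf("%.1fx amplification, %.0f%% index overhead\n", amp, overhead*100)
}
```

Replacing the placeholder factors with real numbers would require reading Badger's own metrics (e.g. LSM and value-log sizes) before and after the write, which the client-side harness cannot see.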
func (ti *TimingInstrumentation) TrackWebSocketFrame(frameType string, payload []byte) *FrameTiming {
frame := &FrameTiming{
FrameType: frameType,
SendTime: time.Now(),
PayloadSize: len(payload),
}
// simulated: assume ~30% compression and a flat 5ms ack; real frame
// timing would need hooks into the websocket transport
compressedSize := len(payload) * 7 / 10
frame.CompressedSize = compressedSize
frame.CompressionRatio = float64(len(payload)-compressedSize) / float64(len(payload))
frame.AckTime = frame.SendTime.Add(5 * time.Millisecond)
frame.Latency = frame.AckTime.Sub(frame.SendTime)
ti.mu.Lock()
ti.framings = append(ti.framings, *frame)
ti.mu.Unlock()
return frame
}
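The compression ratio in `TrackWebSocketFrame` is derived from a fixed 70% compressed-size assumption, not from actual permessage-deflate output. A minimal sketch of just that calculation, separated from the timing fields:

```go
package main

import "fmt"

// frameCompression reproduces the ratio math in TrackWebSocketFrame.
// The 7/10 compressed-size factor is a simulated estimate.
func frameCompression(payload int) (compressed int, ratio float64) {
	compressed = payload * 7 / 10
	ratio = float64(payload-compressed) / float64(payload)
	return
}

func main() {
	c, r := frameCompression(1000)
	fmt.Printf("%d bytes after compression, %.0f%% saved\n", c, r*100)
}
```

Measuring real ratios would mean comparing the JSON payload length against the bytes actually written to the wire with permessage-deflate enabled, which sits below the `ws.Client` abstraction used here.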
func (ti *TimingInstrumentation) IdentifyBottlenecks() map[string]*PipelineBottleneck {
ti.mu.RLock()
defer ti.mu.RUnlock()
stages := []string{"publish", "store", "query", "return"}
for _, stage := range stages {
var latencies []time.Duration
var totalLatency time.Duration
maxLatency := time.Duration(0)
minLatency := time.Duration(1<<63 - 1)
for _, lc := range ti.lifecycles {
var stageLatency time.Duration
switch stage {
case "publish":
stageLatency = lc.PublishLatency
case "store":
stageLatency = lc.StoreEnd.Sub(lc.StoreStart)
if stageLatency == 0 {
// store timing is not observable from the client; fall back to
// half the publish latency as a rough estimate
stageLatency = lc.PublishLatency / 2
}
case "query":
stageLatency = lc.QueryLatency
case "return":
stageLatency = lc.ReturnLatency
}
if stageLatency > 0 {
latencies = append(latencies, stageLatency)
totalLatency += stageLatency
if stageLatency > maxLatency {
maxLatency = stageLatency
}
if stageLatency < minLatency {
minLatency = stageLatency
}
}
}
if len(latencies) == 0 {
continue
}
avgLatency := totalLatency / time.Duration(len(latencies))
p95, p99 := calculatePercentiles(latencies)
bottleneck := &PipelineBottleneck{
Stage: stage,
AvgLatency: avgLatency,
MaxLatency: maxLatency,
MinLatency: minLatency,
P95Latency: p95,
P99Latency: p99,
Throughput: float64(len(latencies)) / totalLatency.Seconds(),
}
ti.bottlenecks[stage] = bottleneck
}
return ti.bottlenecks
}
func (ti *TimingInstrumentation) RunFullInstrumentation(c context.T, eventCount int, eventSize int) error {
fmt.Printf("Starting end-to-end timing instrumentation...\n")
signer := newTestSigner()
successCount := 0
var totalPublishLatency time.Duration
var totalQueryLatency time.Duration
var totalEndToEnd time.Duration
for i := 0; i < eventCount; i++ {
ev := generateEvent(signer, eventSize, 0, 0)
lifecycle, err := ti.TrackEventLifecycle(c, ev)
if err != nil {
log.E.F("Event %d failed: %v", i, err)
continue
}
_ = ti.MeasureWriteAmplification(ev)
evBytes := ev.Marshal(nil)
ti.TrackWebSocketFrame("EVENT", evBytes)
successCount++
totalPublishLatency += lifecycle.PublishLatency
totalQueryLatency += lifecycle.QueryLatency
totalEndToEnd += lifecycle.TotalDuration
if (i+1)%100 == 0 {
fmt.Printf(" Processed %d/%d events (%.1f%% success)\n",
i+1, eventCount, float64(successCount)*100/float64(i+1))
}
}
bottlenecks := ti.IdentifyBottlenecks()
fmt.Printf("\n=== Timing Instrumentation Results ===\n")
fmt.Printf("Events Tracked: %d/%d\n", successCount, eventCount)
if successCount > 0 {
fmt.Printf("Average Publish Latency: %v\n", totalPublishLatency/time.Duration(successCount))
fmt.Printf("Average Query Latency: %v\n", totalQueryLatency/time.Duration(successCount))
fmt.Printf("Average End-to-End: %v\n", totalEndToEnd/time.Duration(successCount))
} else {
fmt.Printf("No events successfully tracked\n")
}
fmt.Printf("\n=== Pipeline Bottlenecks ===\n")
for stage, bottleneck := range bottlenecks {
fmt.Printf("\n%s Stage:\n", stage)
fmt.Printf(" Avg Latency: %v\n", bottleneck.AvgLatency)
fmt.Printf(" P95 Latency: %v\n", bottleneck.P95Latency)
fmt.Printf(" P99 Latency: %v\n", bottleneck.P99Latency)
fmt.Printf(" Max Latency: %v\n", bottleneck.MaxLatency)
fmt.Printf(" Throughput: %.2f ops/sec\n", bottleneck.Throughput)
}
ti.printWriteAmplificationStats()
ti.printFrameTimingStats()
return nil
}
func (ti *TimingInstrumentation) printWriteAmplificationStats() {
if len(ti.amplifications) == 0 {
return
}
var totalAmp float64
var totalIndexOverhead float64
var totalIOOps int64
for _, amp := range ti.amplifications {
totalAmp += amp.Amplification
totalIndexOverhead += amp.IndexOverhead
totalIOOps += amp.TotalIOOps
}
count := float64(len(ti.amplifications))
fmt.Printf("\n=== Write Amplification ===\n")
fmt.Printf("Average Amplification: %.2fx\n", totalAmp/count)
fmt.Printf("Average Index Overhead: %.2f%%\n", (totalIndexOverhead/count)*100)
fmt.Printf("Total I/O Operations: %d\n", totalIOOps)
}
func (ti *TimingInstrumentation) printFrameTimingStats() {
if len(ti.framings) == 0 {
return
}
var totalLatency time.Duration
var totalCompression float64
frameTypes := make(map[string]int)
for _, frame := range ti.framings {
totalLatency += frame.Latency
totalCompression += frame.CompressionRatio
frameTypes[frame.FrameType]++
}
count := len(ti.framings)
fmt.Printf("\n=== WebSocket Frame Timings ===\n")
fmt.Printf("Total Frames: %d\n", count)
fmt.Printf("Average Frame Latency: %v\n", totalLatency/time.Duration(count))
fmt.Printf("Average Compression: %.1f%%\n", (totalCompression/float64(count))*100)
for frameType, cnt := range frameTypes {
fmt.Printf(" %s frames: %d\n", frameType, cnt)
}
}
func (ti *TimingInstrumentation) TestSubscriptionTiming(c context.T, duration time.Duration) error {
fmt.Printf("Testing subscription timing for %v...\n", duration)
f := &filter.F{}
filters := &filters.T{F: []*filter.F{f}}
sub, err := ti.relay.Subscribe(c, filters, ws.WithLabel("timing-sub"))
if err != nil {
return fmt.Errorf("subscribe failed: %w", err)
}
startTime := time.Now()
eventCount := 0
var totalLatency time.Duration
go func() {
for {
select {
case <-sub.Events:
receiveTime := time.Now()
// measured from subscription start, so this is elapsed time since
// subscribing, not per-event delivery latency
eventLatency := receiveTime.Sub(startTime)
totalLatency += eventLatency
eventCount++
if eventCount%100 == 0 {
fmt.Printf(" Received %d events, avg latency: %v\n",
eventCount, totalLatency/time.Duration(eventCount))
}
case <-c.Done():
return
}
}
}()
time.Sleep(duration)
sub.Close()
fmt.Printf("\nSubscription Timing Results:\n")
fmt.Printf(" Total Events: %d\n", eventCount)
if eventCount > 0 {
fmt.Printf(" Average Latency: %v\n", totalLatency/time.Duration(eventCount))
fmt.Printf(" Events/Second: %.2f\n", float64(eventCount)/duration.Seconds())
}
return nil
}
func calculatePercentiles(latencies []time.Duration) (p95, p99 time.Duration) {
if len(latencies) == 0 {
return 0, 0
}
sorted := make([]time.Duration, len(latencies))
copy(sorted, latencies)
for i := 0; i < len(sorted); i++ {
for j := i + 1; j < len(sorted); j++ {
if sorted[i] > sorted[j] {
sorted[i], sorted[j] = sorted[j], sorted[i]
}
}
}
p95Index := int(float64(len(sorted)) * 0.95)
p99Index := int(float64(len(sorted)) * 0.99)
if p95Index >= len(sorted) {
p95Index = len(sorted) - 1
}
if p99Index >= len(sorted) {
p99Index = len(sorted) - 1
}
return sorted[p95Index], sorted[p99Index]
}
func (ti *TimingInstrumentation) Close() {
if ti.relay != nil {
ti.relay.Close()
}
}
func (ti *TimingInstrumentation) GetMetrics() map[string]interface{} {
ti.mu.RLock()
defer ti.mu.RUnlock()
metrics := make(map[string]interface{})
metrics["tracked_events"] = ti.trackedEvents.Load()
metrics["lifecycles_count"] = len(ti.lifecycles)
metrics["frames_tracked"] = len(ti.framings)
metrics["write_amplifications"] = len(ti.amplifications)
if len(ti.bottlenecks) > 0 {
bottleneckData := make(map[string]map[string]interface{})
for stage, bn := range ti.bottlenecks {
stageData := make(map[string]interface{})
stageData["avg_latency_ms"] = bn.AvgLatency.Milliseconds()
stageData["p95_latency_ms"] = bn.P95Latency.Milliseconds()
stageData["p99_latency_ms"] = bn.P99Latency.Milliseconds()
stageData["throughput_ops_sec"] = bn.Throughput
bottleneckData[stage] = stageData
}
metrics["bottlenecks"] = bottleneckData
}
return metrics
}