Add documentation and improve BBolt import memory efficiency (v0.48.8)
Some checks failed
Go / build-and-release (push) Has been cancelled
Some checks failed
Go / build-and-release (push) Has been cancelled
- Add README.md table of contents for easier navigation
- Add Curation ACL documentation section to README.md
- Create detailed Curation Mode Guide (docs/CURATION_MODE_GUIDE.md)
- Fix OOM during BBolt index building by closing temp file before build
- Add GC calls before index building to reclaim batch buffer memory
- Improve import-export.go with processJSONLEventsReturningCount
- Add policy-aware import path for sync operations

Files modified:
- README.md: Added TOC and curation ACL documentation
- docs/CURATION_MODE_GUIDE.md: New comprehensive curation mode guide
- pkg/bbolt/import-export.go: Memory-safe import with deferred cleanup
- pkg/bbolt/import-minimal.go: Added GC before index build
- pkg/version/version: Bump to v0.48.8

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -31,9 +31,10 @@ func (b *B) ImportEventsFromReader(ctx context.Context, rr io.Reader) error {
|
||||
if chk.E(err) {
|
||||
return err
|
||||
}
|
||||
defer os.Remove(tmp.Name()) // Clean up temp file when done
|
||||
tmpName := tmp.Name()
|
||||
defer os.Remove(tmpName) // Clean up temp file when done
|
||||
|
||||
log.I.F("bbolt import: buffering upload to %s", tmp.Name())
|
||||
log.I.F("bbolt import: buffering upload to %s", tmpName)
|
||||
bufferStart := time.Now()
|
||||
bytesBuffered, err := io.Copy(tmp, rr)
|
||||
if chk.E(err) {
|
||||
@@ -48,12 +49,30 @@ func (b *B) ImportEventsFromReader(ctx context.Context, rr io.Reader) error {
|
||||
return err
|
||||
}
|
||||
|
||||
processErr := b.processJSONLEvents(ctx, tmp)
|
||||
count, processErr := b.processJSONLEventsReturningCount(ctx, tmp)
|
||||
|
||||
// Close temp file to release resources before index building
|
||||
tmp.Close()
|
||||
|
||||
if processErr != nil {
|
||||
return processErr
|
||||
}
|
||||
|
||||
// Build indexes after events are stored (minimal import mode)
|
||||
if count > 0 {
|
||||
// Force garbage collection to reclaim memory before index building
|
||||
debug.FreeOSMemory()
|
||||
log.I.F("bbolt import: building indexes for %d events...", count)
|
||||
if err := b.BuildIndexes(ctx); err != nil {
|
||||
log.E.F("bbolt import: failed to build indexes: %v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
totalElapsed := time.Since(startTime)
|
||||
log.I.F("bbolt import: total operation time: %v", totalElapsed.Round(time.Millisecond))
|
||||
|
||||
return processErr
|
||||
return nil
|
||||
}
|
||||
|
||||
// ImportEventsFromStrings imports events from a slice of JSON strings with policy filtering
|
||||
@@ -67,7 +86,95 @@ func (b *B) ImportEventsFromStrings(ctx context.Context, eventJSONs []string, po
|
||||
|
||||
// processJSONLEvents processes JSONL events from a reader
|
||||
func (b *B) processJSONLEvents(ctx context.Context, rr io.Reader) error {
|
||||
return b.processJSONLEventsWithPolicy(ctx, rr, nil)
|
||||
_, err := b.processJSONLEventsReturningCount(ctx, rr)
|
||||
return err
|
||||
}
|
||||
|
||||
// processJSONLEventsReturningCount processes JSONL events and returns the count saved
|
||||
// This is used by ImportEventsFromReader for migration mode (minimal import without inline indexes)
|
||||
func (b *B) processJSONLEventsReturningCount(ctx context.Context, rr io.Reader) (int, error) {
|
||||
// Create a scanner to read the buffer line by line
|
||||
scan := bufio.NewScanner(rr)
|
||||
scanBuf := make([]byte, maxLen)
|
||||
scan.Buffer(scanBuf, maxLen)
|
||||
|
||||
// Performance tracking
|
||||
startTime := time.Now()
|
||||
lastLogTime := startTime
|
||||
const logInterval = 5 * time.Second
|
||||
|
||||
var count, total, skipped, unmarshalErrors, saveErrors int
|
||||
for scan.Scan() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.I.F("bbolt import: context closed after %d events", count)
|
||||
return count, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
line := scan.Bytes()
|
||||
total += len(line) + 1
|
||||
if len(line) < 1 {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
ev := event.New()
|
||||
if _, err := ev.Unmarshal(line); err != nil {
|
||||
ev.Free()
|
||||
unmarshalErrors++
|
||||
log.W.F("bbolt import: failed to unmarshal event: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Minimal path for migration: store events only, indexes built later
|
||||
if err := b.SaveEventMinimal(ev); err != nil {
|
||||
ev.Free()
|
||||
saveErrors++
|
||||
log.W.F("bbolt import: failed to save event: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
ev.Free()
|
||||
line = nil
|
||||
count++
|
||||
|
||||
// Progress logging every logInterval
|
||||
if time.Since(lastLogTime) >= logInterval {
|
||||
elapsed := time.Since(startTime)
|
||||
eventsPerSec := float64(count) / elapsed.Seconds()
|
||||
mbPerSec := float64(total) / elapsed.Seconds() / 1024 / 1024
|
||||
log.I.F("bbolt import: progress %d events saved, %.2f MB read, %.0f events/sec, %.2f MB/sec",
|
||||
count, float64(total)/1024/1024, eventsPerSec, mbPerSec)
|
||||
lastLogTime = time.Now()
|
||||
debug.FreeOSMemory()
|
||||
}
|
||||
}
|
||||
|
||||
// Flush any remaining batched events
|
||||
if b.batcher != nil {
|
||||
b.batcher.Flush()
|
||||
}
|
||||
|
||||
// Final summary
|
||||
elapsed := time.Since(startTime)
|
||||
eventsPerSec := float64(count) / elapsed.Seconds()
|
||||
mbPerSec := float64(total) / elapsed.Seconds() / 1024 / 1024
|
||||
log.I.F("bbolt import: completed - %d events saved, %.2f MB in %v (%.0f events/sec, %.2f MB/sec)",
|
||||
count, float64(total)/1024/1024, elapsed.Round(time.Millisecond), eventsPerSec, mbPerSec)
|
||||
if unmarshalErrors > 0 || saveErrors > 0 || skipped > 0 {
|
||||
log.I.F("bbolt import: stats - %d unmarshal errors, %d save errors, %d skipped empty lines",
|
||||
unmarshalErrors, saveErrors, skipped)
|
||||
}
|
||||
|
||||
if err := scan.Err(); err != nil {
|
||||
return count, err
|
||||
}
|
||||
|
||||
// Clear scanner buffer to help GC
|
||||
scanBuf = nil
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// processJSONLEventsWithPolicy processes JSONL events from a reader with optional policy filtering
|
||||
@@ -181,15 +288,6 @@ func (b *B) processJSONLEventsWithPolicy(ctx context.Context, rr io.Reader, poli
|
||||
return err
|
||||
}
|
||||
|
||||
// Build indexes after minimal import (when no policy manager = migration mode)
|
||||
if policyManager == nil && count > 0 {
|
||||
log.I.F("bbolt import: building indexes for %d events...", count)
|
||||
if err := b.BuildIndexes(ctx); err != nil {
|
||||
log.E.F("bbolt import: failed to build indexes: %v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -62,9 +62,12 @@ func (b *B) BuildIndexes(ctx context.Context) error {
|
||||
log.I.F("bbolt: starting index build...")
|
||||
startTime := time.Now()
|
||||
|
||||
// Process in chunks to avoid OOM
|
||||
// With ~15 indexes per event and ~50 bytes per key, 200k events = ~150MB
|
||||
const chunkSize = 200000
|
||||
// Force GC before starting to reclaim batch buffer memory
|
||||
debug.FreeOSMemory()
|
||||
|
||||
// Process in small chunks to avoid OOM on memory-constrained systems
|
||||
// With ~15 indexes per event and ~50 bytes per key, 50k events = ~37.5MB per chunk
|
||||
const chunkSize = 50000
|
||||
|
||||
var totalEvents int
|
||||
var lastSerial uint64 = 0
|
||||
|
||||
@@ -1 +1 @@
|
||||
v0.48.6
|
||||
v0.48.8
|
||||
|
||||
Reference in New Issue
Block a user