Add unicode normalization for word indexing (v0.36.10)

- Add unicode_normalize.go with mappings for small caps and fraktur
- Map 77 decorative unicode characters to ASCII equivalents:
  - Small caps (25 chars): ᴅᴇᴀᴛʜ → death
  - Fraktur lowercase (26 chars): 𝔡𝔢𝔞𝔱𝔥 → death
  - Fraktur uppercase (26 chars): 𝔇𝔈𝔄𝔗ℌ → death
- Fix broken utf8DecodeRuneInString() that failed on multi-byte UTF-8
- Add migration v7 to rebuild word indexes with normalization
- Add comprehensive unit tests for all character mappings

Files modified:
- pkg/database/unicode_normalize.go: New - character mapping tables
- pkg/database/unicode_normalize_test.go: New - unit tests
- pkg/database/tokenize.go: Integrate normalizeRune(), fix UTF-8 decoder
- pkg/database/migrations.go: Add version 7 migration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-22 18:53:30 +01:00
parent 11d1b6bfd1
commit 0addc61549
5 changed files with 439 additions and 13 deletions
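The intended behavior in a nutshell, as a minimal sketch (not part of the diff; assumes a caller inside pkg/database, with bytes and fmt imported):

// Sketch: decorative and plain spellings now tokenize to the same word hash.
ascii := TokenHashes([]byte("death"))
smallCaps := TokenHashes([]byte("ᴅᴇᴀᴛʜ")) // small caps
fraktur := TokenHashes([]byte("𝔡𝔢𝔞𝔱𝔥"))   // mathematical fraktur
fmt.Println(bytes.Equal(ascii[0], smallCaps[0])) // true
fmt.Println(bytes.Equal(ascii[0], fraktur[0]))   // true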

pkg/database/migrations.go

@@ -18,7 +18,7 @@ import (
 )
 
 const (
-	currentVersion uint32 = 6
+	currentVersion uint32 = 7
 )
 
 func (d *D) RunMigrations() {
@@ -107,6 +107,14 @@ func (d *D) RunMigrations() {
 		// bump to version 6
 		_ = d.writeVersionTag(6)
 	}
+	if dbVersion < 7 {
+		log.I.F("migrating to version 7...")
+		// Rebuild word indexes with unicode normalization (small caps, fraktur → ASCII)
+		// This consolidates duplicate indexes from decorative unicode text
+		d.RebuildWordIndexesWithNormalization()
+		// bump to version 7
+		_ = d.writeVersionTag(7)
+	}
 }
 
 // writeVersionTag writes a new version tag key to the database (no value)
@@ -1018,3 +1026,56 @@ func (d *D) CleanupLegacyEventStorage() {
 	log.I.F("legacy storage cleanup complete: removed %d evt entries, %d sev entries, reclaimed approximately %d bytes (%.2f MB)",
 		cleanedEvt, cleanedSev, bytesReclaimed, float64(bytesReclaimed)/(1024.0*1024.0))
 }
+
+// RebuildWordIndexesWithNormalization rebuilds all word indexes with unicode
+// normalization applied. This migration:
+//  1. Deletes all existing word indexes (wrd prefix)
+//  2. Re-tokenizes all events with normalizeRune() applied
+//  3. Creates new consolidated indexes where decorative unicode maps to ASCII
+//
+// After this migration, "ᴅᴇᴀᴛʜ" (small caps) and "𝔇𝔢𝔞𝔱𝔥" (fraktur) will index
+// the same as "death", eliminating duplicate entries and enabling proper search.
+func (d *D) RebuildWordIndexesWithNormalization() {
+	log.I.F("rebuilding word indexes with unicode normalization...")
+	var err error
+	// Step 1: Delete all existing word indexes
+	var deletedCount int
+	if err = d.Update(func(txn *badger.Txn) error {
+		wrdPrf := new(bytes.Buffer)
+		if err = indexes.WordEnc(nil, nil).MarshalWrite(wrdPrf); chk.E(err) {
+			return err
+		}
+		opts := badger.DefaultIteratorOptions
+		opts.Prefix = wrdPrf.Bytes()
+		opts.PrefetchValues = false // Keys only for deletion
+		it := txn.NewIterator(opts)
+		defer it.Close()
+		// Collect keys to delete (can't delete during iteration)
+		var keysToDelete [][]byte
+		for it.Rewind(); it.Valid(); it.Next() {
+			keysToDelete = append(keysToDelete, it.Item().KeyCopy(nil))
+		}
+		for _, key := range keysToDelete {
+			if err = txn.Delete(key); err == nil {
+				deletedCount++
+			}
+		}
+		return nil
+	}); chk.E(err) {
+		log.W.F("failed to delete old word indexes: %v", err)
+		return
+	}
+	log.I.F("deleted %d old word index entries", deletedCount)
+	// Step 2: Rebuild word indexes from all events
+	// Reuse the existing UpdateWordIndexes logic which now uses normalizeRune
+	d.UpdateWordIndexes()
+	log.I.F("word index rebuild with unicode normalization complete")
+}
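
A quick way to verify the consolidation is to count wrd-prefixed keys before and after the rebuild; decorative duplicates collapse into their ASCII forms, so the count should drop. A minimal sketch, assuming d exposes a read-only View wrapper alongside d.Update (countWordIndexKeys itself is a hypothetical helper, not part of this commit):

// Hypothetical verification helper: count remaining word-index entries.
// Run before and after the v7 migration; the after-count should be lower.
func (d *D) countWordIndexKeys() (n int, err error) {
	err = d.View(func(txn *badger.Txn) error { // assumes a read-only View wrapper mirroring d.Update
		wrdPrf := new(bytes.Buffer)
		if err := indexes.WordEnc(nil, nil).MarshalWrite(wrdPrf); err != nil {
			return err
		}
		opts := badger.DefaultIteratorOptions
		opts.Prefix = wrdPrf.Bytes()
		opts.PrefetchValues = false // keys only
		it := txn.NewIterator(opts)
		defer it.Close()
		for it.Rewind(); it.Valid(); it.Next() {
			n++
		}
		return nil
	})
	return n, err
}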

pkg/database/tokenize.go

@@ -1,3 +1,5 @@
+//go:build !(js && wasm)
+
 package database
 
 import (
@@ -65,7 +67,9 @@ func TokenHashes(content []byte) [][]byte {
 			r2, size2 = utf8DecodeRuneInString(s[i:])
 		}
 		if unicode.IsLetter(r2) || unicode.IsNumber(r2) {
-			runes = append(runes, unicode.ToLower(r2))
+			// Normalize decorative unicode (small caps, fraktur) to ASCII
+			// before lowercasing for consistent indexing
+			runes = append(runes, unicode.ToLower(normalizeRune(r2)))
 			i += size2
 			continue
 		}
@@ -142,18 +146,39 @@ func allAlphaNum(s string) bool {
 func isWordStart(r rune) bool { return unicode.IsLetter(r) || unicode.IsNumber(r) }
 
-// Minimal utf8 rune decode without importing utf8 to avoid extra deps elsewhere
+// utf8DecodeRuneInString decodes the first UTF-8 rune from s.
+// Returns the rune and the number of bytes consumed.
 func utf8DecodeRuneInString(s string) (r rune, size int) {
-	// Fallback to standard library if available; however, using basic decoding
-	for i := 1; i <= 4 && i <= len(s); i++ {
-		r, size = rune(s[0]), 1
-		if r < 0x80 {
-			return r, 1
-		}
-		// Use stdlib for correctness
-		return []rune(s[:i])[0], len(string([]rune(s[:i])[0]))
-	}
-	return rune(s[0]), 1
+	if len(s) == 0 {
+		return 0, 0
+	}
+	// ASCII fast path
+	b := s[0]
+	if b < 0x80 {
+		return rune(b), 1
+	}
+	// Multi-byte: determine expected length from first byte
+	var expectedLen int
+	switch {
+	case b&0xE0 == 0xC0: // 110xxxxx - 2 bytes
+		expectedLen = 2
+	case b&0xF0 == 0xE0: // 1110xxxx - 3 bytes
+		expectedLen = 3
+	case b&0xF8 == 0xF0: // 11110xxx - 4 bytes
+		expectedLen = 4
+	default:
+		// Invalid UTF-8 start byte
+		return 0xFFFD, 1
+	}
+	if len(s) < expectedLen {
+		return 0xFFFD, 1
+	}
+	// Decode using Go's built-in rune conversion (simple and correct)
+	runes := []rune(s[:expectedLen])
+	if len(runes) == 0 {
+		return 0xFFFD, 1
+	}
+	return runes[0], expectedLen
 }
 
 // isHex64 returns true if s is exactly 64 hex characters (0-9, a-f)
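
The rewritten decoder is intended to agree with the standard library on well-formed input; on malformed sequences the reported size can differ, since the []rune fallback replaces each bad byte rather than resyncing byte by byte. A cross-check sketch for the package tests, assuming a unicode/utf8 import (illustrative, not part of the commit):

// Sketch: the custom decoder should agree with unicode/utf8 on valid input.
func TestUtf8DecodeMatchesStdlib(t *testing.T) {
	// 1-, 2-, 3-, and 4-byte sequences
	for _, s := range []string{"a", "é", "ᴅ", "死", "𝔡"} {
		r, size := utf8DecodeRuneInString(s)
		wantR, wantSize := utf8.DecodeRuneInString(s)
		if r != wantR || size != wantSize {
			t.Errorf("utf8DecodeRuneInString(%q) = (%q, %d), stdlib says (%q, %d)",
				s, r, size, wantR, wantSize)
		}
	}
}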

pkg/database/unicode_normalize.go

@@ -0,0 +1,135 @@
//go:build !(js && wasm)
package database
// normalizeRune maps decorative unicode characters (small caps, fraktur) back to
// their ASCII equivalents for consistent word indexing. This ensures that text
// written with decorative alphabets (e.g., "ᴅᴇᴀᴛʜ" or "𝔇𝔢𝔞𝔱𝔥") indexes the same
// as regular ASCII ("death").
//
// Character sets normalized:
// - Small Caps (used for DEATH-style text in Terry Pratchett tradition)
// - Mathematical Fraktur lowercase (𝔞-𝔷)
// - Mathematical Fraktur uppercase (𝔄-𝔜, including Letterlike Symbols block exceptions)
func normalizeRune(r rune) rune {
	// Check small caps first (scattered codepoints)
	if mapped, ok := smallCapsToASCII[r]; ok {
		return mapped
	}
	// Check fraktur lowercase: U+1D51E to U+1D537 (contiguous range)
	if r >= 0x1D51E && r <= 0x1D537 {
		return 'a' + (r - 0x1D51E)
	}
	// Check fraktur uppercase main range: U+1D504 to U+1D51C (with gaps)
	if r >= 0x1D504 && r <= 0x1D51C {
		if mapped, ok := frakturUpperToASCII[r]; ok {
			return mapped
		}
	}
	// Check fraktur uppercase exceptions from Letterlike Symbols block
	if mapped, ok := frakturLetterlikeToASCII[r]; ok {
		return mapped
	}
	return r
}
// smallCapsToASCII maps small capital letters to lowercase ASCII.
// These are scattered across multiple Unicode blocks (IPA Extensions,
// Phonetic Extensions, Latin Extended-D).
var smallCapsToASCII = map[rune]rune{
	'ᴀ': 'a', // U+1D00 LATIN LETTER SMALL CAPITAL A
	'ʙ': 'b', // U+0299 LATIN LETTER SMALL CAPITAL B
	'ᴄ': 'c', // U+1D04 LATIN LETTER SMALL CAPITAL C
	'ᴅ': 'd', // U+1D05 LATIN LETTER SMALL CAPITAL D
	'ᴇ': 'e', // U+1D07 LATIN LETTER SMALL CAPITAL E
	'ꜰ': 'f', // U+A730 LATIN LETTER SMALL CAPITAL F
	'ɢ': 'g', // U+0262 LATIN LETTER SMALL CAPITAL G
	'ʜ': 'h', // U+029C LATIN LETTER SMALL CAPITAL H
	'ɪ': 'i', // U+026A LATIN LETTER SMALL CAPITAL I
	'ᴊ': 'j', // U+1D0A LATIN LETTER SMALL CAPITAL J
	'ᴋ': 'k', // U+1D0B LATIN LETTER SMALL CAPITAL K
	'ʟ': 'l', // U+029F LATIN LETTER SMALL CAPITAL L
	'ᴍ': 'm', // U+1D0D LATIN LETTER SMALL CAPITAL M
	'ɴ': 'n', // U+0274 LATIN LETTER SMALL CAPITAL N
	'ᴏ': 'o', // U+1D0F LATIN LETTER SMALL CAPITAL O
	'ᴘ': 'p', // U+1D18 LATIN LETTER SMALL CAPITAL P
	'ǫ': 'q', // U+01EB LATIN SMALL LETTER O WITH OGONEK (no true small cap Q)
	'ʀ': 'r', // U+0280 LATIN LETTER SMALL CAPITAL R
	'ꜱ': 's', // U+A731 LATIN LETTER SMALL CAPITAL S
	'ᴛ': 't', // U+1D1B LATIN LETTER SMALL CAPITAL T
	'ᴜ': 'u', // U+1D1C LATIN LETTER SMALL CAPITAL U
	'ᴠ': 'v', // U+1D20 LATIN LETTER SMALL CAPITAL V
	'ᴡ': 'w', // U+1D21 LATIN LETTER SMALL CAPITAL W
	// Note: no small cap X exists in standard use
	'ʏ': 'y', // U+028F LATIN LETTER SMALL CAPITAL Y
	'ᴢ': 'z', // U+1D22 LATIN LETTER SMALL CAPITAL Z
}
// frakturUpperToASCII maps Mathematical Fraktur uppercase letters to lowercase ASCII.
// The main range U+1D504-U+1D51C has gaps where C, H, I, R, Z use Letterlike Symbols.
var frakturUpperToASCII = map[rune]rune{
	'𝔄': 'a', // U+1D504 MATHEMATICAL FRAKTUR CAPITAL A
	'𝔅': 'b', // U+1D505 MATHEMATICAL FRAKTUR CAPITAL B
	// C is at U+212D (Letterlike Symbols)
	'𝔇': 'd', // U+1D507 MATHEMATICAL FRAKTUR CAPITAL D
	'𝔈': 'e', // U+1D508 MATHEMATICAL FRAKTUR CAPITAL E
	'𝔉': 'f', // U+1D509 MATHEMATICAL FRAKTUR CAPITAL F
	'𝔊': 'g', // U+1D50A MATHEMATICAL FRAKTUR CAPITAL G
	// H is at U+210C (Letterlike Symbols)
	// I is at U+2111 (Letterlike Symbols)
	'𝔍': 'j', // U+1D50D MATHEMATICAL FRAKTUR CAPITAL J
	'𝔎': 'k', // U+1D50E MATHEMATICAL FRAKTUR CAPITAL K
	'𝔏': 'l', // U+1D50F MATHEMATICAL FRAKTUR CAPITAL L
	'𝔐': 'm', // U+1D510 MATHEMATICAL FRAKTUR CAPITAL M
	'𝔑': 'n', // U+1D511 MATHEMATICAL FRAKTUR CAPITAL N
	'𝔒': 'o', // U+1D512 MATHEMATICAL FRAKTUR CAPITAL O
	'𝔓': 'p', // U+1D513 MATHEMATICAL FRAKTUR CAPITAL P
	'𝔔': 'q', // U+1D514 MATHEMATICAL FRAKTUR CAPITAL Q
	// R is at U+211C (Letterlike Symbols)
	'𝔖': 's', // U+1D516 MATHEMATICAL FRAKTUR CAPITAL S
	'𝔗': 't', // U+1D517 MATHEMATICAL FRAKTUR CAPITAL T
	'𝔘': 'u', // U+1D518 MATHEMATICAL FRAKTUR CAPITAL U
	'𝔙': 'v', // U+1D519 MATHEMATICAL FRAKTUR CAPITAL V
	'𝔚': 'w', // U+1D51A MATHEMATICAL FRAKTUR CAPITAL W
	'𝔛': 'x', // U+1D51B MATHEMATICAL FRAKTUR CAPITAL X
	'𝔜': 'y', // U+1D51C MATHEMATICAL FRAKTUR CAPITAL Y
	// Z is at U+2128 (Letterlike Symbols)
}
// frakturLetterlikeToASCII maps the Fraktur characters that live in the
// Letterlike Symbols block (U+2100-U+214F) rather than Mathematical Alphanumeric Symbols.
var frakturLetterlikeToASCII = map[rune]rune{
	'ℭ': 'c', // U+212D BLACK-LETTER CAPITAL C
	'ℌ': 'h', // U+210C BLACK-LETTER CAPITAL H
	'ℑ': 'i', // U+2111 BLACK-LETTER CAPITAL I
	'ℜ': 'r', // U+211C BLACK-LETTER CAPITAL R
	'ℨ': 'z', // U+2128 BLACK-LETTER CAPITAL Z
}
// hasDecorativeUnicode checks if text contains any small caps or fraktur characters
// that would need normalization. Used by migration to identify events needing re-indexing.
func hasDecorativeUnicode(s string) bool {
	for _, r := range s {
		// Check small caps
		if _, ok := smallCapsToASCII[r]; ok {
			return true
		}
		// Check fraktur lowercase range
		if r >= 0x1D51E && r <= 0x1D537 {
			return true
		}
		// Check fraktur uppercase range
		if r >= 0x1D504 && r <= 0x1D51C {
			return true
		}
		// Check letterlike symbols fraktur
		if _, ok := frakturLetterlikeToASCII[r]; ok {
			return true
		}
	}
	return false
}
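
Because normalizeRune is a pure rune-to-rune mapping, whole-string normalization composes directly with strings.Map. A minimal sketch (normalizeString is a hypothetical helper in the same package, with strings and unicode imported):

// Hypothetical convenience wrapper around normalizeRune.
func normalizeString(s string) string {
	return strings.Map(func(r rune) rune {
		return unicode.ToLower(normalizeRune(r))
	}, s)
}

// normalizeString("ᴅᴇᴀᴛʜ")  == "death"
// normalizeString("𝔇𝔈𝔄𝔗ℌ") == "death"
// normalizeString("hello")  == "hello"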

pkg/database/unicode_normalize_test.go

@@ -0,0 +1,205 @@
//go:build !(js && wasm)
package database
import (
	"bytes"
	"testing"
)
func TestNormalizeRune(t *testing.T) {
	tests := []struct {
		name     string
		input    rune
		expected rune
	}{
		// Small caps
		{"small cap A", 'ᴀ', 'a'},
		{"small cap B", 'ʙ', 'b'},
		{"small cap C", 'ᴄ', 'c'},
		{"small cap D", 'ᴅ', 'd'},
		{"small cap E", 'ᴇ', 'e'},
		{"small cap F", 'ꜰ', 'f'},
		{"small cap G", 'ɢ', 'g'},
		{"small cap H", 'ʜ', 'h'},
		{"small cap I", 'ɪ', 'i'},
		{"small cap J", 'ᴊ', 'j'},
		{"small cap K", 'ᴋ', 'k'},
		{"small cap L", 'ʟ', 'l'},
		{"small cap M", 'ᴍ', 'm'},
		{"small cap N", 'ɴ', 'n'},
		{"small cap O", 'ᴏ', 'o'},
		{"small cap P", 'ᴘ', 'p'},
		{"small cap Q (ogonek)", 'ǫ', 'q'},
		{"small cap R", 'ʀ', 'r'},
		{"small cap S", 'ꜱ', 's'},
		{"small cap T", 'ᴛ', 't'},
		{"small cap U", 'ᴜ', 'u'},
		{"small cap V", 'ᴠ', 'v'},
		{"small cap W", 'ᴡ', 'w'},
		{"small cap Y", 'ʏ', 'y'},
		{"small cap Z", 'ᴢ', 'z'},
		// Fraktur lowercase
		{"fraktur lower a", '𝔞', 'a'},
		{"fraktur lower b", '𝔟', 'b'},
		{"fraktur lower c", '𝔠', 'c'},
		{"fraktur lower d", '𝔡', 'd'},
		{"fraktur lower e", '𝔢', 'e'},
		{"fraktur lower f", '𝔣', 'f'},
		{"fraktur lower g", '𝔤', 'g'},
		{"fraktur lower h", '𝔥', 'h'},
		{"fraktur lower i", '𝔦', 'i'},
		{"fraktur lower j", '𝔧', 'j'},
		{"fraktur lower k", '𝔨', 'k'},
		{"fraktur lower l", '𝔩', 'l'},
		{"fraktur lower m", '𝔪', 'm'},
		{"fraktur lower n", '𝔫', 'n'},
		{"fraktur lower o", '𝔬', 'o'},
		{"fraktur lower p", '𝔭', 'p'},
		{"fraktur lower q", '𝔮', 'q'},
		{"fraktur lower r", '𝔯', 'r'},
		{"fraktur lower s", '𝔰', 's'},
		{"fraktur lower t", '𝔱', 't'},
		{"fraktur lower u", '𝔲', 'u'},
		{"fraktur lower v", '𝔳', 'v'},
		{"fraktur lower w", '𝔴', 'w'},
		{"fraktur lower x", '𝔵', 'x'},
		{"fraktur lower y", '𝔶', 'y'},
		{"fraktur lower z", '𝔷', 'z'},
		// Fraktur uppercase (main range)
		{"fraktur upper A", '𝔄', 'a'},
		{"fraktur upper B", '𝔅', 'b'},
		{"fraktur upper D", '𝔇', 'd'},
		{"fraktur upper E", '𝔈', 'e'},
		{"fraktur upper F", '𝔉', 'f'},
		{"fraktur upper G", '𝔊', 'g'},
		{"fraktur upper J", '𝔍', 'j'},
		{"fraktur upper K", '𝔎', 'k'},
		{"fraktur upper L", '𝔏', 'l'},
		{"fraktur upper M", '𝔐', 'm'},
		{"fraktur upper N", '𝔑', 'n'},
		{"fraktur upper O", '𝔒', 'o'},
		{"fraktur upper P", '𝔓', 'p'},
		{"fraktur upper Q", '𝔔', 'q'},
		{"fraktur upper S", '𝔖', 's'},
		{"fraktur upper T", '𝔗', 't'},
		{"fraktur upper U", '𝔘', 'u'},
		{"fraktur upper V", '𝔙', 'v'},
		{"fraktur upper W", '𝔚', 'w'},
		{"fraktur upper X", '𝔛', 'x'},
		{"fraktur upper Y", '𝔜', 'y'},
		// Fraktur uppercase (Letterlike Symbols block)
		{"fraktur upper C (letterlike)", 'ℭ', 'c'},
		{"fraktur upper H (letterlike)", 'ℌ', 'h'},
		{"fraktur upper I (letterlike)", 'ℑ', 'i'},
		{"fraktur upper R (letterlike)", 'ℜ', 'r'},
		{"fraktur upper Z (letterlike)", 'ℨ', 'z'},
		// Regular ASCII should pass through unchanged
		{"regular lowercase a", 'a', 'a'},
		{"regular lowercase z", 'z', 'z'},
		{"regular uppercase A", 'A', 'A'},
		{"regular digit 5", '5', '5'},
		// Other unicode should pass through unchanged
		{"cyrillic д", 'д', 'д'},
		{"greek α", 'α', 'α'},
		{"emoji", '🎉', '🎉'},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := normalizeRune(tt.input)
			if result != tt.expected {
				t.Errorf("normalizeRune(%q) = %q, want %q", tt.input, result, tt.expected)
			}
		})
	}
}
func TestHasDecorativeUnicode(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected bool
	}{
		{"plain ASCII", "hello world", false},
		{"small caps word", "ᴅᴇᴀᴛʜ", true},
		{"fraktur lowercase", "𝔥𝔢𝔩𝔩𝔬", true},
		{"fraktur uppercase", "𝔇𝔈𝔄𝔗ℌ", true},
		{"mixed with ASCII", "hello ᴡᴏʀʟᴅ", true},
		{"single small cap", "aᴀa", true},
		{"cyrillic (no normalize)", "привет", false},
		{"empty string", "", false},
		{"letterlike fraktur C", "ℭool", true},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := hasDecorativeUnicode(tt.input)
			if result != tt.expected {
				t.Errorf("hasDecorativeUnicode(%q) = %v, want %v", tt.input, result, tt.expected)
			}
		})
	}
}
func TestTokenHashesNormalization(t *testing.T) {
	// All decorated representations should hash identically to plain ASCII
	ascii := TokenHashes([]byte("death"))
	smallCaps := TokenHashes([]byte("ᴅᴇᴀᴛʜ"))
	frakturLower := TokenHashes([]byte("𝔡𝔢𝔞𝔱𝔥"))
	frakturUpper := TokenHashes([]byte("𝔇𝔈𝔄𝔗ℌ"))
	if len(ascii) != 1 {
		t.Fatalf("expected 1 hash for 'death', got %d", len(ascii))
	}
	if len(smallCaps) != 1 {
		t.Fatalf("expected 1 hash for small caps, got %d", len(smallCaps))
	}
	if len(frakturLower) != 1 {
		t.Fatalf("expected 1 hash for fraktur lower, got %d", len(frakturLower))
	}
	if len(frakturUpper) != 1 {
		t.Fatalf("expected 1 hash for fraktur upper, got %d", len(frakturUpper))
	}
	// All should match the ASCII version
	if !bytes.Equal(ascii[0], smallCaps[0]) {
		t.Errorf("small caps hash differs from ASCII\nASCII: %x\nsmall caps: %x", ascii[0], smallCaps[0])
	}
	if !bytes.Equal(ascii[0], frakturLower[0]) {
		t.Errorf("fraktur lower hash differs from ASCII\nASCII: %x\nfraktur lower: %x", ascii[0], frakturLower[0])
	}
	if !bytes.Equal(ascii[0], frakturUpper[0]) {
		t.Errorf("fraktur upper hash differs from ASCII\nASCII: %x\nfraktur upper: %x", ascii[0], frakturUpper[0])
	}
}
func TestTokenHashesMixedContent(t *testing.T) {
	// Test that mixed content normalizes correctly
	content := []byte("ᴛʜᴇ quick 𝔟𝔯𝔬𝔴𝔫 fox")
	hashes := TokenHashes(content)
	// Should get: "the", "quick", "brown", "fox" (4 unique words)
	if len(hashes) != 4 {
		t.Errorf("expected 4 hashes from mixed content, got %d", len(hashes))
	}
	// Verify "the" matches between decorated and plain
	thePlain := TokenHashes([]byte("the"))
	theDecorated := TokenHashes([]byte("ᴛʜᴇ"))
	if !bytes.Equal(thePlain[0], theDecorated[0]) {
		t.Errorf("'the' hash mismatch: plain=%x, decorated=%x", thePlain[0], theDecorated[0])
	}
	// Verify "brown" matches between decorated and plain
	brownPlain := TokenHashes([]byte("brown"))
	brownDecorated := TokenHashes([]byte("𝔟𝔯𝔬𝔴𝔫"))
	if !bytes.Equal(brownPlain[0], brownDecorated[0]) {
		t.Errorf("'brown' hash mismatch: plain=%x, decorated=%x", brownPlain[0], brownDecorated[0])
	}
}
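
To run just these tests from the repository root (package path taken from the files-modified list above):

go test ./pkg/database -run 'NormalizeRune|DecorativeUnicode|TokenHashes' -v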

(version file)

@@ -1 +1 @@
-v0.36.9
+v0.36.10