Files
next.orly.dev/pkg/encoders/text/escape.go
mleku 53fb12443e Add benchmark tests and optimize encryption performance
- Introduced comprehensive benchmark tests for NIP-44 and NIP-4 encryption/decryption, including various message sizes and round-trip operations.
- Implemented optimizations to reduce memory allocations and CPU processing time in encryption functions, focusing on pre-allocating buffers and minimizing reallocations.
- Enhanced error handling in encryption and decryption processes to ensure robustness.
- Documented performance improvements in the new PERFORMANCE_REPORT.md file, highlighting significant reductions in execution time and memory usage.
2025-11-02 18:08:11 +00:00

191 lines
5.0 KiB
Go

package text
// NostrEscape appends src to dst as the body of a JSON string (RFC 8259),
// applying the escaping rules that NIP-01 mandates for event serialization,
// and returns the extended slice.
//
// NIP-01 requires that, so every implementation derives the same event ID,
// only the following bytes are escaped and everything else is copied verbatim:
//
//   - line break, 0x0A, as \n
//   - double quote, 0x22, as \"
//   - backslash, 0x5C, as \\
//   - carriage return, 0x0D, as \r
//   - tab, 0x09, as \t
//   - backspace, 0x08, as \b
//   - form feed, 0x0C, as \f
//
// Two deviations from that list are implemented here:
//
//   - Every remaining control byte below 0x20 is written as \u00xx (lowercase
//     hex) so that the output is still valid JSON when the content carries
//     binary data.
//   - A backslash that is immediately followed by 'u' is emitted as a single
//     backslash, so an existing \uXXXX sequence in src passes through as-is.
//
// When dst is nil and src is non-empty, a buffer of roughly 1.5x len(src) is
// allocated up front to limit regrowth. When dst is nil and src is empty, nil
// is returned.
func NostrEscape(dst, src []byte) []byte {
	const hexDigits = "0123456789abcdef"

	n := len(src)
	if dst == nil && n > 0 {
		// Most inputs need few escapes; 1.5x is a compromise between the
		// common case and the 6x worst case of all-control-byte input.
		capacity := n * 3 / 2
		if capacity < n {
			capacity = n
		}
		dst = make([]byte, 0, capacity)
	}

	for i, ch := range src {
		switch ch {
		case '"':
			dst = append(dst, '\\', '"')
		case '\\':
			// Leave the backslash of a "\u" pair alone so the sequence
			// survives unchanged; any other backslash is doubled.
			if i+1 < n && src[i+1] == 'u' {
				dst = append(dst, '\\')
				continue
			}
			dst = append(dst, '\\', '\\')
		case '\b':
			dst = append(dst, '\\', 'b')
		case '\t':
			dst = append(dst, '\\', 't')
		case '\n':
			dst = append(dst, '\\', 'n')
		case '\f':
			dst = append(dst, '\\', 'f')
		case '\r':
			dst = append(dst, '\\', 'r')
		default:
			if ch < 0x20 {
				// Control byte without a short escape: \u00 + two hex digits.
				dst = append(dst, '\\', 'u', '0', '0', hexDigits[ch>>4], hexDigits[ch&0x0F])
			} else {
				dst = append(dst, ch)
			}
		}
	}
	return dst
}
// NostrUnescape reverses the operation of NostrEscape. Instead of appending to
// a separate buffer it rewrites dst in place and returns the prefix of dst that
// holds the decoded bytes, so the input is mangled but no allocation is made.
//
// Decoded sequences:
//
//   - \" \\ \b \t \n \f \r become the byte they denote.
//   - \u00XX with two valid hex digits (either case) and a value below 0x20
//     becomes that single control byte.
//
// Every other backslash sequence is copied through unchanged, backslash
// included, because it has to survive byte-for-byte for event ID generation:
// other \u escapes, \/, digit escapes, unknown escapes, \u00XX with non-hex
// digits, and a lone backslash at the very end of the input.
func NostrUnescape(dst []byte) (b []byte) {
	var r, w int
	for ; r < len(dst); r++ {
		if dst[r] != '\\' {
			dst[w] = dst[r]
			w++
			continue
		}
		if r+1 >= len(dst) {
			// Lone trailing backslash: there is no escape character to read,
			// so keep it verbatim rather than indexing past the end.
			dst[w] = '\\'
			w++
			break
		}
		r++
		c := dst[r]
		// The write cursor never passes the read cursor (w <= r-1 here), so
		// emitting up to two bytes below cannot clobber unread input.
		switch c {
		// nip-01 specifies the following single letter C-style escapes; only
		// these can be safely decoded as they are re-encoded when marshalled.
		case '"', '\\':
			dst[w] = c
			w++
		case 'b':
			dst[w] = '\b'
			w++
		case 't':
			dst[w] = '\t'
			w++
		case 'n':
			dst[w] = '\n'
			w++
		case 'f':
			dst[w] = '\f'
			w++
		case 'r':
			dst[w] = '\r'
			w++
		case 'u':
			// Decode only the \u0000-\u001F form that NostrEscape generates.
			if r+4 < len(dst) && dst[r+1] == '0' && dst[r+2] == '0' {
				hi, hiOK := nostrHexNibble(dst[r+3])
				lo, loOK := nostrHexNibble(dst[r+4])
				if hiOK && loOK {
					if val := hi<<4 | lo; val < 0x20 {
						dst[w] = val
						w++
						r += 4 // skip the 00XX part; the loop skips the 'u'
						continue
					}
				}
			}
			// Any other \u sequence is preserved: emit "\u" and let the
			// following characters be copied by the ordinary path.
			dst[w] = '\\'
			w++
			dst[w] = 'u'
			w++
		default:
			// '/', digit escapes and anything else after a reverse solidus
			// are preserved exactly as written, backslash included.
			dst[w] = '\\'
			w++
			dst[w] = c
			w++
		}
	}
	b = dst[:w]
	return
}

// nostrHexNibble converts one ASCII hex digit (either case) to its value and
// reports whether c was a valid hex digit.
func nostrHexNibble(c byte) (byte, bool) {
	switch {
	case c >= '0' && c <= '9':
		return c - '0', true
	case c >= 'a' && c <= 'f':
		return c - 'a' + 10, true
	case c >= 'A' && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}