Remove deprecated JSON encoder/decoder and update dependencies

- Deleted the custom JSON encoder/decoder implementation in favor of the standard library's `encoding/json` package.
- Removed the `next.orly.dev/pkg/encoders/json` package and its associated files.
- Updated `go.mod` to remove the `github.com/nostr-dev-kit/ndk` dependency and bump the version of `lol.mleku.dev` from v1.0.3 to v1.0.4.
- Cleaned up import statements across various files to reflect the removal of the custom JSON package.
- Ensured that all references to the old JSON encoding/decoding methods are replaced with the standard library equivalents.
This commit is contained in:
2025-10-22 11:37:39 +01:00
parent 6cff006e54
commit 117e5924fd
38 changed files with 12 additions and 12377 deletions

View File

@@ -7,8 +7,9 @@ import (
"strings"
"time"
"encoding/json"
"github.com/dgraph-io/badger/v4"
"next.orly.dev/pkg/encoders/json"
)
type Subscription struct {
@@ -192,7 +193,7 @@ func (d *D) GetPaymentHistory(pubkey []byte) ([]Payment, error) {
// IsFirstTimeUser checks if a user is logging in for the first time and marks them as seen
func (d *D) IsFirstTimeUser(pubkey []byte) (bool, error) {
key := fmt.Sprintf("firstlogin:%s", hex.EncodeToString(pubkey))
isFirstTime := false
err := d.DB.Update(
func(txn *badger.Txn) error {
@@ -212,6 +213,6 @@ func (d *D) IsFirstTimeUser(pubkey []byte) (bool, error) {
return err // Return any other error as-is
},
)
return isFirstTime, err
}

View File

@@ -24,9 +24,6 @@ import (
// encode <, >, and & characters due to legacy bullcrap in the encoding/json
// library. Either call MarshalJSON directly or use a json.Encoder with html
// escaping disabled.
//
// Or import "next.orly.dev/pkg/encoders/json" and use json.Marshal which is the
// same as go 1.25 json v1 except with this one stupidity removed.
type E struct {
// ID is the SHA256 hash of the canonical encoding of the event in binary
@@ -89,7 +86,7 @@ func (ev *E) Clone() *E {
CreatedAt: ev.CreatedAt,
Kind: ev.Kind,
}
// Deep copy all byte slices with independent memory
if ev.ID != nil {
clone.ID = make([]byte, len(ev.ID))
@@ -107,7 +104,7 @@ func (ev *E) Clone() *E {
clone.Sig = make([]byte, len(ev.Sig))
copy(clone.Sig, ev.Sig)
}
// Deep copy tags
if ev.Tags != nil {
clone.Tags = tag.NewS()
@@ -124,7 +121,7 @@ func (ev *E) Clone() *E {
}
}
}
return clone
}

View File

@@ -11,7 +11,7 @@ import (
"lukechampine.com/frand"
"next.orly.dev/pkg/encoders/event/examples"
"next.orly.dev/pkg/encoders/hex"
"next.orly.dev/pkg/encoders/json"
"encoding/json"
"next.orly.dev/pkg/encoders/tag"
"next.orly.dev/pkg/utils"
"next.orly.dev/pkg/utils/bufpool"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,50 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (
"unicode"
"unicode/utf8"
)
// foldName produces the case-folded form of in. Two names fold to the same
// bytes exactly when bytes.EqualFold reports them equal.
func foldName(in []byte) []byte {
	// Keeping this wrapper tiny lets it be inlined, which takes advantage of
	// "function outlining" for the scratch array below.
	var scratch [32]byte // big enough for most JSON names
	folded := appendFoldedName(scratch[:0], in)
	return folded
}
// appendFoldedName appends the case-folded form of in to out and returns the
// extended slice. ASCII lower-case letters are upper-cased directly; every
// other rune is mapped through foldRune.
func appendFoldedName(out, in []byte) []byte {
	pos := 0
	for pos < len(in) {
		b := in[pos]
		if b >= utf8.RuneSelf {
			// Multi-byte Unicode: decode one rune and fold it.
			r, size := utf8.DecodeRune(in[pos:])
			out = utf8.AppendRune(out, foldRune(r))
			pos += size
			continue
		}
		// Single-byte ASCII: shift 'a'..'z' up to 'A'..'Z'.
		if b >= 'a' && b <= 'z' {
			b -= 'a' - 'A'
		}
		out = append(out, b)
		pos++
	}
	return out
}
// foldRune returns the smallest rune among all runes in the same fold set
// as r.
func foldRune(r rune) rune {
	// unicode.SimpleFold walks the fold orbit in increasing order and wraps
	// around to the smallest member, so the first non-increasing step lands
	// on the answer.
	next := unicode.SimpleFold(r)
	for next > r {
		r = next
		next = unicode.SimpleFold(r)
	}
	return next
}

View File

@@ -1,184 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import "bytes"
// HTMLEscape appends to dst the JSON-encoded src, rewriting the characters
// <, >, &, U+2028 and U+2029 inside string literals as \u003c, \u003e,
// \u0026, \u2028 and \u2029 so the JSON is safe to embed inside HTML <script>
// tags. For historical reasons, web browsers don't honor standard HTML
// escaping within <script> tags, so an alternative JSON encoding must be used.
func HTMLEscape(dst *bytes.Buffer, src []byte) {
	dst.Grow(len(src))
	// Build the escaped bytes in the buffer's spare capacity, then commit them.
	scratch := dst.AvailableBuffer()
	escaped := appendHTMLEscape(scratch, src)
	dst.Write(escaped)
}
// appendHTMLEscape appends src to dst, replacing each '<', '>' and '&' byte
// and each UTF-8 encoded U+2028 / U+2029 with its \uXXXX escape, and returns
// the extended slice.
func appendHTMLEscape(dst, src []byte) []byte {
	// The characters can only appear in string literals, so a plain
	// byte-by-byte scan is sufficient.
	flushed := 0 // index of the first src byte not yet copied to dst
	for i, c := range src {
		switch {
		case c == '<' || c == '>' || c == '&':
			dst = append(dst, src[flushed:i]...)
			dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF])
			flushed = i + 1
		case c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8:
			// U+2028 and U+2029 are encoded as E2 80 A8 and E2 80 A9.
			dst = append(dst, src[flushed:i]...)
			dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF])
			flushed = i + len("\u2029")
		}
	}
	return append(dst, src[flushed:]...)
}
// Compact appends to dst the JSON-encoded src with insignificant space
// characters elided. Whatever appendCompact returns is written to dst, and
// its error is passed through.
func Compact(dst *bytes.Buffer, src []byte) error {
	dst.Grow(len(src))
	out, err := appendCompact(dst.AvailableBuffer(), src, false)
	dst.Write(out)
	return err
}
// appendCompact appends src to dst, omitting every byte the scanner marks as
// skippable (the insignificant space Compact elides), and returns the
// extended slice.
// When escape is true, the bytes '<', '>' and '&' and the UTF-8 encodings of
// U+2028 and U+2029 are additionally rewritten as \uXXXX escapes.
// If the scanner reports an error at end of input, dst is returned truncated
// to its original length together with the scanner's error.
func appendCompact(dst, src []byte, escape bool) ([]byte, error) {
// Remember the incoming length so a failed compaction can be rolled back.
origLen := len(dst)
scan := newScanner()
defer freeScanner(scan)
// start is the index of the first src byte not yet copied to dst.
start := 0
for i, c := range src {
if escape && (c == '<' || c == '>' || c == '&') {
if start < i {
dst = append(dst, src[start:i]...)
}
dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF])
start = i + 1
}
// Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9).
if escape && c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 {
if start < i {
dst = append(dst, src[start:i]...)
}
dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF])
start = i + 3
}
// Scanner codes at or above scanSkipSpace mean the byte is not copied:
// either the scan failed (stop) or the byte is dropped from the output.
v := scan.step(scan, c)
if v >= scanSkipSpace {
if v == scanError {
break
}
// Flush the pending run, then step past the dropped byte.
if start < i {
dst = append(dst, src[start:i]...)
}
start = i + 1
}
}
if scan.eof() == scanError {
return dst[:origLen], scan.err
}
// Copy whatever is left after the last flush.
if start < len(src) {
dst = append(dst, src[start:]...)
}
return dst, nil
}
// appendNewline appends a line break to dst, followed by prefix and then
// depth copies of indent, and returns the extended slice. A depth of zero or
// less adds no indent.
func appendNewline(dst []byte, prefix, indent string, depth int) []byte {
	dst = append(append(dst, '\n'), prefix...)
	for remaining := depth; remaining > 0; remaining-- {
		dst = append(dst, indent...)
	}
	return dst
}
// indentGrowthFactor specifies the growth factor of indenting JSON input.
// Empirically, the growth factor was measured to be between 1.4x and 1.8x
// for some set of compacted JSON with the indent being a single tab.
// Specify a growth factor slightly larger than what is observed
// to reduce the probability of allocation in appendIndent.
// A factor no higher than 2 ensures that wasted space never exceeds 50%.
const indentGrowthFactor = 2
// Indent appends to dst an indented form of the JSON-encoded src.
// Each element in a JSON object or array begins on a new, indented line
// beginning with prefix followed by one or more copies of indent according
// to the indentation nesting.
// The data appended to dst does not begin with the prefix nor any
// indentation, to make it easier to embed inside other formatted JSON data.
// Although leading space characters (space, tab, carriage return, newline)
// at the beginning of src are dropped, trailing space characters at the end
// of src are preserved and copied to dst.
// For example, if src has no trailing spaces, neither will dst;
// if src ends in a trailing newline, so will dst.
func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
	// Reserve room up front so appendIndent rarely has to reallocate.
	dst.Grow(indentGrowthFactor * len(src))
	out, err := appendIndent(dst.AvailableBuffer(), src, prefix, indent)
	dst.Write(out)
	return err
}
// appendIndent appends an indented form of the JSON in src to dst and returns
// the extended slice. Each new line consists of prefix followed by depth
// copies of indent (see appendNewline).
// If the scanner reports an error at end of input, dst is returned truncated
// to its original length together with the scanner's error.
func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) {
// Remember the incoming length so a failed run can be rolled back.
origLen := len(dst)
scan := newScanner()
defer freeScanner(scan)
// needIndent is set after '{' or '[' and resolved by the next emitted byte.
needIndent := false
depth := 0
for _, c := range src {
scan.bytes++
v := scan.step(scan, c)
if v == scanSkipSpace {
continue
}
if v == scanError {
break
}
// The container turned out to be non-empty: open a new, deeper line.
if needIndent && v != scanEndObject && v != scanEndArray {
needIndent = false
depth++
dst = appendNewline(dst, prefix, indent, depth)
}
// Emit semantically uninteresting bytes
// (in particular, punctuation in strings) unmodified.
if v == scanContinue {
dst = append(dst, c)
continue
}
// Add spacing around real punctuation.
switch c {
case '{', '[':
// delay indent so that empty object and array are formatted as {} and [].
needIndent = true
dst = append(dst, c)
case ',':
dst = append(dst, c)
dst = appendNewline(dst, prefix, indent, depth)
case ':':
dst = append(dst, c, ' ')
case '}', ']':
if needIndent {
// suppress indent in empty object/array
needIndent = false
} else {
depth--
dst = appendNewline(dst, prefix, indent, depth)
}
dst = append(dst, c)
default:
dst = append(dst, c)
}
}
if scan.eof() == scanError {
return dst[:origLen], scan.err
}
return dst, nil
}

View File

@@ -1,41 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package internal
import "errors"
// NotForPublicUse is a marker type that an API is for internal use only.
// It does not perfectly prevent usage of that API, but helps to restrict usage.
// Anything with this marker is not covered by the Go compatibility agreement.
type NotForPublicUse struct{}
// AllowInternalUse is passed from "json" to "jsontext" to authenticate
// that the caller can have access to internal functionality.
// It is the zero value of the empty struct NotForPublicUse, so it carries no
// data.
var AllowInternalUse NotForPublicUse
// Sentinel error values internally shared between jsonv1 and jsonv2.
var (
// ErrCycle reports that a cycle was encountered.
ErrCycle = errors.New("encountered a cycle")
// ErrNonNilReference reports that a value was not passed as a non-nil
// pointer reference.
ErrNonNilReference = errors.New("value must be passed as a non-nil pointer reference")
)
// Hook functions for converting between v1 and v2 behavior.
// They are declared without initializers, so each is nil until another
// package assigns it.
// NOTE(review): presumably the v1 json package installs these; confirm
// against the assigning code, which is not visible here.
var (
// TransformMarshalError converts a v2 error into a v1 error.
// It is called only at the top-level of a Marshal function.
TransformMarshalError func(any, error) error
// NewMarshalerError constructs a jsonv1.MarshalerError.
// It is called after a user-defined Marshal method/function fails.
NewMarshalerError func(any, error, string) error
// TransformUnmarshalError converts a v2 error into a v1 error.
// It is called only at the top-level of an Unmarshal function.
TransformUnmarshalError func(any, error) error
// NewRawNumber returns new(jsonv1.Number).
NewRawNumber func() any
// RawNumberOf returns jsonv1.Number(b).
RawNumberOf func(b []byte) any
)

View File

@@ -1,215 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package jsonflags implements all the optional boolean flags.
// These flags are shared across "json", "jsontext", and "jsonopts".
package jsonflags
import "next.orly.dev/pkg/json/internal"
// Bools represents zero or more boolean flags, all set to true or false.
// The least-significant bit is the boolean value of all flags in the set.
// The remaining bits identify which particular flags.
//
// In common usage, this is OR'd with 0 or 1. For example:
// - (AllowInvalidUTF8 | 0) means "AllowInvalidUTF8 is false"
// - (Multiline | Indent | 1) means "Multiline and Indent are true"
type Bools uint64
// JSONOptions is an empty marker method taking the internal-only marker type.
// NOTE(review): presumably this is what lets Bools be used as a
// jsonopts.Options value; confirm against that interface.
func (Bools) JSONOptions(internal.NotForPublicUse) {}
// Derived flag sets, built from the individual flag bits declared in the
// const blocks that follow.
const (
// AllFlags is the set of all flags.
AllFlags = AllCoderFlags | AllArshalV2Flags | AllArshalV1Flags
// AllCoderFlags is the set of all encoder/decoder flags.
// maxCoderFlag-1 sets every bit below maxCoderFlag; subtracting initFlag
// removes the reserved value bit.
AllCoderFlags = (maxCoderFlag - 1) - initFlag
// AllArshalV2Flags is the set of all v2 marshal/unmarshal flags,
// i.e. the bits from maxCoderFlag up to but excluding maxArshalV2Flag.
AllArshalV2Flags = (maxArshalV2Flag - 1) - (maxCoderFlag - 1)
// AllArshalV1Flags is the set of all v1 marshal/unmarshal flags,
// i.e. the bits from maxArshalV2Flag up to but excluding maxArshalV1Flag.
AllArshalV1Flags = (maxArshalV1Flag - 1) - (maxArshalV2Flag - 1)
// NonBooleanFlags is the set of non-boolean flags,
// where the value is some other concrete Go type.
// The value of the flag is stored within jsonopts.Struct.
NonBooleanFlags = 0 |
Indent |
IndentPrefix |
ByteLimit |
DepthLimit |
Marshalers |
Unmarshalers
// DefaultV1Flags is the set of boolean flags that default to true under
// v1 semantics. None of the non-boolean flags differ between v1 and v2.
DefaultV1Flags = 0 |
AllowDuplicateNames |
AllowInvalidUTF8 |
EscapeForHTML |
EscapeForJS |
PreserveRawStrings |
Deterministic |
FormatNilMapAsNull |
FormatNilSliceAsNull |
MatchCaseInsensitiveNames |
CallMethodsWithLegacySemantics |
FormatByteArrayAsArray |
FormatBytesWithLegacySemantics |
FormatDurationAsNano |
MatchCaseSensitiveDelimiter |
MergeWithLegacySemantics |
OmitEmptyWithLegacySemantics |
ParseBytesWithLooseRFC4648 |
ParseTimeWithLooseRFC3339 |
ReportErrorsWithLegacySemantics |
StringifyWithLegacySemantics |
UnmarshalArrayFromAnyLength
// AnyWhitespace reports whether the encoded output might have any whitespace.
AnyWhitespace = Multiline | SpaceAfterColon | SpaceAfterComma
// WhitespaceFlags is the set of flags related to whitespace formatting.
// In contrast to AnyWhitespace, this includes Indent and IndentPrefix
// as those settings take no effect if Multiline is false.
WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix
// AnyEscape is the set of flags related to escaping in a JSON string.
AnyEscape = EscapeForHTML | EscapeForJS
// CanonicalizeNumbers is the set of flags related to raw number canonicalization.
CanonicalizeNumbers = CanonicalizeRawInts | CanonicalizeRawFloats
)
// Encoder and decoder flags.
// Each name takes the next single bit via iota, starting at bit 0 with
// initFlag. maxCoderFlag is the first bit after the last coder flag and is
// not itself a flag.
const (
initFlag Bools = 1 << iota // reserved for the boolean value itself
AllowDuplicateNames // encode or decode
AllowInvalidUTF8 // encode or decode
WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal
OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite
PreserveRawStrings // encode only
CanonicalizeRawInts // encode only
CanonicalizeRawFloats // encode only
ReorderRawObjects // encode only
EscapeForHTML // encode only
EscapeForJS // encode only
Multiline // encode only
SpaceAfterColon // encode only
SpaceAfterComma // encode only
Indent // encode only; non-boolean flag
IndentPrefix // encode only; non-boolean flag
ByteLimit // encode or decode; non-boolean flag
DepthLimit // encode or decode; non-boolean flag
maxCoderFlag
)
// Marshal and Unmarshal flags (for v2).
// The blank first entry starts one bit below maxCoderFlag so that the first
// named flag reuses the bit position of maxCoderFlag itself, continuing the
// bit sequence of the coder flags without a gap.
const (
_ Bools = (maxCoderFlag >> 1) << iota
StringifyNumbers // marshal or unmarshal
Deterministic // marshal only
FormatNilMapAsNull // marshal only
FormatNilSliceAsNull // marshal only
OmitZeroStructFields // marshal only
MatchCaseInsensitiveNames // marshal or unmarshal
DiscardUnknownMembers // marshal only
RejectUnknownMembers // unmarshal only
Marshalers // marshal only; non-boolean flag
Unmarshalers // unmarshal only; non-boolean flag
maxArshalV2Flag
)
// Marshal and Unmarshal flags (for v1).
// The blank first entry starts one bit below maxArshalV2Flag so that the
// first named flag reuses the bit position of maxArshalV2Flag itself,
// continuing the bit sequence of the v2 flags without a gap.
const (
_ Bools = (maxArshalV2Flag >> 1) << iota
CallMethodsWithLegacySemantics // marshal or unmarshal
FormatByteArrayAsArray // marshal or unmarshal
FormatBytesWithLegacySemantics // marshal or unmarshal
FormatDurationAsNano // marshal or unmarshal
MatchCaseSensitiveDelimiter // marshal or unmarshal
MergeWithLegacySemantics // unmarshal
OmitEmptyWithLegacySemantics // marshal
ParseBytesWithLooseRFC4648 // unmarshal
ParseTimeWithLooseRFC3339 // unmarshal
ReportErrorsWithLegacySemantics // marshal or unmarshal
StringifyWithLegacySemantics // marshal or unmarshal
StringifyBoolsAndStrings // marshal or unmarshal; for internal use by jsonv2.makeStructArshaler
UnmarshalAnyWithRawNumber // unmarshal; for internal use by jsonv1.Decoder.UseNumber
UnmarshalArrayFromAnyLength // unmarshal
maxArshalV1Flag
)
// bitsUsed is the number of bits used in the 64-bit boolean flags.
const bitsUsed = 42
// Static compile check that bitsUsed and maxArshalV1Flag are in sync.
// If the two differ, one of the subtractions below is a negative constant,
// and converting a negative constant to uint64 is a compile-time error.
const _ = uint64((1<<bitsUsed)-maxArshalV1Flag) + uint64(maxArshalV1Flag-(1<<bitsUsed))
// Flags is a set of boolean flags.
// If the presence bit is zero, then the value bit must also be zero.
// The least-significant bit of both fields is always zero.
//
// Unlike Bools, which can represent a set of bools that are all true or false,
// Flags represents a set of bools, each of which may individually be true or false.
//
// Presence has a bit set for every flag that has been specified; Values holds
// the boolean of each such flag at the same bit position.
type Flags struct{ Presence, Values uint64 }
// Join merges src into dst such that, for every flag present in src, the
// value from src takes precedence.
func (dst *Flags) Join(src Flags) {
	// Worked example:
	//	dst = Flags{Presence: 0b_1100_0011, Values: 0b_1000_0011}
	//	src = Flags{Presence: 0b_0101_1010, Values: 0b_1001_0010}

	// Every flag present in src becomes present in dst.
	//	0b_1100_0011 | 0b_0101_1010 -> 0b_1101_1011
	dst.Presence = dst.Presence | src.Presence

	// Drop dst's old values for the flags src specifies, then adopt src's.
	//	0b_1000_0011 &^ 0b_0101_1010 -> 0b_1000_0001
	//	0b_1000_0001 |  0b_1001_0010 -> 0b_1001_0011
	dst.Values = (dst.Values &^ src.Presence) | src.Values
}
// Set sets both the presence and value for the provided bool (or set of
// bools). The least-significant bit of f supplies the value applied to every
// flag identified by the remaining bits.
func (fs *Flags) Set(f Bools) {
	// Worked example:
	//	fs = Flags{Presence: 0b_0101_0010, Values: 0b_0001_0010}
	//	f  = 0b_1001_0001

	// Strip the value bit to leave only the flag identifiers.
	//	0b_1001_0001 &^ 1 -> 0b_1001_0000
	ids := uint64(f &^ 1)

	// Mark the identified flags as present.
	//	0b_0101_0010 | 0b_1001_0000 -> 0b_1101_0010
	fs.Presence |= ids

	// Clear their previous values.
	//	0b_0001_0010 &^ 0b_1001_0000 -> 0b_0000_0010
	fs.Values &^= ids

	// When the value bit is set, turn every identified flag on.
	//	0b_0000_0010 | 0b_1001_0000 -> 0b_1001_0010
	if f&1 != 0 {
		fs.Values |= ids
	}
}
// Get reports whether the bool (or any of the bools) is true.
// This is generally only used with a singular bool.
// The value bit of f (i.e., the LSB) is ignored.
func (fs Flags) Get(f Bools) bool {
	selected := fs.Values & uint64(f)
	return selected != 0
}
// Has reports whether the bool (or any of the bools) is set,
// regardless of whether its value is true or false.
// The value bit of f (i.e., the LSB) is ignored.
func (fs Flags) Has(f Bools) bool {
	selected := fs.Presence & uint64(f)
	return selected != 0
}
// Clear clears both the presence and value for the provided bool or bools.
// The value bit of f (i.e., the LSB) is ignored.
func (fs *Flags) Clear(f Bools) {
	// Worked example:
	//	fs = Flags{Presence: 0b_0101_0010, Values: 0b_0001_0010}
	//	f  = 0b_0001_1000
	drop := uint64(f)
	fs.Presence &^= drop // 0b_0101_0010 &^ 0b_0001_1000 -> 0b_0100_0010
	fs.Values &^= drop   // 0b_0001_0010 &^ 0b_0001_1000 -> 0b_0000_0010
}

View File

@@ -1,202 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonopts
import (
"next.orly.dev/pkg/json/internal"
"next.orly.dev/pkg/json/internal/jsonflags"
)
// Options is the common options type shared across json packages.
// Its only method takes an internal.NotForPublicUse marker argument.
type Options interface {
// JSONOptions is exported so related json packages can implement Options.
JSONOptions(internal.NotForPublicUse)
}
// Struct is the combination of all options in struct form.
// This is efficient to pass down the call stack and to query.
// Flags records which options are present and their boolean values; the
// embedded CoderValues and ArshalValues hold the non-boolean option values.
type Struct struct {
Flags jsonflags.Flags
CoderValues
ArshalValues
}
// CoderValues holds the values of the non-boolean encoder/decoder options.
// The trailing comment on each field names the flag that marks it as set.
type CoderValues struct {
Indent string // jsonflags.Indent
IndentPrefix string // jsonflags.IndentPrefix
ByteLimit int64 // jsonflags.ByteLimit
DepthLimit int // jsonflags.DepthLimit
}
// ArshalValues holds the values of the non-boolean marshal/unmarshal options.
type ArshalValues struct {
// The Marshalers and Unmarshalers fields use the any type to avoid a
// concrete dependency on *json.Marshalers and *json.Unmarshalers,
// which would in turn create a dependency on the "reflect" package.
Marshalers any // jsonflags.Marshalers
Unmarshalers any // jsonflags.Unmarshalers
Format string
FormatDepth int
}
// DefaultOptionsV2 is the set of all options that define default v2 behavior.
// Every flag except the whitespace flags is marked present, and all values
// are false.
var DefaultOptionsV2 = Struct{
Flags: jsonflags.Flags{
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
Values: uint64(0),
},
}
// DefaultOptionsV1 is the set of all options that define default v1 behavior.
// Every flag except the whitespace flags is marked present, and the flags in
// jsonflags.DefaultV1Flags are true.
var DefaultOptionsV1 = Struct{
Flags: jsonflags.Flags{
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
Values: uint64(jsonflags.DefaultV1Flags),
},
}
// JSONOptions is an empty marker method; it makes *Struct satisfy Options.
func (*Struct) JSONOptions(internal.NotForPublicUse) {}
// GetUnknownOption is injected by the "json" package to handle Options
// declared in that package so that "jsonopts" can handle them.
// Until it is replaced, the default implementation panics with
// "unknown option".
var GetUnknownOption = func(Struct, Options) (any, bool) { panic("unknown option") }
// GetOption looks up, within opts, the option that setter constructs and
// returns its value together with whether it is present.
//
// setter is called once with the zero value of T purely to discover which
// option is being asked about; the dynamic type of its result selects the
// lookup. Option types not declared in this package are delegated to
// GetUnknownOption.
func GetOption[T any](opts Options, setter func(T) Options) (T, bool) {
	// Normalize opts to a *Struct so every lookup below reads one shape.
	resolved, isStruct := opts.(*Struct)
	if !isStruct {
		resolved = new(Struct)
		resolved.Join(opts)
	}

	var zero T
	switch opt := setter(zero).(type) {
	case jsonflags.Bools:
		value := resolved.Flags.Get(opt)
		present := resolved.Flags.Has(opt)
		return any(value).(T), present
	case Indent:
		if resolved.Flags.Has(jsonflags.Indent) {
			return any(resolved.Indent).(T), true
		}
		return zero, false
	case IndentPrefix:
		if resolved.Flags.Has(jsonflags.IndentPrefix) {
			return any(resolved.IndentPrefix).(T), true
		}
		return zero, false
	case ByteLimit:
		if resolved.Flags.Has(jsonflags.ByteLimit) {
			return any(resolved.ByteLimit).(T), true
		}
		return zero, false
	case DepthLimit:
		if resolved.Flags.Has(jsonflags.DepthLimit) {
			return any(resolved.DepthLimit).(T), true
		}
		return zero, false
	default:
		value, present := GetUnknownOption(*resolved, opt)
		return value.(T), present
	}
}
// JoinUnknownOption is injected by the "json" package to handle Options
// declared in that package so that "jsonopts" can handle them.
// Until it is replaced, the default implementation panics with
// "unknown option".
var JoinUnknownOption = func(Struct, Options) Struct { panic("unknown option") }
// Join merges srcs into dst in order, including coder-level options.
func (dst *Struct) Join(srcs ...Options) {
	const excludeCoderOptions = false
	dst.join(excludeCoderOptions, srcs...)
}
// JoinWithoutCoderOptions merges srcs into dst in order while leaving out
// every coder-level option.
func (dst *Struct) JoinWithoutCoderOptions(srcs ...Options) {
	const excludeCoderOptions = true
	dst.join(excludeCoderOptions, srcs...)
}
// join merges each option in srcs into dst, in order.
// When excludeCoderOptions is true, coder-level options are left out: the
// jsonflags.AllCoderFlags bits are masked off Bools and *Struct sources, and
// Indent, IndentPrefix, ByteLimit and DepthLimit sources are skipped.
// Option types not declared in this package are delegated to
// JoinUnknownOption.
func (dst *Struct) join(excludeCoderOptions bool, srcs ...Options) {
for _, src := range srcs {
switch src := src.(type) {
case nil:
continue
case jsonflags.Bools:
if excludeCoderOptions {
src &= ^jsonflags.AllCoderFlags
}
dst.Flags.Set(src)
case Indent:
if excludeCoderOptions {
continue
}
// Setting an indent also turns Multiline on.
dst.Flags.Set(jsonflags.Multiline | jsonflags.Indent | 1)
dst.Indent = string(src)
case IndentPrefix:
if excludeCoderOptions {
continue
}
// Setting an indent prefix also turns Multiline on.
dst.Flags.Set(jsonflags.Multiline | jsonflags.IndentPrefix | 1)
dst.IndentPrefix = string(src)
case ByteLimit:
if excludeCoderOptions {
continue
}
dst.Flags.Set(jsonflags.ByteLimit | 1)
dst.ByteLimit = int64(src)
case DepthLimit:
if excludeCoderOptions {
continue
}
dst.Flags.Set(jsonflags.DepthLimit | 1)
dst.DepthLimit = int(src)
case *Struct:
srcFlags := src.Flags // shallow copy the flags
if excludeCoderOptions {
srcFlags.Clear(jsonflags.AllCoderFlags)
}
dst.Flags.Join(srcFlags)
// Copy only the non-boolean values that src actually specifies.
if srcFlags.Has(jsonflags.NonBooleanFlags) {
if srcFlags.Has(jsonflags.Indent) {
dst.Indent = src.Indent
}
if srcFlags.Has(jsonflags.IndentPrefix) {
dst.IndentPrefix = src.IndentPrefix
}
if srcFlags.Has(jsonflags.ByteLimit) {
dst.ByteLimit = src.ByteLimit
}
if srcFlags.Has(jsonflags.DepthLimit) {
dst.DepthLimit = src.DepthLimit
}
if srcFlags.Has(jsonflags.Marshalers) {
dst.Marshalers = src.Marshalers
}
if srcFlags.Has(jsonflags.Unmarshalers) {
dst.Unmarshalers = src.Unmarshalers
}
}
default:
*dst = JoinUnknownOption(*dst, src)
}
}
}
// Option value types for the non-boolean coder options. The trailing comment
// on each names the jsontext constructor associated with it.
type (
Indent string // jsontext.WithIndent
IndentPrefix string // jsontext.WithIndentPrefix
ByteLimit int64 // jsontext.WithByteLimit
DepthLimit int // jsontext.WithDepthLimit
// type for jsonflags.Marshalers declared in "json" package
// type for jsonflags.Unmarshalers declared in "json" package
)
// The empty JSONOptions marker methods below make each value type satisfy
// Options.
func (Indent) JSONOptions(internal.NotForPublicUse) {}
func (IndentPrefix) JSONOptions(internal.NotForPublicUse) {}
func (ByteLimit) JSONOptions(internal.NotForPublicUse) {}
func (DepthLimit) JSONOptions(internal.NotForPublicUse) {}

View File

@@ -1,629 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonwire
import (
"io"
"math"
"slices"
"strconv"
"unicode/utf16"
"unicode/utf8"
)
// ValueFlags is a bit set of properties observed while consuming a JSON
// string value.
type ValueFlags uint

const (
	// stringNonVerbatim: string cannot be naively treated as valid UTF-8.
	stringNonVerbatim ValueFlags = 1 << iota
	// stringNonCanonical: string not formatted according to RFC 8785,
	// section 3.2.2.2.
	stringNonCanonical
	// TODO: Track whether a number is a non-integer?
)

// Join adds every flag set in f2 to f.
func (f *ValueFlags) Join(f2 ValueFlags) {
	*f = *f | f2
}

// IsVerbatim reports whether the stringNonVerbatim flag is unset.
func (f ValueFlags) IsVerbatim() bool {
	nonVerbatim := f&stringNonVerbatim != 0
	return !nonVerbatim
}

// IsCanonical reports whether the stringNonCanonical flag is unset.
func (f ValueFlags) IsCanonical() bool {
	nonCanonical := f&stringNonCanonical != 0
	return !nonCanonical
}
// ConsumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2,
// and returns the number of bytes consumed.
func ConsumeWhitespace(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	for ; n < len(b); n++ {
		if c := b[n]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
	}
	return n
}
// ConsumeNull consumes the next JSON null literal per RFC 7159, section 3.
// It returns the length of the literal when b starts with it, or 0 if it is
// invalid, in which case consumeLiteral should be used.
func ConsumeNull(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const want = "null"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
// ConsumeFalse consumes the next JSON false literal per RFC 7159, section 3.
// It returns the length of the literal when b starts with it, or 0 if it is
// invalid, in which case consumeLiteral should be used.
func ConsumeFalse(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const want = "false"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
// ConsumeTrue consumes the next JSON true literal per RFC 7159, section 3.
// It returns the length of the literal when b starts with it, or 0 if it is
// invalid, in which case consumeLiteral should be used.
func ConsumeTrue(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const want = "true"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3.
// On a mismatching byte it returns that byte's offset and an
// invalid-character error. If the input appears truncated, that is, b is a
// proper prefix of lit, it returns len(b) and io.ErrUnexpectedEOF.
// Otherwise it returns len(lit) and a nil error.
func ConsumeLiteral(b []byte, lit string) (n int, err error) {
	// Only the bytes that both b and lit have can be compared.
	overlap := len(lit)
	if len(b) < overlap {
		overlap = len(b)
	}
	for i, c := range b[:overlap] {
		if c != lit[i] {
			want := strconv.QuoteRune(rune(lit[i]))
			return i, NewInvalidCharacterError(b[i:], "in literal "+lit+" (expecting "+want+")")
		}
	}
	if overlap < len(lit) {
		return len(b), io.ErrUnexpectedEOF
	}
	return len(lit), nil
}
// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7
// but is limited to the grammar for an ASCII string without escape sequences.
// It returns 0 if it is invalid or more complicated than a simple string,
// in which case consumeString should be called.
//
// It rejects '<', '>', and '&' for compatibility reasons since these were
// always escaped in the v1 implementation. Thus, if this function reports
// non-zero then we know that the string would be encoded the same way
// under both v1 or v2 escape semantics.
func ConsumeSimpleString(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) == 0 || b[0] != '"' {
		return 0
	}
	// Skip the opening quote, then every ASCII byte the escapeASCII table
	// does not flag.
	n = 1
	for n < len(b) && b[n] < utf8.RuneSelf && escapeASCII[b[n]] == 0 {
		n++
	}
	// Anything other than a closing quote here means "not simple".
	if n >= len(b) || b[n] != '"' {
		return 0
	}
	return n + 1
}
// ConsumeString consumes the next JSON string per RFC 7159, section 7.
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
// characters within the string itself.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) {
	// A resume offset of zero means "start at the leading quote".
	const fromStart = 0
	n, err = ConsumeStringResumable(flags, b, fromStart, validateUTF8)
	return n, err
}
// ConsumeStringResumable is identical to consumeString but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// When resumeOffset is greater than zero, the leading double quote is assumed
// to have been handled already and scanning continues from that offset in b.
// Properties observed along the way (escape sequences, non-canonical forms,
// invalid UTF-8) are recorded in flags.
// When the input ends inside an escape sequence, the returned offset is that
// of the backslash that starts it.
func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
// Consume the leading double quote.
switch {
case resumeOffset > 0:
n = resumeOffset // already handled the leading quote
case uint(len(b)) == 0:
return n, io.ErrUnexpectedEOF
case b[0] == '"':
n++
default:
return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
}
// Consume every character in the string.
for uint(len(b)) > uint(n) {
// Optimize for long sequences of unescaped characters.
noEscape := func(c byte) bool {
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
}
for uint(len(b)) > uint(n) && noEscape(b[n]) {
n++
}
if uint(len(b)) <= uint(n) {
return n, io.ErrUnexpectedEOF
}
// Check for terminating double quote.
if b[n] == '"' {
n++
return n, nil
}
switch r, rn := utf8.DecodeRune(b[n:]); {
// Handle UTF-8 encoded byte sequence.
// Due to specialized handling of ASCII above, we know that
// all normal sequences at this point must be 2 bytes or larger.
case rn > 1:
n += rn
// Handle escape sequence.
case r == '\\':
flags.Join(stringNonVerbatim)
// Remember where the escape starts so a truncated escape can be resumed.
resumeOffset = n
if uint(len(b)) < uint(n+2) {
return resumeOffset, io.ErrUnexpectedEOF
}
switch r := b[n+1]; r {
case '/':
// Forward slash is the only character with 3 representations.
// Per RFC 8785, section 3.2.2.2., this must not be escaped.
flags.Join(stringNonCanonical)
n += 2
case '"', '\\', 'b', 'f', 'n', 'r', 't':
n += 2
case 'u':
if uint(len(b)) < uint(n+6) {
if hasEscapedUTF16Prefix(b[n:], false) {
return resumeOffset, io.ErrUnexpectedEOF
}
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n:])
}
v1, ok := parseHexUint16(b[n+2 : n+6])
if !ok {
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n : n+6])
}
// Only certain control characters can use the \uFFFF notation
// for canonical formatting (per RFC 8785, section 3.2.2.2.).
switch v1 {
// \uFFFF notation not permitted for these characters.
case '\b', '\f', '\n', '\r', '\t':
flags.Join(stringNonCanonical)
default:
// \uFFFF notation only permitted for control characters.
if v1 >= ' ' {
flags.Join(stringNonCanonical)
} else {
// \uFFFF notation must be lower case.
for _, c := range b[n+2 : n+6] {
if 'A' <= c && c <= 'F' {
flags.Join(stringNonCanonical)
}
}
}
}
n += 6
r := rune(v1)
// A surrogate must be followed by a second \uXXXX escape that
// completes the pair; this is only enforced when validating UTF-8.
if validateUTF8 && utf16.IsSurrogate(r) {
if uint(len(b)) < uint(n+6) {
if hasEscapedUTF16Prefix(b[n:], true) {
return resumeOffset, io.ErrUnexpectedEOF
}
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6:])
} else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok {
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
} else {
n += 6
}
}
default:
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n : n+2])
}
// Handle invalid UTF-8.
case r == utf8.RuneError:
// An incomplete rune at the end of b is treated as truncated input.
if !utf8.FullRune(b[n:]) {
return n, io.ErrUnexpectedEOF
}
flags.Join(stringNonVerbatim | stringNonCanonical)
if validateUTF8 {
return n, ErrInvalidUTF8
}
n++
// Handle invalid control characters.
case r < ' ':
flags.Join(stringNonVerbatim | stringNonCanonical)
return n, NewInvalidCharacterError(b[n:], "in string (expecting non-control character)")
default:
panic("BUG: unhandled character " + QuoteRune(b[n:]))
}
}
return n, io.ErrUnexpectedEOF
}
// AppendUnquote appends the unescaped form of a JSON string in src to dst.
// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,
// but the error will be specified as having encountered such an error.
// The input must be an entire JSON string with no surrounding whitespace.
//
// The returned error is io.ErrUnexpectedEOF if src ends before the closing
// quote, ErrInvalidUTF8 if invalid bytes were replaced, or a syntax error for
// an invalid character, an invalid escape sequence, or data following the
// closing quote. Whatever was unescaped before the problem was detected is
// still returned in v.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) {
dst = slices.Grow(dst, len(src))
// Consume the leading double quote.
// n is the read offset into src; i marks the start of the run of
// src bytes that has been scanned but not yet copied into dst.
var i, n int
switch {
case uint(len(src)) == 0:
return dst, io.ErrUnexpectedEOF
case src[0] == '"':
i, n = 1, 1
default:
return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`)
}
// Consume every character in the string.
for uint(len(src)) > uint(n) {
// Optimize for long sequences of unescaped characters.
noEscape := func(c byte) bool {
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
}
for uint(len(src)) > uint(n) && noEscape(src[n]) {
n++
}
if uint(len(src)) <= uint(n) {
dst = append(dst, src[i:n]...)
return dst, io.ErrUnexpectedEOF
}
// Check for terminating double quote.
if src[n] == '"' {
dst = append(dst, src[i:n]...)
n++
// Anything after the closing quote is reported as an error,
// replacing any error recorded earlier in the string.
if n < len(src) {
err = NewInvalidCharacterError(src[n:], "after string value")
}
return dst, err
}
switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
// Handle UTF-8 encoded byte sequence.
// Due to specialized handling of ASCII above, we know that
// all normal sequences at this point must be 2 bytes or larger.
case rn > 1:
n += rn
// Handle escape sequence.
case r == '\\':
// Flush the pending literal run before emitting the unescaped bytes.
dst = append(dst, src[i:n]...)
// Need at least the backslash and one following byte.
if uint(len(src)) < uint(n+2) {
return dst, io.ErrUnexpectedEOF
}
switch r := src[n+1]; r {
case '"', '\\', '/':
dst = append(dst, r)
n += 2
case 'b':
dst = append(dst, '\b')
n += 2
case 'f':
dst = append(dst, '\f')
n += 2
case 'n':
dst = append(dst, '\n')
n += 2
case 'r':
dst = append(dst, '\r')
n += 2
case 't':
dst = append(dst, '\t')
n += 2
case 'u':
// A \uXXXX escape needs six bytes; a shorter remainder is either
// a truncated escape (EOF) or not an escape at all (syntax error).
if uint(len(src)) < uint(n+6) {
if hasEscapedUTF16Prefix(src[n:], false) {
return dst, io.ErrUnexpectedEOF
}
return dst, NewInvalidEscapeSequenceError(src[n:])
}
v1, ok := parseHexUint16(src[n+2 : n+6])
if !ok {
return dst, NewInvalidEscapeSequenceError(src[n : n+6])
}
n += 6
// Check whether this is a surrogate half.
r := rune(v1)
if utf16.IsSurrogate(r) {
r = utf8.RuneError // assume failure unless the following succeeds
if uint(len(src)) < uint(n+6) {
if hasEscapedUTF16Prefix(src[n:], true) {
return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF
}
err = NewInvalidEscapeSequenceError(src[n-6:])
} else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok {
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
} else {
n += 6
}
}
// On a failed surrogate pair r is still utf8.RuneError, so the
// replacement character is emitted and scanning continues with err set.
dst = utf8.AppendRune(dst, r)
default:
return dst, NewInvalidEscapeSequenceError(src[n : n+2])
}
// The escape has been fully handled; the next literal run starts here.
i = n
// Handle invalid UTF-8.
case r == utf8.RuneError:
dst = append(dst, src[i:n]...)
if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) {
return dst, io.ErrUnexpectedEOF
}
// NOTE: An unescaped string may be longer than the escaped string
// because invalid UTF-8 bytes are being replaced.
dst = append(dst, "\uFFFD"...)
n += rn
i = n
err = ErrInvalidUTF8
// Handle invalid control characters.
case r < ' ':
dst = append(dst, src[i:n]...)
return dst, NewInvalidCharacterError(src[n:], "in string (expecting non-control character)")
default:
panic("BUG: unhandled character " + QuoteRune(src[n:]))
}
}
// The input ran out without a closing quote.
dst = append(dst, src[i:n]...)
return dst, io.ErrUnexpectedEOF
}
// hasEscapedUTF16Prefix reports whether b could be the beginning of a
// \uXXXX escape sequence that was cut short.
//
// When lowerSurrogateHalf is set, the first two hex digits must additionally
// be compatible with a code unit in ['\uDC00':'\uDFFF'].
// Bytes beyond the sixth are not inspected.
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
	isHexDigit := func(c byte) bool {
		return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
	}
	for pos := 0; pos < len(b); pos++ {
		c := b[pos]
		switch pos {
		case 0:
			if c != '\\' {
				return false
			}
		case 1:
			if c != 'u' {
				return false
			}
		case 2:
			if lowerSurrogateHalf && c != 'd' && c != 'D' {
				return false // not within ['\uDC00':'\uDFFF']
			}
			if !isHexDigit(c) {
				return false
			}
		case 3:
			inCToF := ('c' <= c && c <= 'f') || ('C' <= c && c <= 'F')
			if lowerSurrogateHalf && !inCToF {
				return false // not within ['\uDC00':'\uDFFF']
			}
			if !isHexDigit(c) {
				return false
			}
		case 4, 5:
			if !isHexDigit(c) {
				return false
			}
		}
	}
	return true
}
// UnquoteMayCopy returns the unescaped form of b, which is assumed to be
// a valid JSON string.
//
// When isVerbatim is true the result is a subslice of b with the
// surrounding quotes stripped; otherwise the string is unescaped into
// a newly allocated buffer.
func UnquoteMayCopy(b []byte, isVerbatim bool) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if !isVerbatim {
		unquoted, _ := AppendUnquote(nil, b)
		return unquoted
	}
	const quoteLen = len(`"`)
	return b[quoteLen : len(b)-quoteLen]
}
// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6,
// restricted to the grammar of a non-negative integer.
// It reports the number of bytes consumed, or 0 when the input is invalid or
// is followed by a fraction or exponent, in which case consumeNumber
// should be called instead.
func ConsumeSimpleNumber(b []byte) (n int) {
	if len(b) == 0 {
		return 0
	}
	switch first := b[0]; {
	case first == '0':
		// A leading zero is a complete integer on its own.
		n = 1
	case '1' <= first && first <= '9':
		for n = 1; n < len(b) && '0' <= b[n] && b[n] <= '9'; n++ {
		}
	default:
		return 0
	}
	// A following '.', 'e', or 'E' means this is not a simple integer.
	if n < len(b) {
		switch b[n] {
		case '.', 'e', 'E':
			return 0
		}
	}
	return n
}
// ConsumeNumberState records how far ConsumeNumberResumable progressed
// through a JSON number, so that a later call can continue from that point.
type ConsumeNumberState uint
const (
// consumeNumberInit is the state for a fresh (non-resumed) parse.
consumeNumberInit ConsumeNumberState = iota
// The declaration order below matters: ConsumeNumberResumable increments
// a "withinX" state to obtain the "beforeY" state that follows it.
beforeIntegerDigits
withinIntegerDigits
beforeFractionalDigits
withinFractionalDigits
beforeExponentDigits
withinExponentDigits
)
// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.
// It returns how many bytes were consumed and any error encountered;
// input that appears truncated yields io.ErrUnexpectedEOF.
//
// JSON numbers are not self-terminating: when the whole input is consumed,
// the caller must consider whether unread data that follows could still
// belong to this number.
func ConsumeNumber(b []byte) (n int, err error) {
	consumed, _, consumeErr := ConsumeNumberResumable(b, 0, consumeNumberInit)
	return consumed, consumeErr
}
// ConsumeNumberResumable is identical to consumeNumber but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// resumeOffset is the offset in b at which scanning starts, and state selects
// the component of the number to continue with; presumably both are the
// values returned by the earlier call (confirm against callers).
// It returns the offset reached, the state reached, and any error.
// When a component is cut short by the end of input, the returned offset is
// the start of that component together with the matching "before" state.
func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) {
// Jump to the right state when resuming from a partial consumption.
n = resumeOffset
if state > consumeNumberInit {
switch state {
case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
// Consume leading digits.
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
if uint(len(b)) <= uint(n) {
return n, state, nil // still within the same state
}
state++ // switches "withinX" to "beforeY" where Y is the state after X
}
switch state {
case beforeIntegerDigits:
goto beforeInteger
case beforeFractionalDigits:
goto beforeFractional
case beforeExponentDigits:
goto beforeExponent
default:
// No later component can follow, so the number ends here.
return n, state, nil
}
}
// Consume required integer component (with optional minus sign).
beforeInteger:
resumeOffset = n
// NOTE(review): the sign is looked up at b[0] rather than b[n]; this
// presumably relies on the number starting at offset 0 — confirm.
if uint(len(b)) > 0 && b[0] == '-' {
n++
}
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
case b[n] == '0':
// A leading zero cannot be followed by more integer digits.
n++
state = beforeFractionalDigits
case '1' <= b[n] && b[n] <= '9':
n++
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinIntegerDigits
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
// Consume optional fractional component.
beforeFractional:
if uint(len(b)) > uint(n) && b[n] == '.' {
resumeOffset = n
n++
// At least one digit is required after the decimal point.
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
case '0' <= b[n] && b[n] <= '9':
n++
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinFractionalDigits
}
// Consume optional exponent component.
beforeExponent:
if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') {
resumeOffset = n
n++
if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') {
n++
}
// At least one digit is required after the exponent marker and sign.
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
case '0' <= b[n] && b[n] <= '9':
n++
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinExponentDigits
}
return n, state, nil
}
// parseHexUint16 parses exactly four hexadecimal digits (either case)
// into a uint16. It is comparable to strconv.ParseUint, but works on
// []byte or string without conversion and is specialized for base-16.
// It reports false if b is not four bytes long or has a non-hex byte.
// See https://go.dev/issue/42429.
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	const width = 4
	if len(b) != width {
		return 0, false
	}
	var acc uint16
	for pos := 0; pos < width; pos++ {
		var nibble byte
		switch c := b[pos]; {
		case c >= '0' && c <= '9':
			nibble = c - '0'
		case c >= 'a' && c <= 'f':
			nibble = c - 'a' + 10
		case c >= 'A' && c <= 'F':
			nibble = c - 'A' + 10
		default:
			return 0, false
		}
		acc = acc<<4 | uint16(nibble)
	}
	return acc, true
}
// ParseUint parses b as a decimal unsigned integer using a strict subset
// of the JSON number grammar: digits only, with no leading zeros apart
// from the number "0" itself.
// It returns (0, false) on a syntax error and
// (math.MaxUint64, false) when the value overflows a uint64.
func ParseUint(b []byte) (v uint64, ok bool) {
	const maxDigits = 20 // len(fmt.Sprint(uint64(math.MaxUint64)))
	digits := 0
	for digits < len(b) && '0' <= b[digits] && b[digits] <= '9' {
		v = v*10 + uint64(b[digits]-'0')
		digits++
	}
	// Reject empty input, trailing non-digits, and leading zeros.
	if digits == 0 || digits != len(b) {
		return 0, false
	}
	if b[0] == '0' && digits > 1 {
		return 0, false
	}
	if digits < maxDigits {
		return v, true
	}
	// With exactly 20 digits the value fits only if it starts with '1'
	// and the accumulated value did not wrap around below 1e19.
	if digits > maxDigits || b[0] != '1' || v < 1e19 {
		return math.MaxUint64, false
	}
	return v, true
}
// ParseFloat parses b as a floating point number using the Go float grammar,
// of which the JSON number grammar is a strict subset.
// bits is passed through to strconv.ParseFloat.
//
// When the number overflows the finite range for a 32- or 64-bit float,
// the result is clamped to the largest finite magnitude of that size
// (with ok reported as false), because any finite value is a better
// approximation of another finite value than an infinity is.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	parsed, parseErr := strconv.ParseFloat(string(b), bits)
	ok = parseErr == nil

	var largest float64
	switch bits {
	case 32:
		largest = math.MaxFloat32
	case 64:
		largest = math.MaxFloat64
	default:
		// No clamping is applied for other bit sizes.
		return parsed, ok
	}
	if math.IsInf(parsed, +1) {
		parsed = +largest
	} else if math.IsInf(parsed, -1) {
		parsed = -largest
	}
	return parsed, ok
}

View File

@@ -1,290 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonwire
import (
"math"
"slices"
"strconv"
"unicode/utf16"
"unicode/utf8"
"next.orly.dev/pkg/json/internal/jsonflags"
)
// escapeASCII reports whether the ASCII character needs to be escaped.
// It conservatively assumes EscapeForHTML.
//
// The table is indexed by the byte value and has one entry for each of the
// 128 ASCII characters (16 per row); a non-zero entry means "escape".
var escapeASCII = [...]uint8{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // escape '"' and '&'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, // escape '<' and '>'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // escape '\\'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
// NeedEscape reports whether any character in src needs escaping.
// It conservatively assumes both EscapeForHTML and EscapeForJS,
// and reports true for inputs containing invalid UTF-8.
func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
	for pos := 0; pos < len(src); {
		c := src[pos]
		if c < utf8.RuneSelf {
			// Single-byte ASCII: consult the lookup table.
			if escapeASCII[c] != 0 {
				return true
			}
			pos++
			continue
		}
		// Multi-byte sequence: only the replacement character (which is also
		// what invalid UTF-8 decodes to) and the JS line separators matter.
		r, size := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[pos:])))
		switch r {
		case utf8.RuneError, '\u2028', '\u2029':
			return true
		}
		pos += size
	}
	return false
}
// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
//
// It takes in flags and respects the following:
//   - EscapeForHTML escapes '<', '>', and '&'.
//   - EscapeForJS escapes '\u2028' and '\u2029'.
//   - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
//
// Regardless of whether AllowInvalidUTF8 is specified,
// invalid bytes are replaced with the Unicode replacement character ('\ufffd').
// If no escape flags are set, then the shortest representable form is used,
// which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
//
// The complete quoted string is appended to dst even when ErrInvalidUTF8
// is returned.
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
// n is the read offset into src; i marks the start of the run of
// src bytes that has been scanned but not yet copied into dst.
var i, n int
var hasInvalidUTF8 bool
dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
dst = append(dst, '"')
for uint(len(src)) > uint(n) {
if c := src[n]; c < utf8.RuneSelf {
// Handle single-byte ASCII.
n++
if escapeASCII[c] == 0 {
continue // no escaping possibly needed
}
// Handle escaping of single-byte ASCII.
// '<', '>', and '&' are only escaped when EscapeForHTML is set;
// every other flagged character is always escaped.
if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
dst = append(dst, src[i:n-1]...)
dst = appendEscapedASCII(dst, c)
i = n
}
} else {
// Handle multi-byte Unicode.
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
n += rn
if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
continue // no escaping possibly needed
}
// Handle escaping of multi-byte Unicode.
// A validly encoded U+FFFD matches neither case and is copied as-is.
switch {
case isInvalidUTF8(r, rn):
hasInvalidUTF8 = true
dst = append(dst, src[i:n-rn]...)
dst = append(dst, "\ufffd"...)
i = n
case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
dst = append(dst, src[i:n-rn]...)
dst = appendEscapedUnicode(dst, r)
i = n
}
}
}
// Flush the trailing run of characters that needed no escaping.
dst = append(dst, src[i:n]...)
dst = append(dst, '"')
if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
return dst, ErrInvalidUTF8
}
return dst, nil
}
// appendEscapedASCII appends the JSON escape sequence for the ASCII
// character c to dst. The two-character short form is used for '"', '\\',
// and the control characters that have one; every other character is
// written in \uXXXX notation.
func appendEscapedASCII(dst []byte, c byte) []byte {
	var short byte
	switch c {
	case '"', '\\':
		short = c
	case '\b':
		short = 'b'
	case '\f':
		short = 'f'
	case '\n':
		short = 'n'
	case '\r':
		short = 'r'
	case '\t':
		short = 't'
	default:
		return appendEscapedUTF16(dst, uint16(c))
	}
	return append(dst, '\\', short)
}
// appendEscapedUnicode appends r to dst in \uXXXX notation.
// If utf16.EncodeRune yields a surrogate pair for r, both halves are
// written as two consecutive escapes; otherwise r is written as a single
// 16-bit escape.
func appendEscapedUnicode(dst []byte, r rune) []byte {
	hi, lo := utf16.EncodeRune(r)
	if hi == '\ufffd' || lo == '\ufffd' {
		// EncodeRune reports U+FFFD when r is not encoded as a pair.
		return appendEscapedUTF16(dst, uint16(r))
	}
	dst = appendEscapedUTF16(dst, uint16(hi))
	return appendEscapedUTF16(dst, uint16(lo))
}
// appendEscapedUTF16 appends the 16-bit code unit x to dst as a
// \uXXXX escape using four lower-case hexadecimal digits.
func appendEscapedUTF16(dst []byte, x uint16) []byte {
	const digits = "0123456789abcdef"
	escaped := [6]byte{
		'\\', 'u',
		digits[x>>12],
		digits[(x>>8)&0xf],
		digits[(x>>4)&0xf],
		digits[x&0xf],
	}
	return append(dst, escaped[:]...)
}
// ReformatString consumes a JSON string from src and appends it to dst,
// reformatting it if necessary according to the specified flags.
// It returns the appended output and the number of consumed input bytes.
//
// If the string cannot be consumed, dst is returned unchanged together with
// the error from ConsumeString.
func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
// TODO: Should this update ValueFlags as input?
var valFlags ValueFlags
n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
if err != nil {
return dst, n, err
}
// If the output requires no special escapes, and the input
// is already in canonical form or should be preserved verbatim,
// then directly copy the input to the output.
if !flags.Get(jsonflags.AnyEscape) &&
(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
dst = append(dst, src[:n]...) // copy the string verbatim
return dst, n, nil
}
// Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
// remain escaped, however we still need to respect the
// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
if flags.Get(jsonflags.PreserveRawStrings) {
// i is the scan offset; lastAppendIndex is the start of the input
// that has not yet been copied to dst.
var i, lastAppendIndex int
for i < n {
if c := src[i]; c < utf8.RuneSelf {
if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
dst = append(dst, src[lastAppendIndex:i]...)
dst = appendEscapedASCII(dst, c)
lastAppendIndex = i + 1
}
i++
} else {
r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
dst = append(dst, src[lastAppendIndex:i]...)
dst = appendEscapedUnicode(dst, r)
lastAppendIndex = i + rn
}
i += rn
}
}
return append(dst, src[lastAppendIndex:n]...), n, nil
}
// The input contains characters that might need escaping,
// unnecessary escape sequences, or invalid UTF-8.
// Perform a round-trip unquote and quote to properly reformat
// these sequences according the current flags.
// Errors from the round trip are discarded here.
b, _ := AppendUnquote(nil, src[:n])
dst, _ = AppendQuote(dst, b, flags)
return dst, n, nil
}
// AppendFloat appends src to dst as a JSON number per RFC 7159, section 6,
// formatted like the ES6 number-to-string conversion.
// See https://go.dev/issue/14135.
//
// For 64-bit floating-point numbers the output is identical to ECMA-262,
// 6th edition, section 7.1.12.1 and to RFC 8785, section 3.2.2.3,
// with one exception: -0 is formatted as -0 instead of just 0.
//
// For 32-bit floating-point numbers, a 32-bit equivalent of the algorithm
// is used, since ECMA-262 specifies no algorithm for 32-bit numbers.
func AppendFloat(dst []byte, src float64, bits int) []byte {
	if bits == 32 {
		src = float64(float32(src))
	}

	// Exponent notation is used for non-zero magnitudes
	// below 1e-6 or at least 1e21.
	magnitude := math.Abs(src)
	useExponent := false
	if magnitude != 0 {
		switch bits {
		case 64:
			useExponent = magnitude < 1e-6 || magnitude >= 1e21
		case 32:
			useExponent = float32(magnitude) < 1e-6 || float32(magnitude) >= 1e21
		}
	}
	if !useExponent {
		return strconv.AppendFloat(dst, src, 'f', -1, bits)
	}

	dst = strconv.AppendFloat(dst, src, 'e', -1, bits)
	// Clean up e-09 to e-9.
	if end := len(dst); end >= 4 && dst[end-4] == 'e' && dst[end-3] == '-' && dst[end-2] == '0' {
		dst[end-2] = dst[end-1]
		dst = dst[:end-1]
	}
	return dst
}
// ReformatNumber consumes a JSON number from src and appends it to dst,
// canonicalizing it if specified.
// It returns the appended output and the number of consumed input bytes.
//
// If the number cannot be consumed, dst is returned unchanged together with
// the error from ConsumeNumber.
func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
n, err := ConsumeNumber(src)
if err != nil {
return dst, n, err
}
if !flags.Get(jsonflags.CanonicalizeNumbers) {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
// Identify the kind of number.
var isFloat bool
for _, c := range src[:n] {
if c == '.' || c == 'e' || c == 'E' {
isFloat = true // has fraction or exponent
break
}
}
// Check if need to canonicalize this kind of number.
switch {
case string(src[:n]) == "-0":
break // canonicalize -0 as 0 regardless of kind
case isFloat:
if !flags.Get(jsonflags.CanonicalizeRawFloats) {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
default:
// As an optimization, we can copy integer numbers below 2⁵³ verbatim
// since the canonical form is always identical.
const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
if !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
}
// Parse and reformat the number (which uses a canonical format).
// The parse error is ignored; an out-of-range result is clamped below.
fv, _ := strconv.ParseFloat(string(src[:n]), 64)
switch {
case fv == 0:
fv = 0 // normalize negative zero as just zero
case math.IsInf(fv, +1):
fv = +math.MaxFloat64
case math.IsInf(fv, -1):
fv = -math.MaxFloat64
}
return AppendFloat(dst, fv, 64), n, nil
}

View File

@@ -1,217 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package jsonwire implements stateless functionality for handling JSON text.
package jsonwire
import (
"cmp"
"errors"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
)
// TrimSuffixWhitespace trims JSON whitespace (space, tab, carriage return,
// and newline) from the end of b.
func TrimSuffixWhitespace(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	end := len(b)
	for end > 0 {
		if c := b[end-1]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
		end--
	}
	return b[:end]
}
// TrimSuffixString trims a valid JSON string from the end of b.
// The behavior is undefined if b does not end with a valid JSON string.
func TrimSuffixString(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	end := len(b)
	// Drop the closing quote.
	if end > 0 && b[end-1] == '"' {
		end--
	}
	// Walk backwards until a quote not preceded by a backslash is found.
	for end >= 2 {
		if b[end-1] == '"' && b[end-2] != '\\' {
			break
		}
		end--
	}
	// Drop the opening quote.
	if end > 0 && b[end-1] == '"' {
		end--
	}
	return b[:end]
}
// HasSuffixByte reports whether the last byte of b is c.
// It is false for an empty b.
func HasSuffixByte(b []byte, c byte) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) == 0 {
		return false
	}
	return b[len(b)-1] == c
}
// TrimSuffixByte returns b without its final byte if that byte is c,
// and returns b unchanged otherwise.
func TrimSuffixByte(b []byte, c byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if last := len(b) - 1; last >= 0 && b[last] == c {
		b = b[:last]
	}
	return b
}
// QuoteRune returns a quoted representation of the first rune in b.
// A byte that is not valid UTF-8 is rendered as a hexadecimal '\x..'
// literal; everything else is formatted with strconv.QuoteRune.
func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string {
	r, size := utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
	if invalidByte := r == utf8.RuneError && size == 1; !invalidByte {
		return strconv.QuoteRune(r)
	}
	hexByte := strconv.FormatUint(uint64(b[0]), 16)
	return `'\x` + hexByte + `'`
}
// CompareUTF16 lexicographically compares x to y according
// to the UTF-16 codepoints of the UTF-8 encoded input strings.
// This implements the ordering specified in RFC 8785, section 3.2.3.
//
// The result follows the convention of cmp.Compare: negative if x sorts
// before y, zero if they are equal, and positive if x sorts after y.
func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
// NOTE: This is an optimized, mostly allocation-free implementation
// of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the
// two implementations agree on the result of comparing any two strings.
// isUTF16Self reports whether r is encoded as a single UTF-16 code unit,
// i.e., it lies in the Basic Multilingual Plane outside the surrogate range.
isUTF16Self := func(r rune) bool {
return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF')
}
for {
// When either input is exhausted, the shorter one sorts first.
if len(x) == 0 || len(y) == 0 {
return cmp.Compare(len(x), len(y))
}
// ASCII fast-path.
if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf {
if x[0] != y[0] {
return cmp.Compare(x[0], y[0])
}
x, y = x[1:], y[1:]
continue
}
// Decode next pair of runes as UTF-8.
rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x)))
ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y)))
selfx := isUTF16Self(rx)
selfy := isUTF16Self(ry)
// When exactly one rune needs a surrogate pair, compare the other rune
// against the first (high) surrogate of that pair.
switch {
// The x rune is a single UTF-16 codepoint, while
// the y rune is a surrogate pair of UTF-16 codepoints.
case selfx && !selfy:
ry, _ = utf16.EncodeRune(ry)
// The y rune is a single UTF-16 codepoint, while
// the x rune is a surrogate pair of UTF-16 codepoints.
case selfy && !selfx:
rx, _ = utf16.EncodeRune(rx)
}
if rx != ry {
return cmp.Compare(rx, ry)
}
// Check for invalid UTF-8, in which case,
// we just perform a byte-for-byte comparison.
if isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny) {
if x[0] != y[0] {
return cmp.Compare(x[0], y[0])
}
}
x, y = x[nx:], y[ny:]
}
}
// truncateMaxUTF8 truncates b to at most utf8.UTFMax bytes, which is still
// long enough to hold the first rune of b.
//
// The utf8 package currently lacks generic variants, which complicates
// generic functions that operate on either []byte or string.
// As a workaround, callers always use the utf8 function operating on strings,
// but first truncate the input so that the result is identical.
//
// Example usage:
//
//	utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
//
// Converting a []byte to a string is stack allocated since
// truncateMaxUTF8 guarantees that the []byte is short.
func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
	// TODO(https://go.dev/issue/56948): Remove this function and
	// instead directly call generic utf8 functions wherever used.
	if len(b) <= utf8.UTFMax {
		return b
	}
	return b[:utf8.UTFMax]
}
// ErrInvalidUTF8 is the error reported by functions in this package
// when they encounter invalid UTF-8.
//
// TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead.
var ErrInvalidUTF8 = errors.New("invalid UTF-8")
// NewInvalidCharacterError returns an error of the form
// "invalid character <rune> <where>", where <rune> is the quoted
// first rune of prefix and where describes the location.
func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error {
	return errors.New("invalid character " + QuoteRune(prefix) + " " + where)
}
// NewInvalidEscapeSequenceError returns an error describing the invalid
// escape sequence what. Inputs longer than six bytes are labeled as a
// "surrogate pair", shorter ones as an "escape sequence".
//
// The sequence is shown between backticks unless it contains a backtick,
// the replacement character, whitespace, or a non-printable rune, in which
// case it is formatted with strconv.Quote instead.
func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
	text := string(what)

	kind := "escape sequence"
	if len(what) > 6 {
		kind = "surrogate pair"
	}

	shown := "`" + text + "`"
	for _, r := range text {
		if r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r) {
			shown = strconv.Quote(text)
			break
		}
	}
	return errors.New("invalid " + kind + " " + shown + " in string")
}
// TruncatePointer optionally truncates the JSON pointer,
// enforcing that the length roughly does not exceed n.
//
// A pointer no longer than n is returned unchanged. Otherwise a prefix and
// a suffix of s are kept and the removed middle is replaced by a marker
// built from "…" and "/".
func TruncatePointer(s string, n int) string {
if len(s) <= n {
return s
}
// s[:i] is the kept prefix and s[j:] the kept suffix,
// each starting out at roughly half of n.
i := n / 2
j := len(s) - n/2
// Avoid truncating a name if there are multiple names present.
if k := strings.LastIndexByte(s[:i], '/'); k > 0 {
i = k
}
if k := strings.IndexByte(s[j:], '/'); k >= 0 {
j += k + len("/")
}
// Avoid truncation in the middle of a UTF-8 rune.
for i > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:i])) {
i--
}
for j < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[j:])) {
j++
}
// Determine the right middle fragment to use.
// The number of '/' separators in the removed part selects the marker.
var middle string
switch strings.Count(s[i:j], "/") {
case 0:
middle = "…"
case 1:
middle = "…/…"
default:
middle = "…/…/…"
}
// When the removed part begins or ends exactly on a separator,
// drop the ellipsis on that side of the marker.
if strings.HasPrefix(s[i:j], "/") && middle != "…" {
middle = strings.TrimPrefix(middle, "…")
}
if strings.HasSuffix(s[i:j], "/") && middle != "…" {
middle = strings.TrimSuffix(middle, "…")
}
return s[:i] + middle + s[j:]
}
// isInvalidUTF8 reports whether a decoded rune r of width rn is the result
// of decoding an invalid byte, i.e. utf8.RuneError with a width of one,
// as opposed to a validly encoded U+FFFD.
func isInvalidUTF8(r rune, rn int) bool {
	if rn != 1 {
		return false
	}
	return r == utf8.RuneError
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,116 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package jsontext implements syntactic processing of JSON
// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785.
// JSON is a simple data interchange format that can represent
// primitive data types such as booleans, strings, and numbers,
// in addition to structured data types such as objects and arrays.
//
// This package (encoding/json/jsontext) is experimental,
// and not subject to the Go 1 compatibility promise.
// It only exists when building with the GOEXPERIMENT=jsonv2 environment variable set.
// Most users should use [encoding/json].
//
// The [Encoder] and [Decoder] types are used to encode or decode
// a stream of JSON tokens or values.
//
// # Tokens and Values
//
// A JSON token refers to the basic structural elements of JSON:
//
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - a begin or end delimiter for a JSON object (i.e., '{' or '}')
// - a begin or end delimiter for a JSON array (i.e., '[' or ']')
//
// A JSON token is represented by the [Token] type in Go. Technically,
// there are two additional structural characters (i.e., ':' and ','),
// but there is no [Token] representation for them since their presence
// can be inferred by the structure of the JSON grammar itself.
// For example, there must always be an implicit colon between
// the name and value of a JSON object member.
//
// A JSON value refers to a complete unit of JSON data:
//
// - a JSON literal, string, or number
// - a JSON object (e.g., `{"name":"value"}`)
// - a JSON array (e.g., `[1,2,3]`)
//
// A JSON value is represented by the [Value] type in Go and is a []byte
// containing the raw textual representation of the value. There is some overlap
// between tokens and values as both contain literals, strings, and numbers.
// However, only a value can represent the entirety of a JSON object or array.
//
// The [Encoder] and [Decoder] types contain methods to read or write the next
// [Token] or [Value] in a sequence. They maintain a state machine to validate
// whether the sequence of JSON tokens and/or values produces a valid JSON.
// [Options] may be passed to the [NewEncoder] or [NewDecoder] constructors
// to configure the syntactic behavior of encoding and decoding.
//
// # Terminology
//
// The terms "encode" and "decode" are used for syntactic functionality
// that is concerned with processing JSON based on its grammar, and
// the terms "marshal" and "unmarshal" are used for semantic functionality
// that determines the meaning of JSON values as Go values and vice-versa.
// This package (i.e., [jsontext]) deals with JSON at a syntactic layer,
// while [encoding/json/v2] deals with JSON at a semantic layer.
// The goal is to provide a clear distinction between functionality that
// is purely concerned with encoding versus that of marshaling.
// For example, one can directly encode a stream of JSON tokens without
// needing to marshal a concrete Go value representing them.
// Similarly, one can decode a stream of JSON tokens without
// needing to unmarshal them into a concrete Go value.
//
// This package uses JSON terminology when discussing JSON, which may differ
// from related concepts in Go or elsewhere in computing literature.
//
// - a JSON "object" refers to an unordered collection of name/value members.
// - a JSON "array" refers to an ordered sequence of elements.
// - a JSON "value" refers to either a literal (i.e., null, false, or true),
// string, number, object, or array.
//
// See RFC 8259 for more information.
//
// # Specifications
//
// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259,
// and RFC 8785. Each RFC is generally a stricter subset of another RFC.
// In increasing order of strictness:
//
// - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8
// and also do not require (but recommend) that object names be unique.
// - RFC 8259 requires the use of UTF-8,
// but does not require (but recommends) that object names be unique.
// - RFC 7493 requires the use of UTF-8
// and also requires that object names be unique.
// - RFC 8785 defines a canonical representation. It requires the use of UTF-8
// and also requires that object names be unique and in a specific ordering.
// It specifies exactly how strings and numbers must be formatted.
//
// The primary difference between RFC 4627 and RFC 7159 is that the former
// restricted top-level values to only JSON objects and arrays, while
// RFC 7159 and subsequent RFCs permit top-level values to additionally be
// JSON nulls, booleans, strings, or numbers.
//
// By default, this package operates on RFC 7493, but can be configured
// to operate according to the other RFC specifications.
// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it.
// In particular, it makes specific choices about behavior that RFC 8259
// leaves as undefined in order to ensure greater interoperability.
//
// # Security Considerations
//
// See the "Security Considerations" section in [encoding/json/v2].
package jsontext
// requireKeyedLiterals can be embedded in a struct to require keyed literals.
// It is an empty struct, so it carries no data of its own.
type requireKeyedLiterals struct{}
// nonComparable can be embedded in a struct to prevent comparability.
// Function values are not comparable in Go, so neither is an array of them;
// the zero length means the array holds no elements.
type nonComparable [0]func()

View File

@@ -1,972 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"io"
"math/bits"
"next.orly.dev/pkg/json/internal/jsonflags"
"next.orly.dev/pkg/json/internal/jsonopts"
"next.orly.dev/pkg/json/internal/jsonwire"
)
// Encoder is a streaming encoder from raw JSON tokens and values.
// It is used to write a stream of top-level JSON values,
// each terminated with a newline character.
//
// [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved.
// For example, the following JSON value:
//
//	{"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be composed with the following calls (ignoring errors for brevity):
//
//	e.WriteToken(BeginObject)        // {
//	e.WriteToken(String("name"))     // "name"
//	e.WriteToken(String("value"))    // "value"
//	e.WriteValue(Value(`"array"`))   // "array"
//	e.WriteToken(BeginArray)         // [
//	e.WriteToken(Null)               // null
//	e.WriteToken(False)              // false
//	e.WriteValue(Value("true"))      // true
//	e.WriteToken(Float(3.14159))     // 3.14159
//	e.WriteToken(EndArray)           // ]
//	e.WriteValue(Value(`"object"`))  // "object"
//	e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"}
//	e.WriteToken(EndObject)          // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call [Encoder.WriteToken] with a string
// for object names.
type Encoder struct {
s encoderState
}
// encoderState is the low-level state of Encoder.
// It has exported fields and methods for use by the "json" package.
type encoderState struct {
// state is reset by encoderState.reset via e.state.reset().
// NOTE(review): presumably the source of the promoted Tokens, Names and
// Namespaces fields used throughout this file — confirm in its definition.
state
// encodeBuffer provides the output buffer (Buf) and the destination writer.
encodeBuffer
// Struct holds the options joined in encoderState.reset.
// NOTE(review): presumably the source of the promoted Flags, Indent and
// IndentPrefix fields — confirm in package jsonopts.
jsonopts.Struct
SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers
}
// encodeBuffer is a buffer split into 2 segments:
//
//   - Buf[0:len(Buf)]        // written (but unflushed) portion of the buffer
//   - Buf[len(Buf):cap(Buf)] // unused portion of the buffer
type encodeBuffer struct {
Buf []byte // may alias wr if it is a bytes.Buffer
// baseOffset is added to len(Buf) to obtain the absolute offset
// relative to the start of io.Writer stream.
baseOffset int64
// wr is the destination writer. Flush and NeedFlush do nothing when it is nil.
wr io.Writer
// maxValue is the approximate maximum Value size passed to WriteValue.
// It is accumulated with a bitwise OR rather than an exact max.
maxValue int
// availBuffer is the buffer returned by the AvailableBuffer method.
availBuffer []byte // always has zero length
// bufStats is statistics about buffer utilization.
// It is only used with pooled encoders in pools.go.
bufStats bufferStatistics
}
// NewEncoder returns a new streaming encoder that writes to w and is
// configured with the provided options.
// The internal buffer is flushed once it is sufficiently full or
// once a complete top-level value has been written.
//
// If w is a [bytes.Buffer], the encoder appends directly into that buffer
// instead of copying the contents from an intermediate buffer.
func NewEncoder(w io.Writer, opts ...Options) *Encoder {
	enc := &Encoder{}
	enc.Reset(w, opts...)
	return enc
}
// Reset reinitializes the encoder so that it writes afresh to w using
// the provided options. Reset must not be called on an Encoder that was
// passed to the [encoding/json/v2.MarshalerTo.MarshalJSONTo] method
// or the [encoding/json/v2.MarshalToFunc] function.
//
// It panics if e or w is nil, or if the encoder is currently being used
// within a marshal call.
func (e *Encoder) Reset(w io.Writer, opts ...Options) {
	if e == nil {
		panic("jsontext: invalid nil Encoder")
	}
	if w == nil {
		panic("jsontext: invalid nil io.Writer")
	}
	if e.s.Flags.Get(jsonflags.WithinArshalCall) {
		panic("jsontext: cannot reset Encoder passed to json.MarshalerTo")
	}
	e.s.reset(nil, w, opts...)
}
// reset reinitializes the encoder state to write to w, starting with b as
// the output buffer, and applies opts.
//
// When w is a non-nil *bytes.Buffer, the output buffer is replaced by the
// unused tail of that buffer so that writes land in it directly.
// When the Multiline flag is set, any of the SpaceAfterColon,
// SpaceAfterComma and Indent flags that were not specified explicitly
// receive their multiline defaults.
func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) {
	e.state.reset()
	e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: e.bufStats}
	if target, isBuffer := w.(*bytes.Buffer); isBuffer && target != nil {
		e.Buf = target.AvailableBuffer() // alias the unused buffer of target
	}

	// Join into a fresh value first; opts may contain e.Struct itself,
	// which therefore must not be mutated while joining.
	var merged jsonopts.Struct
	merged.Join(opts...)
	e.Struct = merged

	if !e.Flags.Get(jsonflags.Multiline) {
		return
	}
	if !e.Flags.Has(jsonflags.SpaceAfterColon) {
		e.Flags.Set(jsonflags.SpaceAfterColon | 1)
	}
	if !e.Flags.Has(jsonflags.SpaceAfterComma) {
		e.Flags.Set(jsonflags.SpaceAfterComma | 0)
	}
	if !e.Flags.Has(jsonflags.Indent) {
		e.Flags.Set(jsonflags.Indent | 1)
		e.Indent = "\t"
	}
}
// Options returns the options used to construct the encoder and
// may additionally contain semantic options passed to a
// [encoding/json/v2.MarshalEncode] call.
//
// If operating within
// a [encoding/json/v2.MarshalerTo.MarshalJSONTo] method call or
// a [encoding/json/v2.MarshalToFunc] function call,
// then the returned options are only valid within the call.
func (e *Encoder) Options() Options {
	current := &e.s.Struct
	return current
}
// NeedFlush determines whether to flush at this point.
// It reports true only when there is an underlying writer and either
// the token stack depth is 1 or more than 75% of the buffer capacity is used.
// NOTE(review): a depth of 1 presumably means a top-level value was just
// completed (Flush appends the top-level newline under the same condition).
func (e *encoderState) NeedFlush() bool {
// NOTE: This function is carefully written to be inlinable.
// Avoid flushing if e.wr is nil since there is no underlying writer.
// Flush if less than 25% of the capacity remains.
// Flushing at some constant fraction ensures that the buffer stops growing
// so long as the largest Token or Value fits within that unused capacity.
return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4)
}
// Flush writes the buffered output to the underlying io.Writer.
// It may append a trailing newline after a top-level value.
//
// Flush is a no-op when there is no underlying writer or when flushing
// must be avoided so that the last object member can still be unwritten.
// A failed write is reported as an *ioError; the portion of the buffer
// that was not written is preserved.
func (e *encoderState) Flush() error {
	if e.wr == nil || e.avoidFlush() {
		return nil
	}

	// In streaming mode, always emit a newline after the top-level value.
	atTopLevel := e.Tokens.Depth() == 1
	if atTopLevel && !e.Flags.Get(jsonflags.OmitTopLevelNewline) {
		e.Buf = append(e.Buf, '\n')
	}

	// Let the object name stack copy what it needs before the buffer
	// content is flushed away.
	e.Names.copyQuotedBuffer(e.Buf)

	// Fast path for bytes.Buffer.
	if out, isBuffer := e.wr.(*bytes.Buffer); isBuffer {
		// If e.Buf already aliases the internal buffer of out,
		// Write merely advances the internal offset;
		// otherwise Write copies as usual.
		// See https://go.dev/issue/42986.
		written, _ := out.Write(e.Buf) // never fails unless out is nil
		e.baseOffset += int64(written)

		// Appends elsewhere in the Encoder may have to grow the buffer
		// if too little room is left. That is correct but slow, so keep
		// at least 25% of the current length available.
		if free := out.Available(); free < out.Len()/4 {
			out.Grow(free + 1)
		}

		e.Buf = out.AvailableBuffer()
		return nil
	}

	// General path: write the internal buffer to the io.Writer.
	written, err := e.wr.Write(e.Buf)
	e.baseOffset += int64(written)
	if err != nil {
		// Keep the unflushed remainder so that a write error is not fatal,
		// provided the io.Writer stays in a consistent state.
		if written > 0 {
			remaining := copy(e.Buf, e.Buf[written:])
			e.Buf = e.Buf[:remaining]
		}
		return &ioError{action: "write", err: err}
	}
	e.Buf = e.Buf[:0]

	// Decide whether to grow the buffer.
	// cap(e.Buf) may already exceed maxBufferSize because an append
	// elsewhere grew it to hold a large token.
	const (
		maxBufferSize    = 4 << 10
		growthSizeFactor = 2 // higher value is faster
		growthRateFactor = 2 // higher value is slower
	)
	capacity := cap(e.Buf)
	// Grow only while below the maximum size, and only once enough bytes
	// have been processed to justify the cost of growing.
	belowLimit := capacity <= maxBufferSize/growthSizeFactor
	if belowLimit && int64(capacity) < e.previousOffsetEnd()/growthRateFactor {
		e.Buf = make([]byte, 0, capacity*growthSizeFactor)
	}
	return nil
}
// offsetAt converts pos, a position within the unflushed buffer,
// into an absolute offset by adding baseOffset.
// The receiver is named e to match the other encodeBuffer methods.
func (e *encodeBuffer) offsetAt(pos int) int64 { return e.baseOffset + int64(pos) }
// previousOffsetEnd returns the absolute offset just past the last
// buffered byte, i.e. baseOffset plus the length of the unflushed buffer.
func (e *encodeBuffer) previousOffsetEnd() int64 {
	pending := int64(len(e.Buf))
	return e.baseOffset + pending
}
// unflushedBuffer returns the written but not yet flushed bytes.
// The result aliases the internal buffer.
func (e *encodeBuffer) unflushedBuffer() []byte {
	return e.Buf
}
// avoidFlush reports whether flushing should be avoided so that the buffer
// always retains enough data to unwrite the last object member
// should that member turn out to be empty.
func (e *encoderState) avoidFlush() bool {
	// Right after BeginObject or BeginArray it is not yet known whether
	// the object or array will end up empty.
	if e.Tokens.Last.Length() == 0 {
		return true
	}
	// Before an object value it is not yet known whether
	// that value will end up empty.
	if e.Tokens.Last.needObjectValue() {
		return true
	}
	// After an object value, hold back if the value looks empty.
	if e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2 {
		// These are the last two bytes of every empty value.
		tail := string(e.Buf[len(e.Buf)-2:])
		return tail == `ll` || tail == `""` || tail == `{}` || tail == `[]`
	}
	return false
}
// UnwriteEmptyObjectMember removes the most recently written object member
// from the unflushed buffer if its value is empty (null, "", {} or []),
// and reports whether a member was removed.
//
// If prevName is non-nil and a member was removed, the last unquoted name
// tracked by the name stack is replaced with *prevName.
// It panics unless called on an object immediately after writing a value.
func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool {
	if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 {
		panic("BUG: must be called on an object after writing a value")
	}

	// The flushing logic never flushes a trailing empty value, and
	// the encoder never writes trailing whitespace eagerly,
	// so the empty value (if any) is at the very end of the buffer.
	buf := e.unflushedBuffer()
	if len(buf) < 3 {
		return false
	}

	// Classify the last value by its final two bytes.
	var valueLen int
	switch string(buf[len(buf)-2:]) {
	case "ll": // last two bytes of `null`
		valueLen = len(`null`)
	case `""`:
		// A non-empty string can also end in `""` when the
		// second to last quote is escaped, e.g. `"\""`.
		if buf[len(buf)-3] == '\\' {
			return false
		}
		valueLen = len(`""`)
	case `{}`:
		valueLen = len(`{}`)
	case `[]`:
		valueLen = len(`[]`)
	default:
		return false
	}

	// Strip, from the end: value, whitespace, colon, name, whitespace, comma.
	buf = buf[:len(buf)-valueLen]
	buf = jsonwire.TrimSuffixWhitespace(buf)
	buf = jsonwire.TrimSuffixByte(buf, ':')
	buf = jsonwire.TrimSuffixString(buf)
	buf = jsonwire.TrimSuffixWhitespace(buf)
	buf = jsonwire.TrimSuffixByte(buf, ',')
	e.Buf = buf // store back the truncated unflushed buffer

	// Roll back the state machine.
	e.Tokens.Last.decrement() // for object member value
	e.Tokens.Last.decrement() // for object member name
	if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.isActiveNamespace() {
		e.Namespaces.Last().removeLast()
	}
	e.Names.clearLast()
	if prevName != nil {
		e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName
		e.Names.replaceLastUnquotedName(*prevName)
	}
	return true
}
// UnwriteOnlyObjectMemberName removes the only object member name written
// so far from the buffer and returns that name in unquoted form.
// It panics unless called on an object right after writing its first name.
func (e *encoderState) UnwriteOnlyObjectMemberName() string {
	if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 {
		panic("BUG: must be called on an object after writing first name")
	}

	// Split the buffer into everything before the name and the quoted name,
	// then drop the name along with any whitespace preceding it.
	head := jsonwire.TrimSuffixString(e.Buf)
	quoted := e.Buf[len(head):]
	isVerbatim := bytes.IndexByte(quoted, '\\') < 0 // no escape sequences present
	name := string(jsonwire.UnquoteMayCopy(quoted, isVerbatim))
	e.Buf = jsonwire.TrimSuffixWhitespace(head)

	// Roll back the state machine.
	e.Tokens.Last.decrement()
	if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.isActiveNamespace() {
		e.Namespaces.Last().removeLast()
	}
	e.Names.clearLast()
	return name
}
// WriteToken writes the next token and advances the internal write offset.
//
// The kind of the provided token must be consistent with the JSON grammar.
// For example, it is an error to provide a number when the encoder
// is expecting an object name (which is always a string), or
// to provide an end object delimiter when the encoder is finishing an array.
// If the provided token is invalid, then it reports a [SyntacticError] and
// the internal state remains unchanged. The offset reported
// in [SyntacticError] will be relative to the [Encoder.OutputOffset].
func (e *Encoder) WriteToken(t Token) error {
	err := e.s.WriteToken(t)
	return err
}
// WriteToken appends the token t to the buffer, updates the state machine,
// and flushes if NeedFlush reports true.
// On a grammar or duplicate-name error it returns an error wrapped by
// wrapSyntacticError and leaves e.Buf untouched.
func (e *encoderState) WriteToken(t Token) error {
	kind := t.Kind()
	buf := e.Buf // work on a local so that e.Buf is untouched on error

	// Emit any required delimiter and optional whitespace first.
	buf = e.Tokens.MayAppendDelim(buf, kind)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		buf = e.appendWhitespace(buf, kind)
	}
	start := len(buf) // offset before the token

	// Emit the token itself and advance the state machine.
	var err error
	switch kind {
	case 'n':
		buf = append(buf, "null"...)
		err = e.Tokens.appendLiteral()
	case 'f':
		buf = append(buf, "false"...)
		err = e.Tokens.appendLiteral()
	case 't':
		buf = append(buf, "true"...)
		err = e.Tokens.appendLiteral()
	case '"':
		if buf, err = t.appendString(buf, &e.Flags); err != nil {
			break
		}
		if e.Tokens.Last.NeedObjectName() {
			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
				if !e.Tokens.Last.isValidNamespace() {
					err = errInvalidNamespace
					break
				}
				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(buf[start:], false) {
					err = wrapWithObjectName(ErrDuplicateName, buf[start:])
					break
				}
			}
			e.Names.ReplaceLastQuotedOffset(start) // only replace if insertQuoted succeeds
		}
		err = e.Tokens.appendString()
	case '0':
		if buf, err = t.appendNumber(buf, &e.Flags); err != nil {
			break
		}
		err = e.Tokens.appendNumber()
	case '{':
		buf = append(buf, '{')
		if err = e.Tokens.pushObject(); err != nil {
			break
		}
		e.Names.push()
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
			e.Namespaces.push()
		}
	case '}':
		buf = append(buf, '}')
		if err = e.Tokens.popObject(); err != nil {
			break
		}
		e.Names.pop()
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
			e.Namespaces.pop()
		}
	case '[':
		buf = append(buf, '[')
		err = e.Tokens.pushArray()
	case ']':
		buf = append(buf, ']')
		err = e.Tokens.popArray()
	default:
		err = errInvalidToken
	}
	if err != nil {
		return wrapSyntacticError(e, err, start, +1)
	}

	// Commit the local buffer back into e.
	e.Buf = buf
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}
// AppendRaw appends either a raw string (without double quotes) or number.
// Specify safeASCII if the string output is guaranteed to be ASCII
// without any characters (including '<', '>', and '&') that need escaping,
// otherwise this will validate whether the string needs escaping.
// The appended bytes for a JSON number must be valid.
//
// k must be '"' (string) or '0' (number); any other kind panics.
// appendFn receives the encoder buffer and returns it with the raw content
// appended. An error from appendFn is returned to the caller as is, while
// grammar and duplicate-name errors are wrapped by wrapSyntacticError.
// On error, e.Buf is left unchanged.
//
// This is a specialized implementation of Encoder.WriteValue
// that allows appending directly into the buffer.
// It is only called from marshal logic in the "json" package.
func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error {
	b := e.Buf // use local variable to avoid mutating e in case of error

	// Append any delimiters or optional whitespace.
	b = e.Tokens.MayAppendDelim(b, k)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		b = e.appendWhitespace(b, k)
	}
	pos := len(b) // offset before the token

	var err error
	switch k {
	case '"':
		// Append directly into the encoder buffer by assuming that
		// most of the time none of the characters need escaping.
		b = append(b, '"')
		if b, err = appendFn(b); err != nil {
			return err
		}
		b = append(b, '"')

		// Check whether we need to escape the string and if necessary
		// copy it to a scratch buffer and then escape it back.
		isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)])
		if !isVerbatim {
			b2 := append(e.availBuffer, b[pos+len(`"`):len(b)-len(`"`)]...)
			b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags)
			e.availBuffer = b2[:0]
			if err != nil {
				return wrapSyntacticError(e, err, pos, +1)
			}
		}

		// Update the state machine.
		if e.Tokens.Last.NeedObjectName() {
			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
				if !e.Tokens.Last.isValidNamespace() {
					// Report the same sentinel as WriteToken and WriteValue.
					// err is always nil at this point, so wrapping it would
					// produce a SyntacticError without an underlying cause.
					return wrapSyntacticError(e, errInvalidNamespace, pos, +1)
				}
				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) {
					err = wrapWithObjectName(ErrDuplicateName, b[pos:])
					return wrapSyntacticError(e, err, pos, +1)
				}
			}
			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
		}
		if err := e.Tokens.appendString(); err != nil {
			return wrapSyntacticError(e, err, pos, +1)
		}
	case '0':
		if b, err = appendFn(b); err != nil {
			return err
		}
		if err := e.Tokens.appendNumber(); err != nil {
			return wrapSyntacticError(e, err, pos, +1)
		}
	default:
		panic("BUG: invalid kind")
	}

	// Finish off the buffer and store it back into e.
	e.Buf = b
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}
// WriteValue writes the next raw value and advances the internal write offset.
// The Encoder does not simply copy the provided value verbatim; it
// parses it to ensure that it is syntactically valid and reformats it
// according to how the Encoder is configured to format whitespace and strings.
// If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled
// as the Unicode replacement character, U+FFFD.
//
// The kind of the provided value must be consistent with the JSON grammar
// (see examples on [Encoder.WriteToken]). If the provided value is invalid,
// then it reports a [SyntacticError] and the internal state remains unchanged.
// The offset reported in [SyntacticError] will be relative to the
// [Encoder.OutputOffset] plus the offset into v of any encountered syntax error.
func (e *Encoder) WriteValue(v Value) error {
	err := e.s.WriteValue(v)
	return err
}
// WriteValue validates and reformats the raw value v into the buffer,
// updates the state machine, and flushes if NeedFlush reports true.
// v may be surrounded by whitespace, but nothing else may follow the value.
// On error the result is wrapped by wrapSyntacticError and
// e.Buf is left untouched.
func (e *encoderState) WriteValue(v Value) error {
	e.maxValue |= len(v) // bitwise OR is a fast approximation of max

	kind := v.Kind()
	buf := e.Buf // work on a local so that e.Buf is untouched on error

	// Emit any required delimiter and optional whitespace first.
	buf = e.Tokens.MayAppendDelim(buf, kind)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		buf = e.appendWhitespace(buf, kind)
	}
	start := len(buf) // offset before the value

	// Reformat the value into the output, skipping surrounding whitespace.
	consumed := jsonwire.ConsumeWhitespace(v)
	buf, valueLen, err := e.reformatValue(buf, v[consumed:], e.Tokens.Depth())
	if err != nil {
		return wrapSyntacticError(e, err, start+consumed+valueLen, +1)
	}
	consumed += valueLen
	consumed += jsonwire.ConsumeWhitespace(v[consumed:])
	if len(v) > consumed {
		err = jsonwire.NewInvalidCharacterError(v[consumed:], "after top-level value")
		return wrapSyntacticError(e, err, start+consumed, 0)
	}

	// Record the kind of the value in the state machine.
	switch kind {
	case 'n', 'f', 't':
		err = e.Tokens.appendLiteral()
	case '"':
		if e.Tokens.Last.NeedObjectName() {
			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
				if !e.Tokens.Last.isValidNamespace() {
					err = errInvalidNamespace
					break
				}
				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(buf[start:], false) {
					err = wrapWithObjectName(ErrDuplicateName, buf[start:])
					break
				}
			}
			e.Names.ReplaceLastQuotedOffset(start) // only replace if insertQuoted succeeds
		}
		err = e.Tokens.appendString()
	case '0':
		err = e.Tokens.appendNumber()
	case '{':
		// A complete object was written: push and immediately pop it.
		if err = e.Tokens.pushObject(); err != nil {
			break
		}
		if err = e.Tokens.popObject(); err != nil {
			panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
		}
		if e.Flags.Get(jsonflags.ReorderRawObjects) {
			mustReorderObjects(buf[start:])
		}
	case '[':
		// A complete array was written: push and immediately pop it.
		if err = e.Tokens.pushArray(); err != nil {
			break
		}
		if err = e.Tokens.popArray(); err != nil {
			panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
		}
		if e.Flags.Get(jsonflags.ReorderRawObjects) {
			mustReorderObjects(buf[start:])
		}
	}
	if err != nil {
		return wrapSyntacticError(e, err, start, +1)
	}

	// Commit the local buffer back into e.
	e.Buf = buf
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}
// CountNextDelimWhitespace returns the number of delimiter and whitespace
// bytes that would precede the upcoming token, assuming it is a JSON value.
// This method is used for error reporting at the semantic layer.
func (e *encoderState) CountNextDelimWhitespace() (n int) {
	const next = Kind('"') // arbitrary kind as next JSON value
	delim := e.Tokens.needDelim(next)
	if delim > 0 {
		n += len(",") // a "," or ":" delimiter is a single byte either way
	}

	// After a colon, at most a single space follows.
	if delim == ':' {
		if e.Flags.Get(jsonflags.SpaceAfterColon) {
			n += len(" ")
		}
		return n
	}

	// Otherwise, an optional space after the comma and,
	// in multiline mode, a newline plus indentation.
	if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
		n += len(" ")
	}
	if e.Flags.Get(jsonflags.Multiline) {
		if level := e.Tokens.NeedIndent(next); level > 0 {
			n += len("\n") + len(e.IndentPrefix) + (level-1)*len(e.Indent)
		}
	}
	return n
}
// appendWhitespace appends to b the whitespace that immediately precedes
// the next token of kind next and returns the extended buffer.
func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte {
	delim := e.Tokens.needDelim(next)

	// After a colon, at most a single space is emitted.
	if delim == ':' {
		if e.Flags.Get(jsonflags.SpaceAfterColon) {
			b = append(b, ' ')
		}
		return b
	}

	// Otherwise, an optional space after the comma and,
	// in multiline mode, a newline plus indentation.
	if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
		b = append(b, ' ')
	}
	if e.Flags.Get(jsonflags.Multiline) {
		b = e.AppendIndent(b, e.Tokens.NeedIndent(next))
	}
	return b
}
// AppendIndent appends a newline, the indent prefix, and n-1 repetitions
// of the indent string to b for the nested level n.
// It returns b unchanged when n is zero.
func (e *encoderState) AppendIndent(b []byte, n int) []byte {
	if n == 0 {
		return b
	}
	b = append(b, '\n')
	b = append(b, e.IndentPrefix...)
	for level := 1; level < n; level++ {
		b = append(b, e.Indent...)
	}
	return b
}
// reformatValue parses a single JSON value at the start of src and
// appends it to dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of input bytes consumed.
// An empty src yields io.ErrUnexpectedEOF.
func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) {
	// TODO: Should this update ValueFlags as input?
	if len(src) == 0 {
		return dst, 0, io.ErrUnexpectedEOF
	}
	switch Kind(src[0]).normalize() {
	case 'n':
		if jsonwire.ConsumeNull(src) != 0 {
			return append(dst, "null"...), len("null"), nil
		}
		n, err := jsonwire.ConsumeLiteral(src, "null")
		return dst, n, err
	case 'f':
		if jsonwire.ConsumeFalse(src) != 0 {
			return append(dst, "false"...), len("false"), nil
		}
		n, err := jsonwire.ConsumeLiteral(src, "false")
		return dst, n, err
	case 't':
		if jsonwire.ConsumeTrue(src) != 0 {
			return append(dst, "true"...), len("true"), nil
		}
		n, err := jsonwire.ConsumeLiteral(src, "true")
		return dst, n, err
	case '"':
		// Simple strings are copied verbatim.
		if n := jsonwire.ConsumeSimpleString(src); n > 0 {
			return append(dst, src[:n]...), n, nil
		}
		return jsonwire.ReformatString(dst, src, &e.Flags)
	case '0':
		// Simple numbers are copied verbatim unless canonicalization is requested.
		if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) {
			return append(dst, src[:n]...), n, nil
		}
		return jsonwire.ReformatNumber(dst, src, &e.Flags)
	case '{':
		return e.reformatObject(dst, src, depth)
	case '[':
		return e.reformatArray(dst, src, depth)
	default:
		return dst, 0, jsonwire.NewInvalidCharacterError(src, "at start of value")
	}
}
// reformatObject parses a JSON object at the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of input bytes consumed.
// It panics if src does not start with '{'.
// Errors that occur after a member name has been read are annotated
// with that name.
func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) {
	// Object begin.
	switch {
	case len(src) == 0 || src[0] != '{':
		panic("BUG: reformatObject must be called with a buffer that starts with '{'")
	case depth == maxNestingDepth+1:
		return dst, 0, errMaxDepth
	}
	dst = append(dst, '{')
	pos := len("{")

	// Possibly an immediate object end.
	pos += jsonwire.ConsumeWhitespace(src[pos:])
	if uint(len(src)) <= uint(pos) {
		return dst, pos, io.ErrUnexpectedEOF
	}
	if src[pos] == '}' {
		dst = append(dst, '}')
		pos += len("}")
		return dst, pos, nil
	}

	var (
		err   error
		names *objectNamespace
	)
	if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
		e.Namespaces.push()
		defer e.Namespaces.pop()
		names = e.Namespaces.Last()
	}
	depth++
	for {
		// Optional newline and indentation.
		if e.Flags.Get(jsonflags.Multiline) {
			dst = e.AppendIndent(dst, depth)
		}

		// Object name.
		pos += jsonwire.ConsumeWhitespace(src[pos:])
		if uint(len(src)) <= uint(pos) {
			return dst, pos, io.ErrUnexpectedEOF
		}
		nameLen := jsonwire.ConsumeSimpleString(src[pos:])
		isVerbatim := nameLen > 0
		if isVerbatim {
			dst = append(dst, src[pos:pos+nameLen]...)
		} else {
			dst, nameLen, err = jsonwire.ReformatString(dst, src[pos:], &e.Flags)
			if err != nil {
				return dst, pos + nameLen, err
			}
		}
		quotedName := src[pos : pos+nameLen]
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, isVerbatim) {
			return dst, pos, wrapWithObjectName(ErrDuplicateName, quotedName)
		}
		pos += nameLen

		// Colon.
		pos += jsonwire.ConsumeWhitespace(src[pos:])
		if uint(len(src)) <= uint(pos) {
			return dst, pos, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
		}
		if src[pos] != ':' {
			err = jsonwire.NewInvalidCharacterError(src[pos:], "after object name (expecting ':')")
			return dst, pos, wrapWithObjectName(err, quotedName)
		}
		dst = append(dst, ':')
		pos += len(":")
		if e.Flags.Get(jsonflags.SpaceAfterColon) {
			dst = append(dst, ' ')
		}

		// Object value.
		pos += jsonwire.ConsumeWhitespace(src[pos:])
		if uint(len(src)) <= uint(pos) {
			return dst, pos, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
		}
		var valueLen int
		dst, valueLen, err = e.reformatValue(dst, src[pos:], depth)
		if err != nil {
			return dst, pos + valueLen, wrapWithObjectName(err, quotedName)
		}
		pos += valueLen

		// Comma or object end.
		pos += jsonwire.ConsumeWhitespace(src[pos:])
		if uint(len(src)) <= uint(pos) {
			return dst, pos, io.ErrUnexpectedEOF
		}
		switch src[pos] {
		case ',':
			dst = append(dst, ',')
			if e.Flags.Get(jsonflags.SpaceAfterComma) {
				dst = append(dst, ' ')
			}
			pos += len(",")
			// Loop around for the next member.
		case '}':
			if e.Flags.Get(jsonflags.Multiline) {
				dst = e.AppendIndent(dst, depth-1)
			}
			dst = append(dst, '}')
			pos += len("}")
			return dst, pos, nil
		default:
			return dst, pos, jsonwire.NewInvalidCharacterError(src[pos:], "after object value (expecting ',' or '}')")
		}
	}
}
// reformatArray parses a JSON array at the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of input bytes consumed.
// It panics if src does not start with '['.
// Errors within an element are annotated with the index of that element.
func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) {
	// Array begin.
	switch {
	case len(src) == 0 || src[0] != '[':
		panic("BUG: reformatArray must be called with a buffer that starts with '['")
	case depth == maxNestingDepth+1:
		return dst, 0, errMaxDepth
	}
	dst = append(dst, '[')
	pos := len("[")

	// Possibly an immediate array end.
	pos += jsonwire.ConsumeWhitespace(src[pos:])
	if uint(len(src)) <= uint(pos) {
		return dst, pos, io.ErrUnexpectedEOF
	}
	if src[pos] == ']' {
		dst = append(dst, ']')
		pos += len("]")
		return dst, pos, nil
	}

	var (
		index int64
		err   error
	)
	depth++
	for {
		// Optional newline and indentation.
		if e.Flags.Get(jsonflags.Multiline) {
			dst = e.AppendIndent(dst, depth)
		}

		// Array element.
		pos += jsonwire.ConsumeWhitespace(src[pos:])
		if uint(len(src)) <= uint(pos) {
			return dst, pos, io.ErrUnexpectedEOF
		}
		var elemLen int
		dst, elemLen, err = e.reformatValue(dst, src[pos:], depth)
		if err != nil {
			return dst, pos + elemLen, wrapWithArrayIndex(err, index)
		}
		pos += elemLen

		// Comma or array end.
		pos += jsonwire.ConsumeWhitespace(src[pos:])
		if uint(len(src)) <= uint(pos) {
			return dst, pos, io.ErrUnexpectedEOF
		}
		switch src[pos] {
		case ',':
			dst = append(dst, ',')
			if e.Flags.Get(jsonflags.SpaceAfterComma) {
				dst = append(dst, ' ')
			}
			pos += len(",")
			index++
			// Loop around for the next element.
		case ']':
			if e.Flags.Get(jsonflags.Multiline) {
				dst = e.AppendIndent(dst, depth-1)
			}
			dst = append(dst, ']')
			pos += len("]")
			return dst, pos, nil
		default:
			return dst, pos, jsonwire.NewInvalidCharacterError(src[pos:], "after array value (expecting ',' or ']')")
		}
	}
}
// OutputOffset returns the current output byte offset, which is the location
// of the next byte immediately after the most recently written token or value.
// Because of internal buffering, the number of bytes actually written to
// the underlying [io.Writer] may be less than this offset.
func (e *Encoder) OutputOffset() int64 {
	offset := e.s.previousOffsetEnd()
	return offset
}
// AvailableBuffer returns a zero-length buffer with a possible non-zero capacity.
// This buffer is intended to be used to populate a [Value]
// being passed to an immediately succeeding [Encoder.WriteValue] call.
//
// Example usage:
//
//	b := e.AvailableBuffer()
//	b = append(b, '"')
//	b = appendString(b, v) // append the string formatting of v
//	b = append(b, '"')
//	... := e.WriteValue(b)
//
// It is the user's responsibility to ensure that the value is valid JSON.
func (e *Encoder) AvailableBuffer() []byte {
	// NOTE: e.Buf[len(e.Buf):cap(e.Buf)] is deliberately not returned, since
	// WriteValue would then need special care to avoid mangling the data
	// while reformatting. WriteValue cannot easily tell whether the input
	// Value aliases e.Buf without unsafe.Pointer, so a separate buffer is
	// handed out instead. Should this ever alias e.Buf, its interaction with
	// the specialized bytes.Buffer optimization must be reconsidered.
	want := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length
	if cap(e.s.availBuffer) >= want {
		return e.s.availBuffer
	}
	e.s.availBuffer = make([]byte, 0, want)
	return e.s.availBuffer
}
// StackDepth returns the depth of the state machine for written JSON data.
// Each level on the stack represents a nested JSON object or array.
// The depth grows by one for every [BeginObject] or [BeginArray] token
// and shrinks by one for every [EndObject] or [EndArray] token.
// The depth is zero-indexed, where zero represents the top-level JSON value.
func (e *Encoder) StackDepth() int {
	// NOTE: Keep in sync with Decoder.StackDepth.
	depth := e.s.Tokens.Depth()
	return depth - 1
}
// StackIndex returns information about the specified stack level.
// It must be a number between 0 and [Encoder.StackDepth], inclusive.
// For each level, it reports the kind:
//
//   - 0 for a level of zero,
//   - '{' for a level representing a JSON object, and
//   - '[' for a level representing a JSON array.
//
// It also reports the length of that JSON object or array.
// Each name and value in a JSON object is counted separately,
// so the effective number of members would be half the length.
// A complete JSON object must have an even length.
func (e *Encoder) StackIndex(i int) (Kind, int64) {
	// NOTE: Keep in sync with Decoder.StackIndex.
	entry := e.s.Tokens.index(i)
	if i > 0 {
		if entry.isObject() {
			return '{', entry.Length()
		}
		if entry.isArray() {
			return '[', entry.Length()
		}
	}
	return 0, entry.Length()
}
// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
func (e *Encoder) StackPointer() Pointer {
	raw := e.s.AppendStackPointer(nil, -1)
	return Pointer(raw)
}
// AppendStackPointer appends a JSON Pointer for the current stack to b
// and returns the extended buffer. The where argument is passed through
// unchanged to the underlying state.
// The name stack is first given the current buffer content.
func (e *encoderState) AppendStackPointer(b []byte, where int) []byte {
	e.Names.copyQuotedBuffer(e.Buf)
	ptr := e.state.appendStackPointer(b, where)
	return ptr
}

View File

@@ -1,182 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"io"
"strconv"
"next.orly.dev/pkg/json/internal/jsonwire"
)
// errorPrefix is prepended to every error message produced by this package.
const errorPrefix = "jsontext: "

// ioError wraps an error returned by the underlying reader or writer
// together with the action that was being performed.
type ioError struct {
	action string // either "read" or "write"
	err    error
}

// Error formats the error as "jsontext: <action> error: <cause>".
func (e *ioError) Error() string {
	cause := e.err.Error()
	return errorPrefix + e.action + " error: " + cause
}

// Unwrap returns the underlying I/O error.
func (e *ioError) Unwrap() error {
	return e.err
}
// SyntacticError is a description of a syntactic error that occurred when
// encoding or decoding JSON according to the grammar.
//
// The contents of this error as produced by this package may change over time.
type SyntacticError struct {
// requireKeyedLiterals and nonComparable are embedded marker types.
// NOTE(review): going by their names, they force keyed composite literals
// and prevent comparing SyntacticError values — confirm at their definitions.
requireKeyedLiterals
nonComparable
// ByteOffset indicates that an error occurred after this byte offset.
// The Error method only prints it when it is greater than zero.
ByteOffset int64
// JSONPointer indicates that an error occurred within this JSON value
// as indicated using the JSON Pointer notation (see RFC 6901).
JSONPointer Pointer
// Err is the underlying error.
// When it is nil, the Error method prints a generic "syntactic error".
Err error
}
// wrapSyntacticError wraps err in a [SyntacticError] annotated with a precise
// location derived from the provided [encoderState] or [decoderState].
// If err is an [ioError] or [io.EOF], it is returned as is.
//
// pos is a relative offset that is resolved into an absolute offset
// through state.offsetAt.
//
// where specifies how the JSON pointer is derived.
// If the underlying error is a [pointerSuffixError],
// its suffix is appended to the derived pointer.
func wrapSyntacticError(state interface {
	offsetAt(pos int) int64
	AppendStackPointer(b []byte, where int) []byte
}, err error, pos, where int) error {
	// I/O errors and io.EOF pass through untouched.
	if err == io.EOF {
		return err
	}
	if _, isIO := err.(*ioError); isIO {
		return err
	}

	offset := state.offsetAt(pos)
	ptr := state.AppendStackPointer(nil, where)
	if suffix, hasSuffix := err.(*pointerSuffixError); hasSuffix {
		ptr = suffix.appendPointer(ptr)
		err = suffix.error
	}

	// For a decoder, turn a mismatched delimiter into a more descriptive
	// invalid-character error.
	if dec, isDecoder := state.(*decoderState); isDecoder && err == errMismatchDelim {
		context := "at start of value"
		if len(dec.Tokens.Stack) > 0 && dec.Tokens.Last.Length() > 0 {
			if dec.Tokens.Last.isArray() {
				context = "after array element (expecting ',' or ']')"
				ptr = []byte(Pointer(ptr).Parent()) // problem is with parent array
			} else if dec.Tokens.Last.isObject() {
				context = "after object value (expecting ',' or '}')"
				ptr = []byte(Pointer(ptr).Parent()) // problem is with parent object
			}
		}
		err = jsonwire.NewInvalidCharacterError(dec.buf[pos:], context)
	}
	return &SyntacticError{ByteOffset: offset, JSONPointer: Pointer(ptr), Err: err}
}
// Error formats the error as the package error prefix followed by the
// message of e.Err (or "syntactic error" when e.Err is nil), then the
// JSON pointer if it is non-empty, then the byte offset if it is positive.
//
// For [ErrDuplicateName] the duplicate name itself is quoted right after
// the message, the pointer is shortened to its parent, and the offset
// is omitted.
func (e *SyntacticError) Error() string {
	ptr, off := e.JSONPointer, e.ByteOffset
	msg := []byte(errorPrefix)
	if e.Err == nil {
		msg = append(msg, "syntactic error"...)
	} else {
		msg = append(msg, e.Err.Error()...)
		if e.Err == ErrDuplicateName {
			msg = append(msg, ' ')
			msg = strconv.AppendQuote(msg, ptr.LastToken())
			ptr = ptr.Parent()
			off = 0 // the offset adds nothing useful for duplicate names
		}
	}
	if ptr != "" {
		msg = append(msg, " within "...)
		msg = strconv.AppendQuote(msg, jsonwire.TruncatePointer(string(ptr), 100))
	}
	if off > 0 {
		msg = append(msg, " after offset "...)
		msg = strconv.AppendInt(msg, off, 10)
	}
	return string(msg)
}
// Unwrap returns the underlying error e.Err, which may be nil.
func (e *SyntacticError) Unwrap() error {
return e.Err
}
// pointerSuffixError represents a JSON pointer suffix to be appended
// to [SyntacticError.JSONPointer]. It is an internal error type
// used within this package and does not appear in the public API.
//
// This type is primarily used to annotate errors in Encoder.WriteValue
// and Decoder.ReadValue with precise positions.
// At the time WriteValue or ReadValue is called, a JSON pointer to the
// upcoming value can be constructed using the Encoder/Decoder state.
// However, tracking pointers within values during normal operation
// would incur a performance penalty in the error-free case.
//
// To provide precise error locations without this overhead,
// the error is wrapped with object names or array indices
// as the call stack is popped when an error occurs.
// Since this happens in reverse order, pointerSuffixError holds
// the pointer tokens in reverse order; the order is only restored
// when the suffix is appended to the pointer prefix.
//
// For example, if the encoder is at "/alpha/bravo/charlie"
// and an error occurs in WriteValue at "/xray/yankee/zulu", then
// the final pointer should be "/alpha/bravo/charlie/xray/yankee/zulu".
//
// As pointerSuffixError is populated during the error return path,
// it first contains "/zulu", then "/zulu/yankee",
// and finally "/zulu/yankee/xray".
// The token order is then reversed and the result is concatenated to
// "/alpha/bravo/charlie" to form the full pointer.
type pointerSuffixError struct {
// error is the wrapped cause.
error
// reversePointer is a JSON pointer whose tokens are stored in reverse
// order (innermost token first). The text of each token is not reversed.
reversePointer []byte
}
// wrapWithObjectName records a JSON object name access on err.
// The quotedName must be a valid quoted JSON string.
//
// If err is already a non-nil *pointerSuffixError it is extended in place
// and returned; otherwise a new one wrapping err is created.
func wrapWithObjectName(err error, quotedName []byte) error {
	suffix, _ := err.(*pointerSuffixError)
	if suffix == nil {
		suffix = &pointerSuffixError{error: err}
	}
	unquoted := jsonwire.UnquoteMayCopy(quotedName, false)
	withSlash := append(suffix.reversePointer, '/')
	suffix.reversePointer = appendEscapePointerName(withSlash, unquoted)
	return suffix
}
// wrapWithArrayIndex records a JSON array index access on err.
//
// If err is already a non-nil *pointerSuffixError it is extended in place
// and returned; otherwise a new one wrapping err is created.
// The index is formatted in base 10 after conversion to uint64.
func wrapWithArrayIndex(err error, index int64) error {
	suffix, _ := err.(*pointerSuffixError)
	if suffix == nil {
		suffix = &pointerSuffixError{error: err}
	}
	withSlash := append(suffix.reversePointer, '/')
	suffix.reversePointer = strconv.AppendUint(withSlash, uint64(index), 10)
	return suffix
}
// appendPointer appends the path recorded in e to pointer and returns
// the extended slice.
//
// The tokens in e.reversePointer are stored innermost first, so they are
// consumed from the back; the appended suffix therefore comes out in
// forward order.
func (e *pointerSuffixError) appendPointer(pointer []byte) []byte {
	remaining := e.reversePointer
	for len(remaining) > 0 {
		// Peel off the final "/token" and move it onto the result.
		cut := bytes.LastIndexByte(remaining, '/')
		pointer = append(pointer, remaining[cut:]...)
		remaining = remaining[:cut]
	}
	return pointer
}

View File

@@ -1,77 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"io"
"next.orly.dev/pkg/json/internal"
)
// Internal is for internal use only.
// This is exempt from the Go compatibility agreement.
var Internal exporter
// exporter is the type of [Internal]. Its Export method hands the
// privileged export API to callers holding the internal access token.
type exporter struct{}
// Export exposes internal functionality from "jsontext" to "json".
// Other packages cannot call this dynamically because they have no way
// to obtain a reference to the internal.AllowInternalUse value.
//
// It panics unless p is exactly &internal.AllowInternalUse.
func (exporter) Export(p *internal.NotForPublicUse) export {
	if p == &internal.AllowInternalUse {
		return export{}
	}
	panic("unauthorized call to Export")
}
// The export type exposes functionality to packages with visibility to
// the internal.AllowInternalUse variable. The "json" package uses this
// to modify low-level state in the Encoder and Decoder types.
// It mutates the state directly instead of calling ReadToken or WriteToken
// since this is more performant. The public APIs need to track state to ensure
// that users are constructing a valid JSON value, but the "json" implementation
// guarantees that it emits valid JSON by the structure of the code itself.
//
// Every method is a thin forwarder to unexported functionality of this package.
type export struct{}
// Encoder returns a pointer to the underlying encoderState.
func (export) Encoder(e *Encoder) *encoderState { return &e.s }
// Decoder returns a pointer to the underlying decoderState.
func (export) Decoder(d *Decoder) *decoderState { return &d.s }
// GetBufferedEncoder forwards to getBufferedEncoder with the given options.
// Hand the result back with PutBufferedEncoder.
func (export) GetBufferedEncoder(o ...Options) *Encoder {
return getBufferedEncoder(o...)
}
// PutBufferedEncoder forwards to putBufferedEncoder.
func (export) PutBufferedEncoder(e *Encoder) {
putBufferedEncoder(e)
}
// GetStreamingEncoder forwards to getStreamingEncoder with the given
// writer and options. Hand the result back with PutStreamingEncoder.
func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder {
return getStreamingEncoder(w, o...)
}
// PutStreamingEncoder forwards to putStreamingEncoder.
func (export) PutStreamingEncoder(e *Encoder) {
putStreamingEncoder(e)
}
// GetBufferedDecoder forwards to getBufferedDecoder with the given
// input and options. Hand the result back with PutBufferedDecoder.
func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder {
return getBufferedDecoder(b, o...)
}
// PutBufferedDecoder forwards to putBufferedDecoder.
func (export) PutBufferedDecoder(d *Decoder) {
putBufferedDecoder(d)
}
// GetStreamingDecoder forwards to getStreamingDecoder with the given
// reader and options. Hand the result back with PutStreamingDecoder.
func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder {
return getStreamingDecoder(r, o...)
}
// PutStreamingDecoder forwards to putStreamingDecoder.
func (export) PutStreamingDecoder(d *Decoder) {
putStreamingDecoder(d)
}
// IsIOError reports whether err is itself an *ioError.
// The check is a direct type assertion; wrapped errors are not unwrapped.
func (export) IsIOError(err error) bool {
_, ok := err.(*ioError)
return ok
}

View File

@@ -1,304 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"strings"
"next.orly.dev/pkg/json/internal/jsonflags"
"next.orly.dev/pkg/json/internal/jsonopts"
"next.orly.dev/pkg/json/internal/jsonwire"
)
// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder],
// and [Decoder.Reset] with specific features.
// Each function takes in a variadic list of options, where properties
// set in later options override the value of previously set properties.
//
// There is a single Options type, which is used with both encoding and decoding.
// Some options affect both operations, while others only affect one operation:
//
// - [AllowDuplicateNames] affects encoding and decoding
// - [AllowInvalidUTF8] affects encoding and decoding
// - [EscapeForHTML] affects encoding only
// - [EscapeForJS] affects encoding only
// - [PreserveRawStrings] affects encoding only
// - [CanonicalizeRawInts] affects encoding only
// - [CanonicalizeRawFloats] affects encoding only
// - [ReorderRawObjects] affects encoding only
// - [SpaceAfterColon] affects encoding only
// - [SpaceAfterComma] affects encoding only
// - [Multiline] affects encoding only
// - [WithIndent] affects encoding only
// - [WithIndentPrefix] affects encoding only
//
// Options that do not affect a particular operation are ignored.
//
// The Options type is identical to [encoding/json.Options] and
// [encoding/json/v2.Options]. Options from the other packages may
// be passed to functionality in this package, but are ignored.
// Options from this package may be used with the other packages.
//
// It is declared as an alias of jsonopts.Options.
type Options = jsonopts.Options
// AllowDuplicateNames controls whether JSON objects are permitted to
// contain duplicate member names. Turning off the duplicate name check
// may improve performance, but breaks compliance with RFC 7493, section 2.3.
// The input or output remains compliant with RFC 8259,
// which leaves the handling of duplicate names unspecified.
//
// This applies to both encoding and decoding.
func AllowDuplicateNames(v bool) Options {
	if v {
		return jsonflags.AllowDuplicateNames | 1
	}
	return jsonflags.AllowDuplicateNames | 0
}
// AllowInvalidUTF8 controls whether JSON strings are permitted to contain
// invalid UTF-8, which is mangled as the Unicode replacement character,
// U+FFFD. Enabling it makes the encoder or decoder break compliance with
// RFC 7493, section 2.1, and RFC 8259, section 8.1.
//
// This applies to both encoding and decoding.
func AllowInvalidUTF8(v bool) Options {
	if v {
		return jsonflags.AllowInvalidUTF8 | 1
	}
	return jsonflags.AllowInvalidUTF8 | 0
}
// EscapeForHTML controls whether the '<', '>', and '&' characters inside
// JSON strings are written as hexadecimal Unicode escapes (e.g., \u003c),
// which makes the output safe to embed within HTML.
//
// This only affects encoding and is ignored when decoding.
func EscapeForHTML(v bool) Options {
	if v {
		return jsonflags.EscapeForHTML | 1
	}
	return jsonflags.EscapeForHTML | 0
}
// EscapeForJS controls whether the U+2028 and U+2029 characters inside
// JSON strings are written as hexadecimal Unicode escapes (e.g., \u2028),
// which makes the output valid to embed within JavaScript.
// See RFC 8259, section 12.
//
// This only affects encoding and is ignored when decoding.
func EscapeForJS(v bool) Options {
	if v {
		return jsonflags.EscapeForJS | 1
	}
	return jsonflags.EscapeForJS | 0
}
// PreserveRawStrings controls whether pre-escaped sequences in a raw JSON
// string supplied through a [Token] or [Value] are carried over to the
// output unchanged when encoding.
// Raw strings still honor [EscapeForHTML] and [EscapeForJS],
// so the characters those options cover are escaped regardless.
// If [AllowInvalidUTF8] is enabled, bytes of invalid UTF-8
// are preserved in the output.
//
// This only affects encoding and is ignored when decoding.
func PreserveRawStrings(v bool) Options {
	if v {
		return jsonflags.PreserveRawStrings | 1
	}
	return jsonflags.PreserveRawStrings | 0
}
// CanonicalizeRawInts controls whether a raw JSON integer number
// (i.e., a number without a fraction and exponent) supplied through a
// [Token] or [Value] is canonicalized according to RFC 8785,
// section 3.2.2.3 when encoding. As a special case,
// the number -0 is canonicalized as 0.
//
// JSON numbers are treated as IEEE 754 double precision numbers.
// Numbers with more precision than that form can represent lose the
// excess precision when canonicalized; this includes integer values
// beyond ±2⁵³. For example, 1234567890123456789 is formatted as
// 1234567890123456800.
//
// This only affects encoding and is ignored when decoding.
func CanonicalizeRawInts(v bool) Options {
	if v {
		return jsonflags.CanonicalizeRawInts | 1
	}
	return jsonflags.CanonicalizeRawInts | 0
}
// CanonicalizeRawFloats controls whether a raw JSON floating-point number
// (i.e., a number with a fraction or exponent) supplied through a
// [Token] or [Value] is canonicalized according to RFC 8785,
// section 3.2.2.3 when encoding. As a special case,
// the number -0 is canonicalized as 0.
//
// JSON numbers are treated as IEEE 754 double precision numbers.
// It is safe to canonicalize a serialized single precision number and
// parse it back as a single precision number and expect the same value.
// A number that exceeds ±1.7976931348623157e+308, the maximum finite
// number, is saturated at that value and formatted as such.
//
// This only affects encoding and is ignored when decoding.
func CanonicalizeRawFloats(v bool) Options {
	if v {
		return jsonflags.CanonicalizeRawFloats | 1
	}
	return jsonflags.CanonicalizeRawFloats | 0
}
// ReorderRawObjects controls whether the members of a raw JSON object
// supplied through a [Value] are reordered according to RFC 8785,
// section 3.2.3 when encoding.
//
// This only affects encoding and is ignored when decoding.
func ReorderRawObjects(v bool) Options {
	if v {
		return jsonflags.ReorderRawObjects | 1
	}
	return jsonflags.ReorderRawObjects | 0
}
// SpaceAfterColon controls whether the JSON output has a space character
// after each colon separator that follows a JSON object name.
// When false, nothing is written after the colon separator.
//
// This only affects encoding and is ignored when decoding.
func SpaceAfterColon(v bool) Options {
	if v {
		return jsonflags.SpaceAfterColon | 1
	}
	return jsonflags.SpaceAfterColon | 0
}
// SpaceAfterComma controls whether the JSON output has a space character
// after each comma separator that follows a JSON object value or
// array element.
// When false, nothing is written after the comma separator.
//
// This only affects encoding and is ignored when decoding.
func SpaceAfterComma(v bool) Options {
	if v {
		return jsonflags.SpaceAfterComma | 1
	}
	return jsonflags.SpaceAfterComma | 0
}
// Multiline controls whether the JSON output is expanded over multiple
// lines, with every JSON object member or JSON array element placed on
// a new line that is indented according to the nesting depth.
//
// If [SpaceAfterColon] is not specified, then the default is true.
// If [SpaceAfterComma] is not specified, then the default is false.
// If [WithIndent] is not specified, then the default is "\t".
//
// When set to false, the output is a single line in which the only
// whitespace emitted is determined by the current values of
// [SpaceAfterColon] and [SpaceAfterComma].
//
// This only affects encoding and is ignored when decoding.
func Multiline(v bool) Options {
	if v {
		return jsonflags.Multiline | 1
	}
	return jsonflags.Multiline | 0
}
// WithIndent specifies that the encoder should emit multiline output
// where each element in a JSON object or array begins on a new, indented line
// beginning with the indent prefix (see [WithIndentPrefix])
// followed by one or more copies of indent according to the nesting depth.
// The indent must only be composed of space or tab characters;
// any other character causes a panic.
//
// If the intent is to emit indented output without a preference for
// the particular indent string, then use [Multiline] instead.
//
// This only affects encoding and is ignored when decoding.
// Use of this option implies [Multiline] being set to true.
func WithIndent(indent string) Options {
	// Fast-path: Return a constant for common indents, which avoids allocating.
	// These are derived from analyzing the Go module proxy on 2023-07-01.
	//
	// Every case must be a distinct constant: duplicate case constants in
	// an expression switch are rejected by the compiler. The widths are
	// spelled out in the trailing comments so that whitespace mangling of
	// the literals is easy to spot.
	switch indent {
	case "\t":
		return jsonopts.Indent("\t") // one tab, ~14k usages
	case "    ":
		return jsonopts.Indent("    ") // four spaces, ~18k usages
	case "   ":
		return jsonopts.Indent("   ") // three spaces, ~1.7k usages
	case "  ":
		return jsonopts.Indent("  ") // two spaces, ~52k usages
	case " ":
		return jsonopts.Indent(" ") // one space, ~12k usages
	case "":
		return jsonopts.Indent("") // empty, ~1.5k usages
	}

	// Otherwise, allocate for this unique value.
	// Anything left after trimming spaces and tabs is an invalid character.
	if s := strings.Trim(indent, " \t"); len(s) > 0 {
		panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent")
	}
	return jsonopts.Indent(indent)
}
// WithIndentPrefix makes the encoder emit multiline output in which each
// element of a JSON object or array starts on a new, indented line that
// begins with prefix, followed by one or more copies of the indent
// (see [WithIndent]) according to the nesting depth.
// The prefix must consist only of space or tab characters;
// any other character causes a panic.
//
// This only affects encoding and is ignored when decoding.
// Use of this option implies [Multiline] being set to true.
func WithIndentPrefix(prefix string) Options {
	// Anything left after trimming spaces and tabs is an invalid character.
	invalid := strings.Trim(prefix, " \t")
	if len(invalid) > 0 {
		panic("json: invalid character " + jsonwire.QuoteRune(invalid) + " in indent prefix")
	}
	return jsonopts.IndentPrefix(prefix)
}
/*
// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit.
// Remember to also update the "Security Considerations" section.
// WithByteLimit sets a limit on the number of bytes of input or output bytes
// that may be consumed or produced for each top-level JSON value.
// If a [Decoder] or [Encoder] method call would need to consume/produce
// more than a total of n bytes to make progress on the top-level JSON value,
// then the call will report an error.
// Whitespace before and within the top-level value are counted against the limit.
// Whitespace after a top-level value are counted against the limit
// for the next top-level value.
//
// A non-positive limit is equivalent to no limit at all.
// If unspecified, the default limit is no limit at all.
// This affects either encoding or decoding.
func WithByteLimit(n int64) Options {
return jsonopts.ByteLimit(max(n, 0))
}
// WithDepthLimit sets a limit on the maximum depth of JSON nesting
// that may be consumed or produced for each top-level JSON value.
// If a [Decoder] or [Encoder] method call would need to consume or produce
// a depth greater than n to make progress on the top-level JSON value,
// then the call will report an error.
//
// A non-positive limit is equivalent to no limit at all.
// If unspecified, the default limit is 10000.
// This affects either encoding or decoding.
func WithDepthLimit(n int) Options {
return jsonopts.DepthLimit(max(n, 0))
}
*/

View File

@@ -1,152 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"io"
"math/bits"
"sync"
)
// TODO(https://go.dev/issue/47657): Use sync.PoolOf.
// Encoder pools, one per way the encoder's internal buffer is owned.
var (
// bufferedEncoderPool serves getBufferedEncoder and putBufferedEncoder.
// This owns the internal buffer since there is no io.Writer to output to.
// Since the buffer can get arbitrarily large in normal usage,
// there is statistical tracking logic to determine whether to recycle
// the internal buffer or not based on a history of utilization.
bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
// streamingEncoderPool serves streaming encoders whose writer is not
// a *bytes.Buffer.
// This owns the internal buffer, but it is only used to temporarily store
// buffered JSON before flushing it to the underlying io.Writer.
// In a sufficiently efficient streaming mode, we do not expect the buffer
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
// bytesBufferEncoderPool serves streaming encoders whose writer is
// a *bytes.Buffer.
// This does not own the internal buffer since
// it is taken directly from the provided bytes.Buffer.
bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
)
// bufferStatistics is statistics to track buffer utilization.
// It is used to determine whether to recycle a buffer or not
// to avoid https://go.dev/issue/23199.
// The fields are maintained by putBufferedEncoder and
// prevLen is consulted by getBufferedEncoder.
type bufferStatistics struct {
strikes int // number of times the buffer was under-utilized; reset to 0 whenever the buffer is small or sufficiently used
prevLen int // length of previous buffer; used to size the next allocation after a buffer is discarded
}
// getBufferedEncoder takes an Encoder from bufferedEncoderPool and resets
// it with opts and no io.Writer, so that output accumulates in its own
// buffer. A pooled encoder without a buffer gets a freshly allocated one.
func getBufferedEncoder(opts ...Options) *Encoder {
	enc := bufferedEncoderPool.Get().(*Encoder)
	if enc.s.Buf == nil {
		// Round up to nearest 2ⁿ to make best use of malloc size classes.
		// See runtime/sizeclasses.go on Go1.15.
		// Logical OR with 63 to ensure 64 as the minimum buffer size.
		hint := uint(enc.s.bufStats.prevLen | 63)
		capacity := 1 << bits.Len(hint)
		enc.s.Buf = make([]byte, 0, capacity)
	}
	enc.s.reset(enc.s.Buf[:0], nil, opts...)
	return enc
}
// putBufferedEncoder returns e to bufferedEncoderPool, first deciding
// whether its internal buffer is worth keeping.
//
// Large buffers are recycled only if they are sufficiently utilized.
// A buffer that is under-utilized too many times in a row is discarded,
// so that a single large buffer is not kept alive by a continuous stream
// of small usages.
//
// The worst case utilization is computed as:
//
//	MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES)
//
// For the constants chosen below, this is (25%)/(1+4) ⇒ 5%.
// This may seem low, but it ensures a lower bound on
// the absolute worst-case utilization. Without this check,
// this would be theoretically 0%, which is infinitely worse.
//
// See https://go.dev/issue/27735.
func putBufferedEncoder(e *Encoder) {
	used, capacity := len(e.s.Buf), cap(e.s.Buf)
	switch {
	case capacity <= 4<<10, capacity/4 <= used:
		// Always recycle buffers of at most 4KiB,
		// and larger ones with at least 25% utilization.
		e.s.bufStats.strikes = 0
	case e.s.bufStats.strikes < 4:
		// Under-utilized, but tolerated for at most 4 strikes.
		e.s.bufStats.strikes++
	default:
		// Discard the buffer; too large and too often under-utilized.
		e.s.bufStats.strikes = 0
		e.s.bufStats.prevLen = used // heuristic for size to allocate next time
		e.s.Buf = nil
	}
	bufferedEncoderPool.Put(e)
}
// getStreamingEncoder takes an Encoder from a pool and resets it to write
// to w with opts. Encoders for a *bytes.Buffer writer come from
// bytesBufferEncoderPool and are reset without a buffer of their own;
// all others come from streamingEncoderPool and keep their existing buffer.
func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder {
	if _, isBytesBuffer := w.(*bytes.Buffer); isBytesBuffer {
		enc := bytesBufferEncoderPool.Get().(*Encoder)
		enc.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer
		return enc
	}
	enc := streamingEncoderPool.Get().(*Encoder)
	enc.s.reset(enc.s.Buf[:0], w, opts...) // preserve existing buffer
	return enc
}
// putStreamingEncoder returns e to the pool that matches its writer:
// bytesBufferEncoderPool when the writer is a *bytes.Buffer,
// streamingEncoderPool otherwise. In the latter case a buffer with
// capacity above 64KiB is dropped before pooling.
func putStreamingEncoder(e *Encoder) {
	if _, isBytesBuffer := e.s.wr.(*bytes.Buffer); isBytesBuffer {
		bytesBufferEncoderPool.Put(e)
		return
	}
	if cap(e.s.Buf) > 64<<10 {
		e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory
	}
	streamingEncoderPool.Put(e)
}
// Decoder pools, one per way the decoder's internal buffer is owned.
var (
// bufferedDecoderPool serves getBufferedDecoder and putBufferedDecoder.
// This does not own the internal buffer since it is externally provided.
bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
// streamingDecoderPool serves streaming decoders whose reader is not
// a *bytes.Buffer.
// This owns the internal buffer, but it is only used to temporarily store
// buffered JSON fetched from the underlying io.Reader.
// In a sufficiently efficient streaming mode, we do not expect the buffer
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
// bytesBufferDecoderPool serves streaming decoders whose reader is
// a *bytes.Buffer. It is the same pool as bufferedDecoderPool.
// This does not own the internal buffer since
// it is taken directly from the provided bytes.Buffer.
bytesBufferDecoderPool = bufferedDecoderPool
)
// getBufferedDecoder takes a Decoder from bufferedDecoderPool and resets it
// to read from b with opts and no io.Reader.
func getBufferedDecoder(b []byte, opts ...Options) *Decoder {
d := bufferedDecoderPool.Get().(*Decoder)
d.s.reset(b, nil, opts...)
return d
}
// putBufferedDecoder returns d to bufferedDecoderPool as is.
func putBufferedDecoder(d *Decoder) {
bufferedDecoderPool.Put(d)
}
// getStreamingDecoder takes a Decoder from a pool and resets it to read
// from r with opts. Decoders for a *bytes.Buffer reader come from
// bytesBufferDecoderPool and are reset without a buffer of their own;
// all others come from streamingDecoderPool and keep their existing buffer.
func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder {
	if _, isBytesBuffer := r.(*bytes.Buffer); isBytesBuffer {
		dec := bytesBufferDecoderPool.Get().(*Decoder)
		dec.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer
		return dec
	}
	dec := streamingDecoderPool.Get().(*Decoder)
	dec.s.reset(dec.s.buf[:0], r, opts...) // preserve existing buffer
	return dec
}
// putStreamingDecoder returns d to the pool that matches its reader:
// bytesBufferDecoderPool when the reader is a *bytes.Buffer,
// streamingDecoderPool otherwise. In the latter case a buffer with
// capacity above 64KiB is dropped before pooling.
func putStreamingDecoder(d *Decoder) {
	if _, isBytesBuffer := d.s.rd.(*bytes.Buffer); isBytesBuffer {
		bytesBufferDecoderPool.Put(d)
		return
	}
	if cap(d.s.buf) > 64<<10 {
		d.s.buf = nil // avoid pinning arbitrarily large amounts of memory
	}
	streamingDecoderPool.Put(d)
}

View File

@@ -1,41 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"next.orly.dev/pkg/json/internal/jsonflags"
"next.orly.dev/pkg/json/internal/jsonwire"
)
// AppendQuote appends src to dst as a double-quoted JSON string literal
// and returns the extended buffer.
// It uses the minimal string representation per RFC 8785, section 3.2.2.2.
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
// and an error is returned at the end indicating the presence of invalid UTF-8.
// Any error is reported as a *[SyntacticError].
// The dst must not overlap with the src.
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
	out, err := jsonwire.AppendQuote(dst, src, &jsonflags.Flags{})
	if err == nil {
		return out, nil
	}
	return out, &SyntacticError{Err: err}
}
// AppendUnquote decodes src as a double-quoted JSON string literal,
// appends the decoded text to dst and returns the extended buffer.
// The input src must be a JSON string without any surrounding whitespace.
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
// and an error is returned at the end indicating the presence of invalid UTF-8.
// Any trailing bytes after the JSON string literal result in an error.
// Any error is reported as a *[SyntacticError].
// The dst must not overlap with the src.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
	out, err := jsonwire.AppendUnquote(dst, src)
	if err == nil {
		return out, nil
	}
	return out, &SyntacticError{Err: err}
}

View File

@@ -1,828 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"errors"
"iter"
"math"
"strconv"
"strings"
"unicode/utf8"
"next.orly.dev/pkg/json/internal/jsonwire"
)
// ErrDuplicateName indicates that a JSON token could not be
// encoded or decoded because it results in a duplicate JSON object name.
// This error is directly wrapped within a [SyntacticError] when produced.
//
// The name of a duplicate JSON object member can be extracted as:
//
//	err := ...
//	var serr *jsontext.SyntacticError
//	if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName {
//		ptr := serr.JSONPointer // JSON pointer to duplicate name
//		name := ptr.LastToken() // duplicate name itself
//		...
//	}
//
// The errors.As target must be a *SyntacticError: the error is produced
// as a pointer and the Error method is declared on the pointer receiver.
//
// This error is only returned if [AllowDuplicateNames] is false.
var ErrDuplicateName = errors.New("duplicate object member name")
// ErrNonStringName indicates that a JSON token could not be
// encoded or decoded because it is not a string,
// as required for JSON object names according to RFC 8259, section 4.
// This error is directly wrapped within a [SyntacticError] when produced.
//
// The token state machine reports it when a literal, number or
// begin-object token is appended where an object name is required.
var ErrNonStringName = errors.New("object member name must be a string")
// Internal sentinel errors reported by the token state machine.
var (
errMissingValue = errors.New("missing value after object name")
// errMismatchDelim is rewritten by wrapSyntacticError into an
// invalid-character error when the state belongs to a decoder.
errMismatchDelim = errors.New("mismatching structural token for object or array")
// errMaxDepth is reported when pushing would exceed maxNestingDepth.
errMaxDepth = errors.New("exceeded max depth")
// errInvalidNamespace is reported when the last state entry fails
// its isValidNamespace check.
errInvalidNamespace = errors.New("object namespace is in an invalid state")
)
// Per RFC 8259, section 9, implementations may enforce a maximum depth.
// Such a limit is necessary to prevent stack overflows.
// pushObject refuses to nest deeper once the stack holds this many entries.
const maxNestingDepth = 10000
// state bundles the token state machine with the stacks of object names
// and object namespaces that are tracked alongside it.
type state struct {
// Tokens validates whether the next token kind is valid.
Tokens stateMachine
// Names is a stack of object names.
Names objectNameStack
// Namespaces is a stack of object namespaces.
// For performance reasons, Encoder or Decoder may not update this
// if Marshal or Unmarshal is able to track names in a more efficient way.
// See makeMapArshaler and makeStructArshaler.
// Not used if AllowDuplicateNames is true.
Namespaces objectNamespaceStack
}
// needObjectValue reports whether the next token should be an object value.
// It forwards the question to the last entry of the token state machine.
// This method is used by [wrapSyntacticError].
func (s *state) needObjectValue() bool {
return s.Tokens.Last.needObjectValue()
}
// reset resets the token state machine, the object name stack and
// the object namespace stack by calling reset on each of them.
func (s *state) reset() {
s.Tokens.reset()
s.Names.reset()
s.Namespaces.reset()
}
// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value
// relative to the root of the top-level JSON value.
//
// A Pointer is a slash-separated list of tokens, where each token is
// either a JSON object name or an index to a JSON array element
// encoded as a base-10 integer value.
// It is impossible to distinguish between an array index and an object name
// (that happens to be a base-10 encoded integer) without also knowing
// the structure of the top-level JSON value that the pointer refers to.
//
// There is exactly one representation of a pointer to a particular value,
// so comparability of Pointer values is equivalent to checking whether
// they both point to the exact same value.
type Pointer string
// IsValid reports whether p is a valid JSON Pointer according to RFC 6901.
// A pointer is valid when it is empty or starts with '/', every '~' is
// followed by '0' or '1', and it contains no invalid UTF-8.
// Note that the concatenation of two valid pointers produces a valid pointer.
func (p Pointer) IsValid() bool {
	for i, r := range p {
		if r == '~' {
			// An escape must be exactly "~0" or "~1".
			if i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1') {
				return false // invalid escape
			}
			continue
		}
		// Ranging yields U+FFFD for invalid bytes; it is only legitimate
		// if the replacement character is literally present in the input.
		if r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd") {
			return false // invalid UTF-8
		}
	}
	return len(p) == 0 || p[0] == '/'
}
// Contains reports whether the JSON value that p points to
// is equal to or contains the JSON value that pc points to.
// That holds when p is a prefix of pc and the remainder of pc is
// either empty or starts a new token.
func (p Pointer) Contains(pc Pointer) bool {
	// Invariant: len(p) <= len(pc) if p.Contains(pc)
	if !strings.HasPrefix(string(pc), string(p)) {
		return false
	}
	rest := pc[len(p):]
	return len(rest) == 0 || rest[0] == '/'
}
// Parent returns p with its last token removed.
// The parent of an empty p is an empty string.
func (p Pointer) Parent() Pointer {
	cut := strings.LastIndexByte(string(p), '/')
	if cut < 0 {
		cut = 0 // no separator at all: the parent is empty
	}
	return p[:cut]
}
// LastToken returns the last token in the pointer, unescaped.
// The last token of an empty p is an empty string.
func (p Pointer) LastToken() string {
	// The token starts right after the final '/'. When there is no '/',
	// LastIndexByte reports -1 and the token is the whole of p.
	start := strings.LastIndexByte(string(p), '/') + 1
	return unescapePointerToken(string(p[start:]))
}
// AppendToken appends tok as a new token at the end of p and returns the
// full pointer. The '~' and '/' characters in tok are escaped.
func (p Pointer) AppendToken(tok string) Pointer {
	withSlash := append([]byte(p), '/')
	return Pointer(appendEscapePointerName(withSlash, tok))
}
// TODO: Add Pointer.AppendTokens,
// but should this take in a ...string or an iter.Seq[string]?
// Tokens returns an iterator over the reference tokens in the JSON pointer,
// starting from the first token until the last token (unless stopped early).
// Each token is yielded in unescaped form.
//
// The returned iterator may be used any number of times; every use walks
// the tokens from the beginning.
func (p Pointer) Tokens() iter.Seq[string] {
	return func(yield func(string) bool) {
		// Use a cursor local to this invocation. Advancing the captured
		// receiver instead would consume the pointer, so a second use of
		// the same iterator would yield nothing (or resume half-way
		// after an early stop).
		rest := string(p)
		for len(rest) > 0 {
			rest = strings.TrimPrefix(rest, "/")
			// The token runs up to the next '/' or to the end.
			end := strings.IndexByte(rest, '/')
			if end < 0 {
				end = len(rest)
			}
			if !yield(unescapePointerToken(rest[:end])) {
				return
			}
			rest = rest[end:]
		}
	}
}
// unescapePointerToken reverses the JSON Pointer escaping of a single
// token: "~1" becomes '/' and "~0" becomes '~'.
// Tokens without a '~' are returned unchanged.
func unescapePointerToken(token string) string {
	if !strings.Contains(token, "~") {
		return token
	}
	// Per RFC 6901, section 3, unescape '~' and '/' characters.
	// "~1" must be handled before "~0" so that "~01" decodes to "~1".
	slashed := strings.ReplaceAll(token, "~1", "/")
	return strings.ReplaceAll(slashed, "~0", "~")
}
// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
// The pointer is appended to b and the extended slice is returned.
//
// - If where is -1, then it points to the previously processed token.
//
// - If where is 0, then it points to the parent JSON object or array,
// or an object member if in-between an object member key and value.
// This is useful when the position is ambiguous whether
// we are interested in the previous or next token, or
// when we are uncertain whether the next token
// continues or terminates the current object or array.
//
// - If where is +1, then it points to the next expected value,
// assuming that it continues the current JSON object or array.
// As a special case, if the next token is a JSON object name,
// then it points to the parent JSON object.
//
// Invariant: Must call s.Names.copyQuotedBuffer beforehand.
func (s state) appendStackPointer(b []byte, where int) []byte {
// objectDepth counts the objects seen so far; it indexes into s.Names.
var objectDepth int
// Start at 1 to skip the top-level virtual array of the state machine.
for i := 1; i < s.Tokens.Depth(); i++ {
e := s.Tokens.index(i)
arrayDelta := -1 // by default point to previous array element
// Only the innermost entry is affected by where.
if isLast := i == s.Tokens.Depth()-1; isLast {
switch {
// In these situations no token is emitted for the innermost entry,
// so the pointer stops at the enclosing object or array.
case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
return b
case where > 0 && e.isArray():
arrayDelta = 0 // point to next array element
}
}
switch {
case e.isObject():
b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
objectDepth++
case e.isArray():
b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
}
}
return b
}
// appendEscapePointerName appends name to b as a single JSON Pointer token
// and returns the extended slice. The '~' character is written as "~0" and
// '/' as "~1"; every other rune is appended in its UTF-8 encoding.
// No leading '/' is added; the caller supplies the separator.
func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
	for _, r := range string(name) {
		// Per RFC 6901, section 3, escape '~' and '/' characters.
		if r == '~' {
			b = append(b, '~', '0')
			continue
		}
		if r == '/' {
			b = append(b, '~', '1')
			continue
		}
		b = utf8.AppendRune(b, r)
	}
	return b
}
// stateMachine is a push-down automaton that validates whether
// a sequence of tokens is valid or not according to the JSON grammar.
// It is useful for both encoding and decoding.
//
// It is a stack where each entry represents a nested JSON object or array.
// The stack has a minimum depth of 1 where the first level is a
// virtual JSON array to handle a stream of top-level JSON values.
// The top-level virtual JSON array is special in that it doesn't require commas
// between each JSON value.
//
// For performance, most methods are carefully written to be inlinable.
// The zero value is a valid state machine ready for use.
type stateMachine struct {
// Stack holds the entries that enclose Last, outermost first.
Stack []stateEntry
// Last is the innermost (current) entry. It is kept outside of Stack,
// which is why the depth is len(Stack)+1.
Last stateEntry
}
// reset returns the state machine to its initial state: an empty stack
// with the top-level virtual array as the current entry.
// The machine always starts with a minimum depth of 1.
//
// The backing array of the stack is reused unless its capacity
// exceeds 1<<10 entries, in which case it is released.
func (m *stateMachine) reset() {
	if cap(m.Stack) > 1<<10 {
		m.Stack = nil
	} else {
		m.Stack = m.Stack[:0]
	}
	m.Last = stateTypeArray
}
// Depth reports how deeply nested the current position is within
// JSON objects and arrays.
// The count is one-indexed: top-level values are at depth 1.
func (m stateMachine) Depth() int {
	return 1 + len(m.Stack)
}
// index returns a pointer to the ith entry, where i == len(m.Stack)
// refers to the current (last) entry.
// The pointer is only valid until the next push method call.
func (m *stateMachine) index(i int) *stateEntry {
	if i != len(m.Stack) {
		return &m.Stack[i]
	}
	return &m.Last
}
// DepthLength returns the current nesting depth together with
// the number of elements in the innermost JSON object or array.
func (m stateMachine) DepthLength() (int, int64) {
	depth := m.Depth()
	length := m.Last.Length()
	return depth, length
}
// appendLiteral records a JSON literal as the next token in the sequence.
// It fails with ErrNonStringName if an object name is expected and with
// errInvalidNamespace if the current namespace has been invalidated.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendLiteral() error {
	if m.Last.NeedObjectName() {
		return ErrNonStringName
	}
	if !m.Last.isValidNamespace() {
		return errInvalidNamespace
	}
	m.Last.Increment()
	return nil
}
// appendString records a JSON string as the next token in the sequence.
// A string is acceptable both as an object name and as a value,
// so only the validity of the namespace is checked.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendString() error {
	if !m.Last.isValidNamespace() {
		return errInvalidNamespace
	}
	m.Last.Increment()
	return nil
}
// appendNumber records a JSON number as the next token in the sequence.
// Numbers are validated exactly like literals.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendNumber() error {
	err := m.appendLiteral()
	return err
}
// pushObject records a JSON begin-object token as next in the sequence
// and descends into the new object.
// If an error is returned, the state is not mutated.
func (m *stateMachine) pushObject() error {
	if m.Last.NeedObjectName() {
		return ErrNonStringName
	}
	if !m.Last.isValidNamespace() {
		return errInvalidNamespace
	}
	if len(m.Stack) == maxNestingDepth {
		return errMaxDepth
	}
	// Count the new object as an element of its parent
	// before saving the parent on the stack.
	m.Last.Increment()
	m.Stack = append(m.Stack, m.Last)
	m.Last = stateTypeObject
	return nil
}
// popObject records a JSON end-object token as next in the sequence
// and ascends back to the parent entry.
// If an error is returned, the state is not mutated.
func (m *stateMachine) popObject() error {
	if !m.Last.isObject() {
		return errMismatchDelim
	}
	if m.Last.needObjectValue() {
		return errMissingValue
	}
	if !m.Last.isValidNamespace() {
		return errInvalidNamespace
	}
	top := len(m.Stack) - 1
	m.Last, m.Stack = m.Stack[top], m.Stack[:top]
	return nil
}
// pushArray records a JSON begin-array token as next in the sequence
// and descends into the new array.
// If an error is returned, the state is not mutated.
func (m *stateMachine) pushArray() error {
	if m.Last.NeedObjectName() {
		return ErrNonStringName
	}
	if !m.Last.isValidNamespace() {
		return errInvalidNamespace
	}
	if len(m.Stack) == maxNestingDepth {
		return errMaxDepth
	}
	// Count the new array as an element of its parent
	// before saving the parent on the stack.
	m.Last.Increment()
	m.Stack = append(m.Stack, m.Last)
	m.Last = stateTypeArray
	return nil
}
// popArray records a JSON end-array token as next in the sequence
// and ascends back to the parent entry.
// If an error is returned, the state is not mutated.
func (m *stateMachine) popArray() error {
	// An empty stack means the current entry is the top-level virtual
	// JSON array, which must never be popped.
	if len(m.Stack) == 0 || !m.Last.isArray() {
		return errMismatchDelim
	}
	if !m.Last.isValidNamespace() {
		return errInvalidNamespace
	}
	top := len(m.Stack) - 1
	m.Last, m.Stack = m.Stack[top], m.Stack[:top]
	return nil
}
// NeedIndent reports whether indent whitespace should be injected
// before the next token of the given kind.
// A zero result means that no whitespace should be injected.
// A positive result n means that '\n', indentPrefix, and (n-1) copies of
// indentBody should be appended to the output immediately before the token.
func (m stateMachine) NeedIndent(next Kind) (n int) {
	depth := m.Depth()
	if depth == 1 {
		return 0 // top-level values are never indented
	}
	closing := next == '}' || next == ']'
	empty := m.Last.Length() == 0
	if empty && closing {
		return 0 // an empty object or array is never indented
	}
	if empty || m.Last.needImplicitComma(next) {
		return depth
	}
	if closing {
		return depth - 1
	}
	return 0
}
// MayAppendDelim appends to b the colon or comma, if any, that must
// precede the next token of the given kind and returns the result.
func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
	if m.Last.needImplicitColon() {
		return append(b, ':')
	}
	// A comma is not needed between top-level values.
	if len(m.Stack) != 0 && m.Last.needImplicitComma(next) {
		return append(b, ',')
	}
	return b
}
// needDelim reports which delimiter (':' or ','), if any, should be
// implicitly emitted before the next token of the specified kind.
// A zero value means no delimiter should be emitted.
func (m stateMachine) needDelim(next Kind) (delim byte) {
	if m.Last.needImplicitColon() {
		return ':'
	}
	// A comma is not needed between top-level values.
	if len(m.Stack) != 0 && m.Last.needImplicitComma(next) {
		return ','
	}
	return 0
}
// InvalidateDisabledNamespaces marks every disabled namespace as invalid.
//
// For efficiency, Marshal and Unmarshal may disable namespaces since there are
// more efficient ways to track duplicate names. However, if an error occurs,
// the namespaces in Encoder or Decoder will be left in an inconsistent state.
// Marking them invalid makes future method calls on
// Encoder or Decoder return an error.
func (m *stateMachine) InvalidateDisabledNamespaces() {
	for i, depth := 0, m.Depth(); i < depth; i++ {
		if entry := m.index(i); !entry.isActiveNamespace() {
			entry.invalidateNamespace()
		}
	}
}
// stateEntry encodes several artifacts within a single unsigned integer:
//   - whether this represents a JSON object or array,
//   - whether this object should check for duplicate names, and
//   - how many elements are in this JSON object or array.
type stateEntry uint64

const (
	// The type mask (1 bit) records whether this is a JSON object or array.
	stateTypeMask stateEntry = 0x8000_0000_0000_0000
	stateTypeObject stateEntry = 0x8000_0000_0000_0000
	stateTypeArray stateEntry = 0x0000_0000_0000_0000
	// The name check mask (2 bits) records whether to update
	// the namespaces for the current JSON object and
	// whether the namespace is valid.
	stateNamespaceMask stateEntry = 0x6000_0000_0000_0000
	stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
	stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
	// The count mask (61 bits) records the number of elements.
	stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff
	// The least-significant bit of the count is its parity.
	// For an object, an even count means a name is expected next
	// and an odd count means a value is expected next
	// (see NeedObjectName and needObjectValue).
	stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
	stateCountOdd stateEntry = 0x0000_0000_0000_0001
	stateCountEven stateEntry = 0x0000_0000_0000_0000
)
// Length returns the number of elements in the JSON object or array.
// In an object, each name and each value counts as a separate element.
func (e stateEntry) Length() int64 {
	count := e & stateCountMask
	return int64(count)
}
// isObject reports whether the entry describes a JSON object.
func (e stateEntry) isObject() bool {
	kind := e & stateTypeMask
	return kind == stateTypeObject
}
// isArray reports whether the entry describes a JSON array.
func (e stateEntry) isArray() bool {
	kind := e & stateTypeMask
	return kind == stateTypeArray
}
// NeedObjectName reports whether the next token must be a JSON string
// because it will serve as a JSON object name.
// This holds when the entry is an object with an even element count.
func (e stateEntry) NeedObjectName() bool {
	const (
		mask = stateTypeMask | stateCountLSBMask
		want = stateTypeObject | stateCountEven
	)
	return e&mask == want
}
// needImplicitColon reports whether a colon should occur next,
// which is always the case directly after a JSON object name.
func (e stateEntry) needImplicitColon() bool {
	afterName := e.needObjectValue()
	return afterName
}
// needObjectValue reports whether the next token must be a JSON value,
// which is required after every JSON object name.
// This holds when the entry is an object with an odd element count.
func (e stateEntry) needObjectValue() bool {
	const (
		mask = stateTypeMask | stateCountLSBMask
		want = stateTypeObject | stateCountOdd
	)
	return e&mask == want
}
// needImplicitComma reports whether a comma should occur next,
// which is the case after a value in a JSON object or array
// when another value (or name) follows rather than a closing delimiter.
func (e stateEntry) needImplicitComma(next Kind) bool {
	if e.needObjectValue() || e.Length() == 0 {
		return false
	}
	return next != '}' && next != ']'
}
// Increment adds one to the element count of the current object or array.
// Overflow is assumed not to be a practical concern since
// 1<<bits.OnesCount(stateCountMask) is sufficiently large.
func (e *stateEntry) Increment() {
	*e += 1
}
// decrement subtracts one from the element count of the current object
// or array.
// It is the caller's responsibility to ensure that e.Length() > 0.
func (e *stateEntry) decrement() {
	*e -= 1
}
// DisableNamespace turns off namespace tracking for the JSON object so
// that the Encoder or Decoder no longer updates the namespace.
func (e *stateEntry) DisableNamespace() {
	*e = *e | stateDisableNamespace
}
// isActiveNamespace reports whether the JSON object namespace is still
// being updated and used for duplicate name checks
// (that is, DisableNamespace has not been applied).
func (e stateEntry) isActiveNamespace() bool {
	disabled := e & stateDisableNamespace
	return disabled == 0
}
// invalidateNamespace flags the JSON object namespace as invalid.
func (e *stateEntry) invalidateNamespace() {
	*e = *e | stateInvalidNamespace
}
// isValidNamespace reports whether the JSON object namespace is valid
// (that is, invalidateNamespace has not been applied).
func (e stateEntry) isValidNamespace() bool {
	invalid := e & stateInvalidNamespace
	return invalid == 0
}
// objectNameStack is a stack of names when descending into a JSON object.
// In contrast to objectNamespaceStack, this only has to remember a single name
// per JSON object.
//
// This data structure may contain offsets to encodeBuffer or decodeBuffer.
// It violates clean abstraction of layers, but is significantly more efficient.
// This ensures that popping and pushing in the common case is a trivial
// push/pop of an offset integer.
//
// The zero value is an empty names stack ready for use.
type objectNameStack struct {
	// offsets is a stack of offsets for each name.
	// A non-negative offset is the ending offset into the local names buffer.
	// A negative offset is the bit-wise inverse of a starting offset into
	// a remote buffer (e.g., encodeBuffer or decodeBuffer).
	// A math.MinInt offset at the end implies that the last object is empty.
	// Invariant: Positive offsets always occur before negative offsets.
	offsets []int
	// unquotedNames is a back-to-back concatenation of names.
	// Once all offsets are local, name i occupies
	// unquotedNames[offsets[i-1]:offsets[i]], with a start of 0 for i == 0.
	unquotedNames []byte
}
// reset empties the names stack.
// Backing arrays are kept for reuse unless they have grown past a fixed
// capacity, in which case they are released.
func (ns *objectNameStack) reset() {
	if cap(ns.offsets) > 1<<6 {
		ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
	} else {
		ns.offsets = ns.offsets[:0]
	}
	if cap(ns.unquotedNames) > 1<<10 {
		ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
	} else {
		ns.unquotedNames = ns.unquotedNames[:0]
	}
}
// length reports the number of entries on the names stack,
// one per JSON object that has been descended into.
func (ns *objectNameStack) length() int {
	depth := len(ns.offsets)
	return depth
}
// getUnquoted returns the ith unquoted name in the stack.
// It returns an empty string if the last object is empty.
//
// Invariant: Must call copyQuotedBuffer beforehand.
func (ns *objectNameStack) getUnquoted(i int) []byte {
	ns.ensureCopiedBuffer()
	// The first name starts at 0; every other name starts where
	// its predecessor ends.
	start := 0
	if i != 0 {
		start = ns.offsets[i-1]
	}
	return ns.unquotedNames[start:ns.offsets[i]]
}
// invalidOffset indicates that the last JSON object currently has no name.
// It is the value stored by push and clearLast, and copyQuotedBuffer
// replaces it with a regular local offset.
const invalidOffset = math.MinInt
// push descends into a nested JSON object by adding an entry
// that has no name yet.
func (ns *objectNameStack) push() {
	grown := append(ns.offsets, invalidOffset)
	ns.offsets = grown
}
// ReplaceLastQuotedOffset replaces the last name with the starting offset
// to the quoted name in some remote buffer. All offsets provided must be
// relative to the same buffer until copyQuotedBuffer is called.
func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
	// Store the bit-wise inverse rather than -i, because zero is a valid
	// offset and -0 would be indistinguishable from a local offset.
	// The inverse equals -i-1 (0 becomes -1, 1 becomes -2, and so forth),
	// so remote offsets are always negative.
	last := len(ns.offsets) - 1
	ns.offsets[last] = ^i
}
// replaceLastUnquotedName overwrites the last name with s,
// storing it in the local names buffer.
//
// Invariant: Must call copyQuotedBuffer beforehand.
func (ns *objectNameStack) replaceLastUnquotedName(s string) {
	ns.ensureCopiedBuffer()
	last := len(ns.offsets) - 1
	// The last name begins where the previous name ends,
	// or at zero when it is the only name.
	start := 0
	if last > 0 {
		start = ns.offsets[last-1]
	}
	ns.unquotedNames = append(ns.unquotedNames[:start], s...)
	ns.offsets[last] = len(ns.unquotedNames)
}
// clearLast discards any name recorded for the last JSON object.
// It is semantically equivalent to ns.push followed by ns.pop.
func (ns *objectNameStack) clearLast() {
	last := len(ns.offsets) - 1
	ns.offsets[last] = invalidOffset
}
// pop ascends out of a nested JSON object by dropping the last entry.
func (ns *objectNameStack) pop() {
	last := len(ns.offsets) - 1
	ns.offsets = ns.offsets[:last]
}
// copyQuotedBuffer copies names from the remote buffer into the local names
// buffer so that there are no more offset references into the remote buffer.
// This allows the remote buffer to change contents without affecting
// the names that this data structure is trying to remember.
//
// The argument b is the remote buffer that the negative entries of
// ns.offsets index into. On return every entry of ns.offsets is a
// non-negative ending offset into ns.unquotedNames.
func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
	// Scan backwards past the trailing run of negative offsets;
	// afterwards i+1 is the index of the first negative offset.
	var i int
	for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
		continue
	}
	// Copy each name from the remote buffer into the local buffer.
	for i = i + 1; i < len(ns.offsets); i++ {
		if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
			// Record an empty name: its end offset equals the end of
			// the previous name (or zero if there is none).
			if i == 0 {
				ns.offsets[i] = 0
			} else {
				ns.offsets[i] = ns.offsets[i-1]
			}
			break // last JSON object had a push without any names
		}
		// As a form of Hyrum proofing, we write an invalid character into the
		// buffer to make misuse of Decoder.ReadToken more obvious.
		// We need to undo that mutation here.
		quotedName := b[^ns.offsets[i]:]
		if quotedName[0] == invalidateBufferByte {
			quotedName[0] = '"'
		}
		// Append the unquoted name to the local buffer.
		var startOffset int
		if i > 0 {
			startOffset = ns.offsets[i-1]
		}
		// When ConsumeSimpleString reports a positive length, the bytes
		// between the quotes are copied as-is; otherwise the name is
		// unquoted through AppendUnquote, whose error is discarded.
		if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
			ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
		} else {
			ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
		}
		ns.offsets[i] = len(ns.unquotedNames)
	}
}
// ensureCopiedBuffer panics if the last offset is still negative,
// which means copyQuotedBuffer has not been called as required.
func (ns *objectNameStack) ensureCopiedBuffer() {
	if n := len(ns.offsets); n > 0 && ns.offsets[n-1] < 0 {
		panic("BUG: copyQuotedBuffer not called beforehand")
	}
}
// objectNamespaceStack is a stack of object namespaces.
// This data structure assists in detecting duplicate names.
// Each element is the namespace of one JSON object; push adds one
// when descending into an object and pop removes it on ascent.
type objectNamespaceStack []objectNamespace
// reset empties the object namespace stack.
// The backing array is kept for reuse unless its capacity exceeds
// 1<<10 entries, in which case it is released.
func (nss *objectNamespaceStack) reset() {
	stack := *nss
	if cap(stack) > 1<<10 {
		stack = nil
	}
	*nss = stack[:0]
}
// push starts a new, empty namespace for a nested JSON object.
// When spare capacity exists, the slot left over from an earlier pop
// is reused and reset instead of appending a fresh value.
func (nss *objectNamespaceStack) push() {
	n := len(*nss)
	if n >= cap(*nss) {
		*nss = append(*nss, objectNamespace{})
		return
	}
	*nss = (*nss)[:n+1]
	nss.Last().reset()
}
// Last returns a pointer to the namespace of the innermost JSON object.
func (nss objectNamespaceStack) Last() *objectNamespace {
	top := len(nss) - 1
	return &nss[top]
}
// pop terminates the namespace for a nested JSON object
// by dropping the last element of the stack.
func (nss *objectNamespaceStack) pop() {
	top := len(*nss) - 1
	*nss = (*nss)[:top]
}
// objectNamespace is the namespace for a JSON object.
// In contrast to objectNameStack, this needs to remember all names
// per JSON object.
//
// Lookups rely on a linear search over all the names before switching
// to use a Go map for direct lookup.
//
// The zero value is an empty namespace ready for use.
type objectNamespace struct {
	// endOffsets is a list of offsets to the end of each name in
	// allUnquotedNames.
	// The length of endOffsets is the number of names in the namespace.
	endOffsets []uint
	// allUnquotedNames is a back-to-back concatenation of every name in the namespace.
	allUnquotedNames []byte
	// mapNames is a Go map containing every name in the namespace.
	// Only valid if non-nil.
	mapNames map[string]struct{}
}
// reset makes the namespace empty.
// The map is always dropped; the slices keep their backing arrays for
// reuse unless they have grown past a fixed capacity.
func (ns *objectNamespace) reset() {
	ns.mapNames = nil
	if cap(ns.endOffsets) > 1<<6 {
		ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
	} else {
		ns.endOffsets = ns.endOffsets[:0]
	}
	if cap(ns.allUnquotedNames) > 1<<10 {
		ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
	} else {
		ns.allUnquotedNames = ns.allUnquotedNames[:0]
	}
}
// length returns how many names the namespace currently holds.
func (ns *objectNamespace) length() int {
	count := len(ns.endOffsets)
	return count
}
// getUnquoted returns the ith unquoted name in the namespace.
func (ns *objectNamespace) getUnquoted(i int) []byte {
	// The first name starts at 0; every other name starts where
	// its predecessor ends.
	var start uint
	if i != 0 {
		start = ns.endOffsets[i-1]
	}
	return ns.allUnquotedNames[start:ns.endOffsets[i]]
}
// lastUnquoted returns the most recently inserted name in the namespace.
func (ns *objectNamespace) lastUnquoted() []byte {
	last := ns.length() - 1
	return ns.getUnquoted(last)
}
// insertQuoted inserts a quoted name and reports whether it was inserted,
// which only occurs if the name is not already in the namespace.
// The provided name must be a valid JSON string.
// If isVerbatim is true, the surrounding quotes are simply stripped;
// otherwise the name is unquoted as part of the insertion.
func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
	if !isVerbatim {
		return ns.insert(name, true)
	}
	inner := name[len(`"`) : len(name)-len(`"`)]
	return ns.insert(inner, false)
}
// InsertUnquoted inserts an already unquoted name and reports
// whether it was inserted, which only occurs if the name
// is not already in the namespace.
func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
	const quoted = false
	return ns.insert(name, quoted)
}
// insert adds name to the namespace and reports whether it was added.
// If quoted is true, name is unquoted with jsonwire.AppendUnquote
// (whose error is discarded); otherwise it is used as provided.
// It returns false, without recording the name, if an equal name
// is already present.
func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
	// Tentatively append the unquoted name after the existing names.
	// ns.allUnquotedNames itself is only updated once the name is
	// known to be new.
	var allNames []byte
	if quoted {
		allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
	} else {
		allNames = append(ns.allUnquotedNames, name...)
	}
	// From here on, name refers to the unquoted copy just appended.
	name = allNames[len(ns.allUnquotedNames):]
	// Switch to a map if the buffer is too large for linear search.
	// This does not add the current name to the map.
	if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
		ns.mapNames = make(map[string]struct{})
		var startOffset uint
		for _, endOffset := range ns.endOffsets {
			name := ns.allUnquotedNames[startOffset:endOffset]
			ns.mapNames[string(name)] = struct{}{} // allocates a new string
			startOffset = endOffset
		}
	}
	if ns.mapNames == nil {
		// Perform linear search over the buffer to find matching names.
		// It provides O(n) lookup, but does not require any allocations.
		var startOffset uint
		for _, endOffset := range ns.endOffsets {
			if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
				return false
			}
			startOffset = endOffset
		}
	} else {
		// Use the map if it is populated.
		// It provides O(1) lookup, but requires a string allocation per name.
		if _, ok := ns.mapNames[string(name)]; ok {
			return false
		}
		ns.mapNames[string(name)] = struct{}{} // allocates a new string
	}
	// Commit the appended bytes and record where the new name ends.
	ns.allUnquotedNames = allNames
	ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
	return true
}
// removeLast drops the most recently inserted name from the namespace,
// including its entry in the lookup map when the map is in use.
func (ns *objectNamespace) removeLast() {
	if ns.mapNames != nil {
		delete(ns.mapNames, string(ns.lastUnquoted()))
	}
	remaining := ns.length() - 1
	if remaining == 0 {
		ns.endOffsets = ns.endOffsets[:0]
		ns.allUnquotedNames = ns.allUnquotedNames[:0]
		return
	}
	// Truncate the names buffer to the end of the new last name.
	ns.endOffsets = ns.endOffsets[:remaining]
	ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[remaining-1]]
}

View File

@@ -1,527 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"math"
"strconv"
"next.orly.dev/pkg/json/internal/jsonflags"
"next.orly.dev/pkg/json/internal/jsonwire"
)
// NOTE: Token is analogous to v1 json.Token.
const (
	// Integer bounds used by Token.Int and Token.Uint when saturating
	// numbers that fall outside the representable range.
	maxInt64 = math.MaxInt64
	minInt64 = math.MinInt64
	maxUint64 = math.MaxUint64
	minUint64 = 0 // for consistency and readability purposes
	// invalidTokenPanic is the panic message used by Token methods when
	// a raw token's recorded offset (num) no longer matches
	// raw.previousOffsetStart().
	invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call"
)

// errInvalidToken is the error value for an invalid Token.
// NOTE(review): its call sites are outside this section of the file.
var errInvalidToken = errors.New("invalid jsontext.Token")
// Token represents a lexical JSON token, which may be one of the following:
//   - a JSON literal (i.e., null, true, or false)
//   - a JSON string (e.g., "hello, world!")
//   - a JSON number (e.g., 123.456)
//   - a begin or end delimiter for a JSON object (i.e., { or } )
//   - a begin or end delimiter for a JSON array (i.e., [ or ] )
//
// A Token cannot represent entire array or object values, while a [Value] can.
// There is no Token to represent commas and colons since
// these structural tokens can be inferred from the surrounding context.
type Token struct {
	nonComparable
	// Tokens can exist in either a "raw" or an "exact" form.
	// Tokens produced by the Decoder are in the "raw" form.
	// Tokens returned by constructors are usually in the "exact" form.
	// The Encoder accepts Tokens in either the "raw" or "exact" form.
	//
	// The following chart shows the possible values for each Token type:
	//	╔═════════════════╦════════════╤════════════╤════════════╗
	//	║ Token type      ║ raw field  │ str field  │ num field  ║
	//	╠═════════════════╬════════════╪════════════╪════════════╣
	//	║ null   (raw)    ║ "null"     │ ""         │ 0          ║
	//	║ false  (raw)    ║ "false"    │ ""         │ 0          ║
	//	║ true   (raw)    ║ "true"     │ ""         │ 0          ║
	//	║ string (raw)    ║ non-empty  │ ""         │ offset     ║
	//	║ string (string) ║ nil        │ non-empty  │ 0          ║
	//	║ number (raw)    ║ non-empty  │ ""         │ offset     ║
	//	║ number (float)  ║ nil        │ "f"        │ non-zero   ║
	//	║ number (int64)  ║ nil        │ "i"        │ non-zero   ║
	//	║ number (uint64) ║ nil        │ "u"        │ non-zero   ║
	//	║ object (delim)  ║ "{" or "}" │ ""         │ 0          ║
	//	║ array  (delim)  ║ "[" or "]" │ ""         │ 0          ║
	//	╚═════════════════╩════════════╧════════════╧════════════╝
	//
	// Notes:
	//   - For tokens stored in "raw" form, the num field contains the
	//     absolute offset determined by raw.previousOffsetStart().
	//     The buffer itself is stored in raw.previousBuffer().
	//   - JSON literals and structural characters are always in the "raw" form.
	//   - JSON strings and numbers can be in either "raw" or "exact" forms.
	//   - The exact zero value of JSON strings and numbers in the "exact" forms
	//     have ambiguous representation. Thus, they are always represented
	//     in the "raw" form.

	// raw contains a reference to the raw decode buffer.
	// If non-nil, then its value takes precedence over str and num.
	// It is only valid if num == raw.previousOffsetStart().
	raw *decodeBuffer
	// str is the unescaped JSON string if num is zero.
	// Otherwise, it is "f", "i", or "u" if num should be interpreted
	// as a float64, int64, or uint64, respectively.
	str string
	// num is a float64, int64, or uint64 stored as a uint64 value.
	// It is non-zero for any JSON number in the "exact" form.
	num uint64
}
// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues?

// Predefined tokens. Every token built with rawToken is backed by its own
// decodeBuffer, so such tokens can be recognized by comparing their
// raw pointers (as Token.Clone and Token.Bool do).
var (
	// Tokens for the JSON literals and the structural delimiters.
	Null Token = rawToken("null")
	False Token = rawToken("false")
	True Token = rawToken("true")
	BeginObject Token = rawToken("{")
	EndObject Token = rawToken("}")
	BeginArray Token = rawToken("[")
	EndArray Token = rawToken("]")
	// zeroString and zeroNumber are the "raw" tokens that the constructors
	// return for the empty string and for zero-valued numbers.
	zeroString Token = rawToken(`""`)
	zeroNumber Token = rawToken(`0`)
	// JSON strings that Float returns for NaN, +Inf, and -Inf.
	nanString Token = String("NaN")
	pinfString Token = String("Infinity")
	ninfString Token = String("-Infinity")
)
// rawToken constructs a Token in the "raw" form whose decode buffer
// holds exactly the bytes of s, with the previous-token window
// spanning the whole buffer.
func rawToken(s string) Token {
	buf := &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}
	return Token{raw: buf}
}
// Bool returns the Token for a JSON boolean:
// [True] if b is true and [False] otherwise.
func Bool(b bool) Token {
	if !b {
		return False
	}
	return True
}
// String returns a Token representing a JSON string.
// The provided string should contain valid UTF-8, otherwise invalid characters
// may be mangled as the Unicode replacement character.
func String(s string) Token {
	// The empty string has no unambiguous "exact" form,
	// so it is represented by a shared raw token.
	if s == "" {
		return zeroString
	}
	return Token{str: s}
}
// Float returns a Token representing a JSON number.
// The values NaN, +Inf, and -Inf will be represented
// as a JSON string with the values "NaN", "Infinity", and "-Infinity".
func Float(n float64) Token {
	bits := math.Float64bits(n)
	// Only the all-zero bit pattern (positive zero) maps to the
	// shared zero token.
	if bits == 0 {
		return zeroNumber
	}
	if math.IsNaN(n) {
		return nanString
	}
	if math.IsInf(n, +1) {
		return pinfString
	}
	if math.IsInf(n, -1) {
		return ninfString
	}
	return Token{str: "f", num: bits}
}
// Int returns a Token representing a JSON number with the value of
// the given int64.
func Int(n int64) Token {
	if n != 0 {
		return Token{str: "i", num: uint64(n)}
	}
	// Zero has no unambiguous "exact" form; use the shared raw token.
	return zeroNumber
}
// Uint returns a Token representing a JSON number with the value of
// the given uint64.
func Uint(n uint64) Token {
	if n != 0 {
		return Token{str: "u", num: uint64(n)}
	}
	// Zero has no unambiguous "exact" form; use the shared raw token.
	return zeroNumber
}
// Clone returns a copy of the Token whose value remains valid
// even after a subsequent [Decoder.Read] call.
// Tokens in the "exact" form and the predefined literal and delimiter
// tokens are returned as they are; any other raw token gets
// a private copy of its bytes.
// It panics if a raw token has already been invalidated.
func (t Token) Clone() Token {
	// TODO: Allow caller to avoid any allocations?
	raw := t.raw
	if raw == nil {
		return t
	}
	// Avoid copying globals.
	if raw.prevStart == 0 {
		switch raw {
		case Null.raw:
			return Null
		case False.raw:
			return False
		case True.raw:
			return True
		case BeginObject.raw:
			return BeginObject
		case EndObject.raw:
			return EndObject
		case BeginArray.raw:
			return BeginArray
		case EndArray.raw:
			return EndArray
		}
	}
	if uint64(raw.previousOffsetStart()) != t.num {
		panic(invalidTokenPanic)
	}
	copied := bytes.Clone(raw.previousBuffer())
	return Token{raw: &decodeBuffer{buf: copied, prevStart: 0, prevEnd: len(copied)}}
}
// Bool returns the value of a JSON boolean token.
// A token is recognized as a boolean by its raw pointer being that of
// [True] or [False]; it panics for every other token.
func (t Token) Bool() bool {
	if t.raw == True.raw {
		return true
	}
	if t.raw == False.raw {
		return false
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// appendString appends the token, encoded as a JSON string, to dst
// and returns the extended slice.
// It panics if t is not a JSON string.
func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
	switch raw := t.raw; {
	case raw != nil:
		// Handle raw string value.
		buf := raw.previousBuffer()
		if Kind(buf[0]) == '"' {
			// A simple string can be copied over as it is;
			// anything else is reformatted according to flags.
			if jsonwire.ConsumeSimpleString(buf) == len(buf) {
				return append(dst, buf...), nil
			}
			out, _, err := jsonwire.ReformatString(dst, buf, flags)
			return out, err
		}
	case len(t.str) != 0 && t.num == 0:
		// Handle exact string value.
		return jsonwire.AppendQuote(dst, t.str, flags)
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// String returns the unescaped string value for a JSON string.
// For other JSON kinds, this returns the raw JSON representation.
//
// The work is done by the unexported string method, which returns the
// result either as a string or as a byte slice; the conversion of the
// byte slice happens here so that it is visible to the caller's inliner.
func (t Token) String() string {
	// This is inlinable to take advantage of "function outlining".
	// This avoids an allocation for the string(b) conversion
	// if the caller does not use the string in an escaping manner.
	// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
	s, b := t.string()
	// A non-empty byte slice takes precedence over the string result.
	if len(b) > 0 {
		return string(b)
	}
	return s
}
// string returns the textual value of the token as either a string or
// a byte slice, so that [Token.String] can defer the conversion.
// Raw tokens yield a byte slice (unquoted for JSON strings, otherwise
// the raw bytes); exact tokens yield a string.
// It panics if a raw token has already been invalidated.
func (t Token) string() (string, []byte) {
	if raw := t.raw; raw != nil {
		if uint64(raw.previousOffsetStart()) != t.num {
			panic(invalidTokenPanic)
		}
		buf := raw.previousBuffer()
		if buf[0] != '"' {
			// Handle tokens that are not JSON strings for fmt.Stringer.
			return "", buf
		}
		// TODO: Preserve ValueFlags in Token?
		isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf)
		return "", jsonwire.UnquoteMayCopy(buf, isVerbatim)
	}
	switch {
	case len(t.str) != 0 && t.num == 0:
		return t.str, nil
	case t.num > 0:
		// Handle tokens that are not JSON strings for fmt.Stringer.
		switch t.str[0] {
		case 'f':
			return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil
		case 'i':
			return strconv.FormatInt(int64(t.num), 10), nil
		case 'u':
			return strconv.FormatUint(uint64(t.num), 10), nil
		}
	}
	return "<invalid jsontext.Token>", nil
}
// appendNumber appends the token, encoded as a JSON number, to dst
// and returns the extended slice.
// It panics if t is not a JSON number.
func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
	switch raw := t.raw; {
	case raw != nil:
		// Handle raw number value.
		buf := raw.previousBuffer()
		if Kind(buf[0]).normalize() == '0' {
			out, _, err := jsonwire.ReformatNumber(dst, buf, flags)
			return out, err
		}
	case t.num != 0:
		// Handle exact number value; str names the numeric type of num.
		switch t.str[0] {
		case 'f':
			return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil
		case 'i':
			return strconv.AppendInt(dst, int64(t.num), 10), nil
		case 'u':
			return strconv.AppendUint(dst, uint64(t.num), 10), nil
		}
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// Float returns the floating-point value for a JSON number.
// It returns a NaN, +Inf, or -Inf value for any JSON string
// with the values "NaN", "Infinity", or "-Infinity".
// It panics for all other cases.
func (t Token) Float() float64 {
	switch raw := t.raw; {
	case raw != nil:
		// Handle raw number value.
		if uint64(raw.previousOffsetStart()) != t.num {
			panic(invalidTokenPanic)
		}
		if buf := raw.previousBuffer(); Kind(buf[0]).normalize() == '0' {
			parsed, _ := jsonwire.ParseFloat(buf, 64)
			return parsed
		}
	case t.num != 0:
		// Handle exact number value; str names the numeric type of num.
		switch t.str[0] {
		case 'f':
			return math.Float64frombits(t.num)
		case 'i':
			return float64(int64(t.num))
		case 'u':
			return float64(uint64(t.num))
		}
	}
	// Handle string values with "NaN", "Infinity", or "-Infinity".
	if t.Kind() == '"' {
		switch t.String() {
		case "NaN":
			return math.NaN()
		case "Infinity":
			return math.Inf(+1)
		case "-Infinity":
			return math.Inf(-1)
		}
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// Int returns the signed integer value for a JSON number.
// The fractional component of any number is ignored (truncation toward zero).
// Any number beyond the representation of an int64 will be saturated
// to the closest representable value.
// It panics if the token kind is not a JSON number.
// It also panics if a raw token has already been invalidated.
func (t Token) Int() int64 {
	if raw := t.raw; raw != nil {
		// Handle raw integer value.
		if uint64(raw.previousOffsetStart()) != t.num {
			panic(invalidTokenPanic)
		}
		// Strip a leading minus sign so that the magnitude can be
		// parsed as an unsigned integer.
		neg := false
		buf := raw.previousBuffer()
		if len(buf) > 0 && buf[0] == '-' {
			neg, buf = true, buf[1:]
		}
		// If ParseUint does not succeed, fall through to the
		// floating-point handling below.
		if numAbs, ok := jsonwire.ParseUint(buf); ok {
			if neg {
				// -minInt64 is the constant 1<<63, the magnitude of
				// the most negative int64.
				if numAbs > -minInt64 {
					return minInt64
				}
				// For numAbs == 1<<63, the conversion wraps to minInt64
				// and negating it leaves minInt64, which is correct.
				return -1 * int64(numAbs)
			} else {
				if numAbs > +maxInt64 {
					return maxInt64
				}
				return +1 * int64(numAbs)
			}
		}
	} else if t.num != 0 {
		// Handle exact integer value.
		switch t.str[0] {
		case 'i':
			return int64(t.num)
		case 'u':
			// Saturate unsigned values that exceed the int64 range.
			if t.num > maxInt64 {
				return maxInt64
			}
			return int64(t.num)
		}
	}
	// Handle JSON number that is a floating-point value.
	if t.Kind() == '0' {
		switch fv := t.Float(); {
		case fv >= maxInt64:
			return maxInt64
		case fv <= minInt64:
			return minInt64
		default:
			return int64(fv) // truncation toward zero
		}
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// Uint returns the unsigned integer value for a JSON number.
// The fractional component of any number is ignored (truncation toward zero).
// Any number beyond the representation of a uint64 will be saturated
// to the closest representable value.
// It panics if the token kind is not a JSON number.
// It also panics if a raw token has already been invalidated.
func (t Token) Uint() uint64 {
	// NOTE: This accessor returns 0 for any negative JSON number,
	// which might be surprising, but is at least consistent with the behavior
	// of saturating out-of-bounds numbers to the closest representable number.
	if raw := t.raw; raw != nil {
		// Handle raw integer value.
		if uint64(raw.previousOffsetStart()) != t.num {
			panic(invalidTokenPanic)
		}
		// Strip a leading minus sign so that the magnitude can be
		// parsed as an unsigned integer.
		neg := false
		buf := raw.previousBuffer()
		if len(buf) > 0 && buf[0] == '-' {
			neg, buf = true, buf[1:]
		}
		// If ParseUint does not succeed, fall through to the
		// floating-point handling below.
		if num, ok := jsonwire.ParseUint(buf); ok {
			if neg {
				return minUint64
			}
			return num
		}
	} else if t.num != 0 {
		// Handle exact integer value.
		switch t.str[0] {
		case 'u':
			return t.num
		case 'i':
			// Saturate negative signed values to zero.
			if int64(t.num) < minUint64 {
				return minUint64
			}
			return uint64(int64(t.num))
		}
	}
	// Handle JSON number that is a floating-point value.
	if t.Kind() == '0' {
		switch fv := t.Float(); {
		case fv >= maxUint64:
			return maxUint64
		case fv <= minUint64:
			return minUint64
		default:
			return uint64(fv) // truncation toward zero
		}
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// Kind returns the token kind.
// For a raw token, the kind is the normalized first byte of the token;
// for an exact token, it is '0' if num is non-zero and '"' if str is
// non-empty. Anything else is reported as invalidKind.
// It panics if a raw token has already been invalidated.
func (t Token) Kind() Kind {
	if raw := t.raw; raw != nil {
		if uint64(raw.previousOffsetStart()) != t.num {
			panic(invalidTokenPanic)
		}
		return Kind(raw.buf[raw.prevStart]).normalize()
	}
	if t.num != 0 {
		return '0'
	}
	if len(t.str) != 0 {
		return '"'
	}
	return invalidKind
}
// Kind represents each possible JSON token kind with a single byte,
// which is conveniently the first byte of that kind's grammar
// with the restriction that numbers always be represented with '0':
//
//   - 'n': null
//   - 'f': false
//   - 't': true
//   - '"': string
//   - '0': number
//   - '{': object begin
//   - '}': object end
//   - '[': array begin
//   - ']': array end
//
// An invalid kind is usually represented using 0,
// but may be non-zero due to invalid JSON data.
type Kind byte

// invalidKind is the zero Kind. Token.Kind returns it for a token that
// has no raw buffer, a zero num, and an empty str.
const invalidKind Kind = 0
// String returns a human-readable name for the kind.
// Literals, strings, and numbers are spelled out, delimiters print as
// themselves, and anything else is reported as an invalid kind.
func (k Kind) String() string {
	switch k {
	// Delimiters.
	case '{':
		return "{"
	case '}':
		return "}"
	case '[':
		return "["
	case ']':
		return "]"
	// Literals.
	case 'n':
		return "null"
	case 'f':
		return "false"
	case 't':
		return "true"
	// Strings and numbers.
	case '"':
		return "string"
	case '0':
		return "number"
	}
	return "<invalid jsontext.Kind: " + jsonwire.QuoteRune(string(k)) + ">"
}
// normalize maps every byte that can start a JSON number ('-' or a digit)
// to '0' and returns all other kinds unchanged.
func (k Kind) normalize() Kind {
	switch {
	case k == '-', '0' <= k && k <= '9':
		return '0'
	default:
		return k
	}
}

View File

@@ -1,395 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"io"
"slices"
"sync"
"next.orly.dev/pkg/json/internal/jsonflags"
"next.orly.dev/pkg/json/internal/jsonwire"
)
// NOTE: Value is analogous to v1 json.RawMessage.
// AppendFormat formats the JSON value in src and appends the result to dst,
// honoring the specified options.
// See [Value.Format] for more details about the formatting behavior.
//
// The dst and src may overlap.
// If an error is reported, then the entirety of src is appended to dst
// unformatted and returned together with the error.
func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) {
	enc := getBufferedEncoder(opts...)
	defer putBufferedEncoder(enc)
	enc.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
	err := enc.s.WriteValue(src)
	if err != nil {
		return append(dst, src...), err
	}
	return append(dst, enc.s.Buf...), nil
}
// Value represents a single raw JSON value, which may be one of the following:
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - an entire JSON object (e.g., {"fizz":"buzz"} )
// - an entire JSON array (e.g., [1,2,3] )
//
// Value can represent entire array or object values, while [Token] cannot.
// Value may contain leading and/or trailing whitespace.
//
// The underlying type is a byte slice holding the JSON text.
type Value []byte
// Clone returns a copy of v that does not share memory with v.
func (v Value) Clone() Value {
	dup := bytes.Clone([]byte(v))
	return Value(dup)
}
// String returns v as a string.
// A nil Value is rendered as "null".
func (v Value) String() string {
	if v != nil {
		return string(v)
	}
	return "null"
}
// IsValid reports whether the raw JSON value is syntactically valid
// according to the specified options.
//
// By default (if no options are specified), it validates according to RFC 7493.
// It verifies whether the input is properly encoded as UTF-8,
// that escape sequences within strings decode to valid Unicode codepoints, and
// that all names in each object are unique.
// It does not verify whether numbers are representable within the limits
// of any common numeric type (e.g., float64, int64, or uint64).
//
// Relevant options include:
//   - [AllowDuplicateNames]
//   - [AllowInvalidUTF8]
//
// All other options are ignored.
func (v Value) IsValid(opts ...Options) bool {
	// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
	dec := getBufferedDecoder(v, opts...)
	defer putBufferedDecoder(dec)
	// Valid input is exactly one value followed by end of input.
	_, valueErr := dec.ReadValue()
	_, trailingErr := dec.ReadToken()
	if valueErr != nil {
		return false
	}
	return trailingErr == io.EOF
}
// Format formats the raw JSON value in place.
//
// By default (if no options are specified), it validates according to RFC 7493
// and produces the minimal JSON representation, where
// all whitespace is elided and JSON strings use the shortest encoding.
//
// Relevant options include:
// - [AllowDuplicateNames]
// - [AllowInvalidUTF8]
// - [EscapeForHTML]
// - [EscapeForJS]
// - [PreserveRawStrings]
// - [CanonicalizeRawInts]
// - [CanonicalizeRawFloats]
// - [ReorderRawObjects]
// - [SpaceAfterColon]
// - [SpaceAfterComma]
// - [Multiline]
// - [WithIndent]
// - [WithIndentPrefix]
//
// All other options are ignored.
//
// It is guaranteed to succeed if the value is valid according to the same options.
// If the value is already formatted, then the buffer is not mutated.
func (v *Value) Format(opts ...Options) error {
// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
// All caller options go in the first option set; the second set is unused here.
return v.format(opts, nil)
}
// format is the shared implementation behind the formatting methods.
// It takes the options as two slices so callers need not allocate a
// combined slice; it is equivalent to v.Format(append(opts1, opts2...)...).
// On error v is left untouched.
func (v *Value) format(opts1, opts2 []Options) error {
	enc := getBufferedEncoder(opts1...)
	defer putBufferedEncoder(enc)
	enc.s.Join(opts2...)
	enc.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
	err := enc.s.WriteValue(*v)
	if err != nil {
		return err
	}
	// Only write back when the formatted output actually differs,
	// so an already-formatted buffer is never mutated.
	if formatted := enc.s.Buf; !bytes.Equal(*v, formatted) {
		*v = append((*v)[:0], formatted...)
	}
	return nil
}
// Compact removes all whitespace from the raw JSON value.
//
// It does not reformat JSON strings or numbers to use any other representation.
// To maximize the set of JSON values that can be formatted,
// this permits values with duplicate names and invalid UTF-8.
//
// Compact is equivalent to calling [Value.Format] with the following options:
//   - [AllowDuplicateNames](true)
//   - [AllowInvalidUTF8](true)
//   - [PreserveRawStrings](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
func (v *Value) Compact(opts ...Options) error {
	defaults := []Options{
		AllowDuplicateNames(true),
		AllowInvalidUTF8(true),
		PreserveRawStrings(true),
	}
	return v.format(defaults, opts)
}
// Indent reformats the whitespace in the raw JSON value so that each element
// in a JSON object or array begins on an indented line according to the nesting.
//
// It does not reformat JSON strings or numbers to use any other representation.
// To maximize the set of JSON values that can be formatted,
// this permits values with duplicate names and invalid UTF-8.
//
// Indent is equivalent to calling [Value.Format] with the following options:
//   - [AllowDuplicateNames](true)
//   - [AllowInvalidUTF8](true)
//   - [PreserveRawStrings](true)
//   - [Multiline](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
func (v *Value) Indent(opts ...Options) error {
	defaults := []Options{
		AllowDuplicateNames(true),
		AllowInvalidUTF8(true),
		PreserveRawStrings(true),
		Multiline(true),
	}
	return v.format(defaults, opts)
}
// Canonicalize canonicalizes the raw JSON value according to the
// JSON Canonicalization Scheme (JCS) as defined by RFC 8785
// where it produces a stable representation of a JSON value.
//
// JSON strings are formatted to use their minimal representation,
// JSON numbers are formatted as double precision numbers according
// to some stable serialization algorithm.
// JSON object members are sorted in ascending order by name.
// All whitespace is removed.
//
// The output stability is dependent on the stability of the application data
// (see RFC 8785, Appendix E). It cannot produce stable output from
// fundamentally unstable input. For example, if the JSON value
// contains ephemeral data (e.g., a frequently changing timestamp),
// then the value is still unstable regardless of whether this is called.
//
// Canonicalize is equivalent to calling [Value.Format] with the following options:
//   - [CanonicalizeRawInts](true)
//   - [CanonicalizeRawFloats](true)
//   - [ReorderRawObjects](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
//
// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
// Any numbers with precision beyond what is representable by that form
// will lose their precision when canonicalized. For example, integer values
// beyond ±2⁵³ will lose their precision. To preserve the original representation
// of JSON integers, additionally set [CanonicalizeRawInts] to false:
//
//	v.Canonicalize(jsontext.CanonicalizeRawInts(false))
func (v *Value) Canonicalize(opts ...Options) error {
	defaults := []Options{
		CanonicalizeRawInts(true),
		CanonicalizeRawFloats(true),
		ReorderRawObjects(true),
	}
	return v.format(defaults, opts)
}
// MarshalJSON returns v itself as its JSON encoding.
// The stored bytes are returned as-is, without validation or copying.
// A nil v encodes as a JSON null.
func (v Value) MarshalJSON() ([]byte, error) {
	// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
	if v != nil {
		return v, nil
	}
	return []byte("null"), nil
}
// UnmarshalJSON stores a copy of b in v, reusing v's existing capacity.
// The input is not validated.
// It returns an error if called on a nil *Value.
func (v *Value) UnmarshalJSON(b []byte) error {
	// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
	if v == nil {
		return errors.New("jsontext.Value: UnmarshalJSON on nil pointer")
	}
	dst := (*v)[:0]
	*v = append(dst, b...)
	return nil
}
// Kind returns the kind of the first token in v, skipping leading whitespace.
// For a valid value, this will never include '}' or ']'.
// It returns invalidKind when nothing follows the whitespace.
func (v Value) Kind() Kind {
	rest := v[jsonwire.ConsumeWhitespace(v):]
	if len(rest) == 0 {
		return invalidKind
	}
	return Kind(rest[0]).normalize()
}
// commaAndWhitespace is the cutset of bytes that may precede a member name
// inside objectMember.buffer: the separating comma and JSON whitespace.
const commaAndWhitespace = ", \n\r\t"
// objectMember records one name/value pair of a JSON object for sorting.
type objectMember struct {
// name is the unquoted name.
name []byte // e.g., "name"
// buffer is the entirety of the raw JSON object member
// starting from right after the previous member (or opening '{')
// until right after the member value.
buffer []byte // e.g., `, \n\r\t"name": "value"`
}
// Compare orders x relative to y, first by name using UTF-16 ordering and,
// when the names are equal, by the raw member text with its leading comma
// and whitespace stripped.
func (x objectMember) Compare(y objectMember) int {
	byName := jsonwire.CompareUTF16(x.name, y.name)
	if byName != 0 {
		return byName
	}
	// With [AllowDuplicateNames] or [AllowInvalidUTF8],
	// names could be identical, so also sort using the member value.
	xBody := bytes.TrimLeft(x.buffer, commaAndWhitespace)
	yBody := bytes.TrimLeft(y.buffer, commaAndWhitespace)
	return jsonwire.CompareUTF16(xBody, yBody)
}
// objectMemberPool recycles the member slices used while reordering objects.
var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }}

// getObjectMembers takes a member slice from the pool and truncates it to
// length zero, keeping its capacity.
func getObjectMembers() *[]objectMember {
	members := objectMemberPool.Get().(*[]objectMember)
	*members = (*members)[:0]
	return members
}
// putObjectMembers returns ns to the pool after clearing its elements.
// Slices whose capacity has reached 1<<10 are not pooled.
func putObjectMembers(ns *[]objectMember) {
	if cap(*ns) >= 1<<10 {
		return
	}
	clear(*ns) // avoid pinning name and buffer
	objectMemberPool.Put(ns)
}
// mustReorderObjects sorts, in place, the members of every JSON object
// contained in b. The value in b must be valid; otherwise it panics.
func mustReorderObjects(b []byte) {
	// The buffered encoder is borrowed only for its internal buffer,
	// which serves as scratch space while members are rearranged.
	scratchEnc := getBufferedEncoder()
	defer putBufferedEncoder(scratchEnc)

	// Parse purely syntactically: skip the duplicate-name and UTF-8 checks.
	dec := getBufferedDecoder(b)
	defer putBufferedDecoder(dec)
	dec.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
	mustReorderObjectsFromDecoder(dec, &scratchEnc.s.Buf) // per RFC 8785, section 3.2.3
}
// mustReorderObjectsFromDecoder recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// Pre-conditions:
// - The value is valid (i.e., no decoder errors should ever occur).
// - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
// - Exactly one JSON value is read from the Decoder.
// - All fully-parsed JSON objects are reordered by directly moving
// the members in the value buffer.
//
// The scratch buffer is shared across the recursive calls; each unsorted
// object assembles its reordered members there before copying them back
// into the decoder's buffer.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
switch tok, err := d.ReadToken(); tok.Kind() {
case '{':
// Iterate and collect the name and offsets for every object member.
members := getObjectMembers()
defer putObjectMembers(members)
var prevMember objectMember
isSorted := true
beforeBody := d.InputOffset() // offset after '{'
for d.PeekKind() != '}' {
beforeName := d.InputOffset()
var flags jsonwire.ValueFlags
// The error is ignored: the value is valid by pre-condition.
name, _ := d.s.ReadValue(&flags)
name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
// Consume the member value, reordering any nested objects first.
mustReorderObjectsFromDecoder(d, scratch)
afterValue := d.InputOffset()
currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
// Members must be strictly ascending to count as sorted;
// once one pair is out of order the check is skipped.
if isSorted && len(*members) > 0 {
isSorted = objectMember.Compare(prevMember, currMember) < 0
}
*members = append(*members, currMember)
prevMember = currMember
}
afterBody := d.InputOffset() // offset before '}'
d.ReadToken() // consume the closing '}'
// Sort the members; return early if it's already sorted.
if isSorted {
return
}
// Remember which member was first before and after sorting: only the
// first member of an object has no leading comma, so its prefix must be
// swapped with that of the member that takes its place.
firstBufferBeforeSorting := (*members)[0].buffer
slices.SortFunc(*members, objectMember.Compare)
firstBufferAfterSorting := (*members)[0].buffer
// Append the reordered members to a new buffer,
// then copy the reordered members back over the original members.
// Avoid swapping in place since each member may be a different size
// where moving a member over a smaller member may corrupt the data
// for subsequent members before they have been moved.
//
// The following invariant must hold:
// sum([m.after-m.before for m in members]) == afterBody-beforeBody
commaAndWhitespacePrefix := func(b []byte) []byte {
return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
}
sorted := (*scratch)[:0]
for i, member := range *members {
switch {
// Members are identified by the address of their first byte,
// since each buffer is a distinct sub-slice of d.s.buf.
case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
// First member after sorting is not the first member before sorting,
// so use the prefix of the first member before sorting.
sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
// Later member after sorting is the first member before sorting,
// so use the prefix of the first member after sorting.
sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
default:
sorted = append(sorted, member.buffer...)
}
}
if int(afterBody-beforeBody) != len(sorted) {
panic("BUG: length invariant violated")
}
copy(d.s.buf[beforeBody:afterBody], sorted)
// Update scratch buffer to the largest amount ever used.
if len(sorted) > len(*scratch) {
*scratch = sorted
}
case '[':
// Arrays keep their element order; only recurse into each element.
for d.PeekKind() != ']' {
mustReorderObjectsFromDecoder(d, scratch)
}
d.ReadToken() // consume the closing ']'
default:
// Any other token needs no reordering; a read error here violates
// the validity pre-condition.
if err != nil {
panic("BUG: " + err.Error())
}
}
}

View File

@@ -1,612 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
// JSON value parser state machine.
// Just about at the limit of what is reasonable to write by hand.
// Some parts are a bit tedious, but overall it nicely factors out the
// otherwise common code from the multiple scanning functions
// in this package (Compact, Indent, checkValid, etc).
//
// This file starts with two simple examples using the scanner
// before diving into the scanner itself.
import (
"strconv"
"sync"
)
// Valid reports whether data is a valid JSON encoding.
// It borrows a pooled scanner for the duration of the check.
func Valid(data []byte) bool {
	s := newScanner()
	defer freeScanner(s)
	err := checkValid(data, s)
	return err == nil
}
// checkValid verifies that data is valid JSON-encoded data.
// The caller supplies scan so that no scanner needs to be allocated here;
// it is reset before use.
// checkValid returns nil or a SyntaxError.
func checkValid(data []byte, scan *scanner) error {
	scan.reset()
	for i := 0; i < len(data); i++ {
		scan.bytes++
		if op := scan.step(scan, data[i]); op == scanError {
			return scan.err
		}
	}
	// Every byte was accepted; make sure the input did not stop mid-value.
	if scan.eof() != scanError {
		return nil
	}
	return scan.err
}
// A SyntaxError is a description of a JSON syntax error.
// [Unmarshal] will return a SyntaxError if the JSON can't be parsed.
type SyntaxError struct {
	msg    string // description of error
	Offset int64  // error occurred after reading Offset bytes
}

// Error returns the description of the syntax error.
func (e *SyntaxError) Error() string {
	return e.msg
}
// A scanner is a JSON scanning state machine.
// Callers call scan.reset and then pass bytes in one at a time
// by calling scan.step(&scan, c) for each byte.
// The return value, referred to as an opcode, tells the
// caller about significant parsing events like beginning
// and ending literals, objects, and arrays, so that the
// caller can follow along if it wishes.
// The return value scanEnd indicates that a single top-level
// JSON value has been completed, *before* the byte that
// just got passed in. (The indication must be delayed in order
// to recognize the end of numbers: is 123 a whole value or
// the beginning of 12345e+6?).
type scanner struct {
// The step is a func to be called to execute the next transition.
// Also tried using an integer constant and a single func
// with a switch, but using the func directly was 10% faster
// on a 64-bit Mac Mini, and it's nicer to read.
step func(*scanner, byte) int
// Reached end of top-level value.
endTop bool
// Stack of what we're in the middle of - array values, object keys, object values.
// Entries are the parse* constants, outermost at index 0.
parseState []int
// Error that happened, if any.
err error
// total bytes consumed, updated by decoder.Decode (and deliberately
// not set to zero by scan.reset)
bytes int64
}
// scannerPool recycles scanners between calls.
var scannerPool = sync.Pool{
	New: func() any { return new(scanner) },
}

// newScanner returns a pooled scanner that is reset and has its byte
// counter zeroed. Release it with freeScanner.
func newScanner() *scanner {
	s := scannerPool.Get().(*scanner)
	// reset deliberately leaves bytes alone, so clear it here.
	s.bytes = 0
	s.reset()
	return s
}
// freeScanner returns scan to the pool.
// A parse stack longer than 1024 entries is dropped first.
func freeScanner(scan *scanner) {
	// Avoid hanging on to too much memory in extreme cases.
	if depth := len(scan.parseState); depth > 1024 {
		scan.parseState = nil
	}
	scannerPool.Put(scan)
}
// These values are returned by the state transition functions
// assigned to scanner.step and the method scanner.eof.
// They give details about the current state of the scan that
// callers might be interested to know about.
// It is okay to ignore the return value of any particular
// call to scanner.step: if one call returns scanError,
// every subsequent call will return scanError too.
const (
// Continue.
scanContinue = iota // uninteresting byte
scanBeginLiteral // end implied by next result != scanContinue
scanBeginObject // begin object
scanObjectKey // just finished object key (string)
scanObjectValue // just finished non-last object value
scanEndObject // end object (implies scanObjectValue if possible)
scanBeginArray // begin array
scanArrayValue // just finished array value
scanEndArray // end array (implies scanArrayValue if possible)
scanSkipSpace // space byte; can skip; known to be last "continue" result
// Stop.
scanEnd // top-level value ended *before* this byte; known to be first "stop" result
scanError // hit an error, scanner.err.
)
// These values are stored in the parseState stack.
// They give the current state of a composite value
// being scanned. If the parser is inside a nested value
// the parseState describes the nested state, outermost at entry 0.
// stateEndValue inspects the innermost (last) entry to decide
// which separator or closing bracket is legal next.
const (
parseObjectKey = iota // parsing object key (before colon)
parseObjectValue // parsing object value (after colon)
parseArrayValue // parsing array value
)
// This limits the max nesting depth to prevent stack overflow.
// This is permitted by https://tools.ietf.org/html/rfc7159#section-9
// pushParseState reports an error once the parse stack holds more
// than this many entries.
const maxNestingDepth = 10000
// reset puts the scanner back at the start of a value: no error, empty
// parse stack, top-level value not yet finished.
// It must be called before calling s.step.
// The bytes counter is left untouched.
func (s *scanner) reset() {
	s.err = nil
	s.endTop = false
	s.parseState = s.parseState[:0]
	s.step = stateBeginValue
}
// eof tells the scanner that the end of input has been reached.
// It returns a scan status just as s.step does: scanEnd if a complete
// top-level value was seen, scanError otherwise.
func (s *scanner) eof() int {
	switch {
	case s.err != nil:
		return scanError
	case s.endTop:
		return scanEnd
	}
	// Feed one space so that a pending value (such as a number)
	// gets a chance to terminate.
	s.step(s, ' ')
	if s.endTop {
		return scanEnd
	}
	if s.err == nil {
		s.err = &SyntaxError{msg: "unexpected end of JSON input", Offset: s.bytes}
	}
	return scanError
}
// pushParseState pushes newParseState onto the parse stack.
// It returns successState, or the scanner's error state if the stack now
// exceeds maxNestingDepth; c is the byte reported in that error.
func (s *scanner) pushParseState(c byte, newParseState int, successState int) int {
	s.parseState = append(s.parseState, newParseState)
	if len(s.parseState) > maxNestingDepth {
		return s.error(c, "exceeded max depth")
	}
	return successState
}
// popParseState drops the innermost parse state (which the caller has
// already examined) and selects the next step: stateEndTop once the stack
// is empty, stateEndValue while still inside an enclosing composite.
func (s *scanner) popParseState() {
	remaining := len(s.parseState) - 1
	s.parseState = s.parseState[:remaining]
	if remaining != 0 {
		s.step = stateEndValue
		return
	}
	s.step = stateEndTop
	s.endTop = true
}
// isSpace reports whether c is a space, tab, carriage return or newline.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// stateBeginValueOrEmpty is the state after reading `[`.
// A `]` closes the empty array; anything else must begin a value.
func stateBeginValueOrEmpty(s *scanner, c byte) int {
	switch {
	case isSpace(c):
		return scanSkipSpace
	case c == ']':
		return stateEndValue(s, c)
	default:
		return stateBeginValue(s, c)
	}
}
// stateBeginValue is the state at the beginning of the input,
// and wherever else a value is expected.
// It picks the next state from the first byte of the value.
func stateBeginValue(s *scanner, c byte) int {
	if isSpace(c) {
		return scanSkipSpace
	}
	switch c {
	case '{':
		s.step = stateBeginStringOrEmpty
		return s.pushParseState(c, parseObjectKey, scanBeginObject)
	case '[':
		s.step = stateBeginValueOrEmpty
		return s.pushParseState(c, parseArrayValue, scanBeginArray)
	case '"':
		s.step = stateInString
	case '-':
		s.step = stateNeg
	case '0': // beginning of 0.123
		s.step = state0
	case '1', '2', '3', '4', '5', '6', '7', '8', '9': // beginning of 1234.5
		s.step = state1
	case 't': // beginning of true
		s.step = stateT
	case 'f': // beginning of false
		s.step = stateF
	case 'n': // beginning of null
		s.step = stateN
	default:
		return s.error(c, "looking for beginning of value")
	}
	// Every remaining case starts a literal.
	return scanBeginLiteral
}
// stateBeginStringOrEmpty is the state after reading `{`.
// A `}` closes the empty object; anything else must begin a key string.
func stateBeginStringOrEmpty(s *scanner, c byte) int {
	switch {
	case isSpace(c):
		return scanSkipSpace
	case c == '}':
		// Mark the object as being in value position so that
		// stateEndValue accepts the closing brace.
		s.parseState[len(s.parseState)-1] = parseObjectValue
		return stateEndValue(s, c)
	}
	return stateBeginString(s, c)
}
// stateBeginString is the state after reading `{"key": value,`.
// Only whitespace or the opening quote of the next key is accepted.
func stateBeginString(s *scanner, c byte) int {
	switch {
	case isSpace(c):
		return scanSkipSpace
	case c == '"':
		s.step = stateInString
		return scanBeginLiteral
	}
	return s.error(c, "looking for beginning of object key string")
}
// stateEndValue is the state after completing a value,
// such as after reading `{}` or `true` or `["x"`.
// What may follow depends on the innermost entry of the parse stack.
func stateEndValue(s *scanner, c byte) int {
	depth := len(s.parseState)
	if depth == 0 {
		// Completed top-level before the current byte.
		s.step = stateEndTop
		s.endTop = true
		return stateEndTop(s, c)
	}
	if isSpace(c) {
		s.step = stateEndValue
		return scanSkipSpace
	}
	top := &s.parseState[depth-1]
	switch *top {
	case parseObjectKey:
		if c != ':' {
			return s.error(c, "after object key")
		}
		*top = parseObjectValue
		s.step = stateBeginValue
		return scanObjectKey
	case parseObjectValue:
		switch c {
		case ',':
			*top = parseObjectKey
			s.step = stateBeginString
			return scanObjectValue
		case '}':
			s.popParseState()
			return scanEndObject
		}
		return s.error(c, "after object key:value pair")
	case parseArrayValue:
		switch c {
		case ',':
			s.step = stateBeginValue
			return scanArrayValue
		case ']':
			s.popParseState()
			return scanEndArray
		}
		return s.error(c, "after array element")
	}
	return s.error(c, "")
}
// stateEndTop is the state after finishing the top-level value,
// such as after reading `{}` or `[1,2,3]`.
// Only space characters should be seen now.
// It always returns scanEnd.
func stateEndTop(s *scanner, c byte) int {
	if isSpace(c) {
		return scanEnd
	}
	// Complain about non-space byte on next call.
	s.error(c, "after top-level value")
	return scanEnd
}
// stateInString is the state after reading `"`.
// A closing quote ends the string, a backslash starts an escape, and
// bytes below 0x20 are rejected.
func stateInString(s *scanner, c byte) int {
	switch {
	case c == '"':
		s.step = stateEndValue
	case c == '\\':
		s.step = stateInStringEsc
	case c < 0x20:
		return s.error(c, "in string literal")
	}
	return scanContinue
}
// stateInStringEsc is the state after reading `"\` during a quoted string.
// It accepts the single-character escapes and `u`, which begins a
// four-digit hexadecimal escape.
func stateInStringEsc(s *scanner, c byte) int {
	switch c {
	case 'u':
		s.step = stateInStringEscU
	case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
		s.step = stateInString
	default:
		return s.error(c, "in string escape code")
	}
	return scanContinue
}
// stateInStringEscU is the state after reading `"\u` during a quoted string.
// It expects the first of four hexadecimal digits.
func stateInStringEscU(s *scanner, c byte) int {
	isHex := '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
	if !isHex {
		return s.error(c, "in \\u hexadecimal character escape")
	}
	s.step = stateInStringEscU1
	return scanContinue
}
// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
// It expects the second of four hexadecimal digits.
func stateInStringEscU1(s *scanner, c byte) int {
	isHex := '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
	if !isHex {
		return s.error(c, "in \\u hexadecimal character escape")
	}
	s.step = stateInStringEscU12
	return scanContinue
}
// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
// It expects the third of four hexadecimal digits.
func stateInStringEscU12(s *scanner, c byte) int {
	isHex := '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
	if !isHex {
		return s.error(c, "in \\u hexadecimal character escape")
	}
	s.step = stateInStringEscU123
	return scanContinue
}
// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
// It expects the last hexadecimal digit and then resumes the string body.
func stateInStringEscU123(s *scanner, c byte) int {
	isHex := '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
	if !isHex {
		return s.error(c, "in \\u hexadecimal character escape")
	}
	s.step = stateInString
	return scanContinue
}
// stateNeg is the state after reading `-` during a number.
// A digit must follow.
func stateNeg(s *scanner, c byte) int {
	switch {
	case c == '0':
		s.step = state0
	case '1' <= c && c <= '9':
		s.step = state1
	default:
		return s.error(c, "in numeric literal")
	}
	return scanContinue
}
// state1 is the state after reading a non-zero integer during a number,
// such as after reading `1` or `100` but not `0`.
// Further digits extend the integer; any other byte is handled by state0.
func state1(s *scanner, c byte) int {
	if c < '0' || c > '9' {
		return state0(s, c)
	}
	s.step = state1
	return scanContinue
}
// state0 is the state after reading `0` during a number.
// A fraction or exponent may follow; otherwise the number is complete.
func state0(s *scanner, c byte) int {
	switch c {
	case '.':
		s.step = stateDot
	case 'e', 'E':
		s.step = stateE
	default:
		return stateEndValue(s, c)
	}
	return scanContinue
}
// stateDot is the state after reading the integer and decimal point in a number,
// such as after reading `1.`. At least one digit must follow.
func stateDot(s *scanner, c byte) int {
	if c < '0' || c > '9' {
		return s.error(c, "after decimal point in numeric literal")
	}
	s.step = stateDot0
	return scanContinue
}
// stateDot0 is the state after reading the integer, decimal point, and subsequent
// digits of a number, such as after reading `3.14`.
// More digits or an exponent may follow; otherwise the number is complete.
func stateDot0(s *scanner, c byte) int {
	switch {
	case '0' <= c && c <= '9':
		return scanContinue
	case c == 'e', c == 'E':
		s.step = stateE
		return scanContinue
	}
	return stateEndValue(s, c)
}
// stateE is the state after reading the mantissa and e in a number,
// such as after reading `314e` or `0.314e`.
// The sign is optional; without one, c is handled as the first exponent digit.
func stateE(s *scanner, c byte) int {
	switch c {
	case '+', '-':
		s.step = stateESign
		return scanContinue
	}
	return stateESign(s, c)
}
// stateESign is the state after reading the mantissa, e, and sign in a number,
// such as after reading `314e-` or `0.314e+`. A digit must follow.
func stateESign(s *scanner, c byte) int {
	if c < '0' || c > '9' {
		return s.error(c, "in exponent of numeric literal")
	}
	s.step = stateE0
	return scanContinue
}
// stateE0 is the state after reading the mantissa, e, optional sign,
// and at least one digit of the exponent in a number,
// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
// Further digits extend the exponent; anything else ends the number.
func stateE0(s *scanner, c byte) int {
	if c < '0' || c > '9' {
		return stateEndValue(s, c)
	}
	return scanContinue
}
// stateT is the state after reading `t`; only `r` may follow.
func stateT(s *scanner, c byte) int {
	if c != 'r' {
		return s.error(c, "in literal true (expecting 'r')")
	}
	s.step = stateTr
	return scanContinue
}
// stateTr is the state after reading `tr`; only `u` may follow.
func stateTr(s *scanner, c byte) int {
	if c != 'u' {
		return s.error(c, "in literal true (expecting 'u')")
	}
	s.step = stateTru
	return scanContinue
}
// stateTru is the state after reading `tru`; `e` completes the literal.
func stateTru(s *scanner, c byte) int {
	if c != 'e' {
		return s.error(c, "in literal true (expecting 'e')")
	}
	s.step = stateEndValue
	return scanContinue
}
// stateF is the state after reading `f`; only `a` may follow.
func stateF(s *scanner, c byte) int {
	if c != 'a' {
		return s.error(c, "in literal false (expecting 'a')")
	}
	s.step = stateFa
	return scanContinue
}
// stateFa is the state after reading `fa`; only `l` may follow.
func stateFa(s *scanner, c byte) int {
	if c != 'l' {
		return s.error(c, "in literal false (expecting 'l')")
	}
	s.step = stateFal
	return scanContinue
}
// stateFal is the state after reading `fal`; only `s` may follow.
func stateFal(s *scanner, c byte) int {
	if c != 's' {
		return s.error(c, "in literal false (expecting 's')")
	}
	s.step = stateFals
	return scanContinue
}
// stateFals is the state after reading `fals`; `e` completes the literal.
func stateFals(s *scanner, c byte) int {
	if c != 'e' {
		return s.error(c, "in literal false (expecting 'e')")
	}
	s.step = stateEndValue
	return scanContinue
}
// stateN is the state after reading `n`; only `u` may follow.
func stateN(s *scanner, c byte) int {
	if c != 'u' {
		return s.error(c, "in literal null (expecting 'u')")
	}
	s.step = stateNu
	return scanContinue
}
// stateNu is the state after reading `nu`; only `l` may follow.
func stateNu(s *scanner, c byte) int {
	if c != 'l' {
		return s.error(c, "in literal null (expecting 'l')")
	}
	s.step = stateNul
	return scanContinue
}
// stateNul is the state after reading `nul`; `l` completes the literal.
func stateNul(s *scanner, c byte) int {
	if c != 'l' {
		return s.error(c, "in literal null (expecting 'l')")
	}
	s.step = stateEndValue
	return scanContinue
}
// stateError is the state after reaching a syntax error,
// such as after reading `[1}` or `5.1.2`.
// It ignores its arguments and keeps returning scanError.
func stateError(_ *scanner, _ byte) int {
	return scanError
}
// error records a SyntaxError describing the offending byte c and the
// context it appeared in, switches the scanner to the error state, and
// returns scanError.
func (s *scanner) error(c byte, context string) int {
	s.step = stateError
	msg := "invalid character " + quoteChar(c) + " " + context
	s.err = &SyntaxError{msg: msg, Offset: s.bytes}
	return scanError
}
// quoteChar formats c as a single-quoted character literal.
func quoteChar(c byte) string {
	// special cases - different from quoted strings
	switch c {
	case '\'':
		return `'\''`
	case '"':
		return `'"'`
	}
	// Quote as a string, then swap the double quotes for single quotes.
	quoted := strconv.Quote(string(c))
	return "'" + quoted[1:len(quoted)-1] + "'"
}

View File

@@ -1,514 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (
"bytes"
"errors"
"io"
)
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
r io.Reader // input source; refill reads from it
buf []byte // buffered input; readValue scans values out of it
d decodeState
scanp int // start of unread data in buf
scanned int64 // amount of data already scanned
scan scanner
err error // sticky error; once set, Decode returns it immediately
tokenState int
tokenStack []int
}
// NewDecoder returns a new decoder that reads from r.
//
// The decoder introduces its own buffering and may
// read data from r beyond the JSON values requested.
func NewDecoder(r io.Reader) *Decoder {
	dec := new(Decoder)
	dec.r = r
	return dec
}
// UseNumber causes the Decoder to unmarshal a number into an
// interface value as a [Number] instead of as a float64.
func (dec *Decoder) UseNumber() {
	dec.d.useNumber = true
}
// DisallowUnknownFields causes the Decoder to return an error when the destination
// is a struct and the input contains object keys which do not match any
// non-ignored, exported fields in the destination.
func (dec *Decoder) DisallowUnknownFields() {
	dec.d.disallowUnknownFields = true
}
// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
// See the documentation for [Unmarshal] for details about
// the conversion of JSON into a Go value.
//
// A previously recorded decoder error is returned immediately.
// An unmarshal error is returned without being recorded.
func (dec *Decoder) Decode(v any) error {
	if dec.err != nil {
		return dec.err
	}
	if err := dec.tokenPrepareForDecode(); err != nil {
		return err
	}
	if !dec.tokenValueAllowed() {
		return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
	}

	// Read whole value into buffer.
	n, err := dec.readValue()
	if err != nil {
		return err
	}
	start := dec.scanp
	end := start + n
	dec.d.init(dec.buf[start:end])
	dec.scanp = end

	// Don't save err from unmarshal into dec.err:
	// the connection is still usable since we read a complete JSON
	// object from it before the error happened.
	unmarshalErr := dec.d.unmarshal(v)

	// fixup token streaming state
	dec.tokenValueEnd()
	return unmarshalErr
}
// Buffered returns a reader of the data remaining in the Decoder's
// buffer, i.e. everything from the current scan position onward.
// The reader is valid until the next call to [Decoder.Decode].
func (dec *Decoder) Buffered() io.Reader {
	unread := dec.buf[dec.scanp:]
	return bytes.NewReader(unread)
}
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The value starts at dec.buf[dec.scanp]; the caller is responsible for
// advancing dec.scanp past it. Scan and read errors are recorded in dec.err
// before being returned.
func (dec *Decoder) readValue() (int, error) {
dec.scan.reset()
scanp := dec.scanp
var err error
Input:
// help the compiler see that scanp is never negative, so it can remove
// some bounds checks below.
for scanp >= 0 {
// Look in the buffer for a new value.
for ; scanp < len(dec.buf); scanp++ {
c := dec.buf[scanp]
dec.scan.bytes++
switch dec.scan.step(&dec.scan, c) {
case scanEnd:
// scanEnd is delayed one byte so we decrement
// the scanner bytes count by 1 to ensure that
// this value is correct in the next call of Decode.
dec.scan.bytes--
break Input
case scanEndObject, scanEndArray:
// scanEnd is delayed one byte.
// We might block trying to get that byte from src,
// so instead invent a space byte.
if stateEndValue(&dec.scan, ' ') == scanEnd {
scanp++
break Input
}
case scanError:
dec.err = dec.scan.err
return 0, dec.scan.err
}
}
// Did the last read have an error?
// Delayed until now to allow buffer scan.
if err != nil {
if err == io.EOF {
// Feed the scanner a space to see whether the data buffered
// so far already forms a complete value.
if dec.scan.step(&dec.scan, ' ') == scanEnd {
break Input
}
// Input ended inside a value: report it as unexpected EOF
// unless the buffer holds nothing but whitespace.
if nonSpace(dec.buf) {
err = io.ErrUnexpectedEOF
}
}
dec.err = err
return 0, err
}
// refill may slide the buffer and reset dec.scanp, so carry the scan
// position across the call as an offset from dec.scanp.
n := scanp - dec.scanp
err = dec.refill()
scanp = dec.scanp + n
}
return scanp - dec.scanp, nil
}
// refill reads more input from dec.r into dec.buf.
//
// Bytes before dec.scanp have already been consumed: they are dropped by
// sliding the unread tail to the front of the buffer, and their count is
// added to dec.scanned. The buffer is grown when fewer than minRead bytes
// of spare capacity remain. The error from the single Read call is returned
// as-is so the caller can scan the new bytes before acting on it.
func (dec *Decoder) refill() error {
	const minRead = 512

	// Discard the consumed prefix.
	if consumed := dec.scanp; consumed > 0 {
		dec.scanned += int64(consumed)
		kept := copy(dec.buf, dec.buf[consumed:])
		dec.buf = dec.buf[:kept]
		dec.scanp = 0
	}

	// Ensure there is room for a reasonably sized read.
	if free := cap(dec.buf) - len(dec.buf); free < minRead {
		grown := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
		copy(grown, dec.buf)
		dec.buf = grown
	}

	// Read into the spare capacity and extend the slice over the new bytes.
	used := len(dec.buf)
	got, err := dec.r.Read(dec.buf[used:cap(dec.buf)])
	dec.buf = dec.buf[:used+got]
	return err
}
// nonSpace reports whether b contains at least one byte for which isSpace
// is false.
func nonSpace(b []byte) bool {
	for i := 0; i < len(b); i++ {
		if isSpace(b[i]) {
			continue
		}
		return true
	}
	return false
}
// An Encoder writes JSON values to an output stream.
type Encoder struct {
w io.Writer // destination for encoded output
err error // sticky write error; once set, Encode returns it immediately
escapeHTML bool // whether Encode asks for HTML escaping; NewEncoder sets it to true
indentBuf []byte // scratch buffer reused by Encode for indented output
indentPrefix string // line prefix set by SetIndent
indentValue string // per-level indent string set by SetIndent
}
// NewEncoder constructs an Encoder that writes to w. HTML escaping is
// enabled on the returned encoder.
func NewEncoder(w io.Writer) *Encoder {
	enc := new(Encoder)
	enc.w = w
	enc.escapeHTML = true
	return enc
}
// Encode marshals v to JSON and writes it to the stream, with
// insignificant space characters elided and a trailing newline appended.
//
// See the documentation for [Marshal] for details about the
// conversion of Go values to JSON.
//
// A failed write is remembered and returned by every later call. Marshal
// and indent errors are returned but not remembered.
func (enc *Encoder) Encode(v any) error {
	if enc.err != nil {
		return enc.err
	}

	state := newEncodeState()
	defer encodeStatePool.Put(state)

	if err := state.marshal(v, encOpts{escapeHTML: enc.escapeHTML}); err != nil {
		return err
	}

	// The newline makes debugging output easier to read and also acts as a
	// terminator: after a number, some kind of space is needed so the
	// reader knows no more digits are coming.
	state.WriteByte('\n')
	out := state.Bytes()

	if indenting := enc.indentPrefix != "" || enc.indentValue != ""; indenting {
		indented, err := appendIndent(enc.indentBuf[:0], out, enc.indentPrefix, enc.indentValue)
		enc.indentBuf = indented
		if err != nil {
			return err
		}
		out = enc.indentBuf
	}

	_, err := enc.w.Write(out)
	if err != nil {
		enc.err = err
	}
	return err
}
// SetIndent makes the encoder format every subsequently encoded value as if
// it had been passed through the package-level function
// Indent(dst, src, prefix, indent).
// Calling SetIndent("", "") turns indentation off again.
func (enc *Encoder) SetIndent(prefix, indent string) {
	enc.indentPrefix, enc.indentValue = prefix, indent
}
// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
// to avoid certain safety problems that can arise when embedding JSON in HTML.
//
// In non-HTML settings where the escaping interferes with the readability
// of the output, SetEscapeHTML(false) disables this behavior.
//
// The setting is read on each call to Encode, so it affects all values
// encoded after SetEscapeHTML returns.
func (enc *Encoder) SetEscapeHTML(on bool) {
enc.escapeHTML = on
}
// RawMessage holds a JSON value in its raw, already-encoded form.
// It implements [Marshaler] and [Unmarshaler], which makes it useful for
// postponing the decoding of a value or for supplying a precomputed encoding.
type RawMessage []byte

// MarshalJSON returns m itself as its JSON encoding.
// A nil RawMessage encodes as the JSON literal null.
func (m RawMessage) MarshalJSON() ([]byte, error) {
	if m != nil {
		return m, nil
	}
	return []byte("null"), nil
}

// UnmarshalJSON stores a copy of data in *m, reusing m's existing backing
// array when it is large enough. It fails when called on a nil pointer.
func (m *RawMessage) UnmarshalJSON(data []byte) error {
	if m == nil {
		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
	}
	reused := (*m)[:0]
	*m = append(reused, data...)
	return nil
}
// Compile-time assertions that *RawMessage implements Marshaler and Unmarshaler.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
// A Token holds a value of one of these types:
//
// - [Delim], for the four JSON delimiters [ ] { }
// - bool, for JSON booleans
// - float64, for JSON numbers
// - [Number], for JSON numbers (when [Decoder.UseNumber] has been called)
// - string, for JSON string literals
// - nil, for JSON null
type Token any
// Parser states of the Token API, stored in Decoder.tokenState. Each state
// names the position the decoder has reached in the input.
const (
tokenTopValue = iota // at top level (the zero value); a value may start here
tokenArrayStart // just read '['; a value or ']' may follow
tokenArrayValue // just read ',' inside an array; a value must follow
tokenArrayComma // just read an array element; ',' or ']' may follow
tokenObjectStart // just read '{'; a key or '}' may follow
tokenObjectKey // just read ',' inside an object; a key must follow
tokenObjectColon // just read an object key; ':' must follow
tokenObjectValue // just read ':'; a value must follow
tokenObjectComma // just read an object value; ',' or '}' may follow
)
// tokenPrepareForDecode moves the token state from a separator state to the
// matching value state, consuming the pending ',' (inside an array) or ':'
// (after an object key). In any other state it does nothing.
//
// peek is deliberately called only in the two separator states so that the
// ordinary Decode path, which never uses the Token API, does not pay for it.
func (dec *Decoder) tokenPrepareForDecode() error {
	var (
		want byte
		msg  string
		next int
	)
	switch dec.tokenState {
	case tokenArrayComma:
		want, msg, next = ',', "expected comma after array element", tokenArrayValue
	case tokenObjectColon:
		want, msg, next = ':', "expected colon after object key", tokenObjectValue
	default:
		return nil
	}

	c, err := dec.peek()
	if err != nil {
		return err
	}
	if c != want {
		return &SyntaxError{msg, dec.InputOffset()}
	}
	dec.scanp++
	dec.tokenState = next
	return nil
}
// tokenValueAllowed reports whether the current token state is one in
// which a JSON value may begin.
func (dec *Decoder) tokenValueAllowed() bool {
	switch dec.tokenState {
	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
		return true
	default:
		return false
	}
}
// tokenValueEnd updates the token state after a complete value has been
// consumed: inside an array the next thing expected is a comma (or the
// closing bracket), and after an object value likewise. Other states are
// left unchanged.
func (dec *Decoder) tokenValueEnd() {
	if s := dec.tokenState; s == tokenArrayStart || s == tokenArrayValue {
		dec.tokenState = tokenArrayComma
	} else if s == tokenObjectValue {
		dec.tokenState = tokenObjectComma
	}
}
// A Delim is one of the four JSON array or object delimiters: [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	r := rune(d)
	return string(r)
}
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, [io.EOF].
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type [Delim]
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
// Loop so that separators (':' and ',') can be consumed silently before
// the next real token is returned.
for {
c, err := dec.peek()
if err != nil {
return nil, err
}
switch c {
case '[':
if !dec.tokenValueAllowed() {
return dec.tokenError(c)
}
dec.scanp++
// Remember the enclosing state so it can be restored at the matching ']'.
dec.tokenStack = append(dec.tokenStack, dec.tokenState)
dec.tokenState = tokenArrayStart
return Delim('['), nil
case ']':
if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
return dec.tokenError(c)
}
dec.scanp++
// Pop back to the enclosing state; the array as a whole counts as a
// completed value there.
dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
dec.tokenValueEnd()
return Delim(']'), nil
case '{':
if !dec.tokenValueAllowed() {
return dec.tokenError(c)
}
dec.scanp++
// Remember the enclosing state so it can be restored at the matching '}'.
dec.tokenStack = append(dec.tokenStack, dec.tokenState)
dec.tokenState = tokenObjectStart
return Delim('{'), nil
case '}':
if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
return dec.tokenError(c)
}
dec.scanp++
// Pop back to the enclosing state; the object as a whole counts as a
// completed value there.
dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
dec.tokenValueEnd()
return Delim('}'), nil
case ':':
if dec.tokenState != tokenObjectColon {
return dec.tokenError(c)
}
dec.scanp++
dec.tokenState = tokenObjectValue
continue
case ',':
if dec.tokenState == tokenArrayComma {
dec.scanp++
dec.tokenState = tokenArrayValue
continue
}
if dec.tokenState == tokenObjectComma {
dec.scanp++
dec.tokenState = tokenObjectKey
continue
}
return dec.tokenError(c)
case '"':
// In key position the string is an object key. Decode only accepts a
// value in a value state, so switch to tokenTopValue for the duration
// of the call and restore the real state if decoding fails.
if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
var x string
old := dec.tokenState
dec.tokenState = tokenTopValue
err := dec.Decode(&x)
dec.tokenState = old
if err != nil {
return nil, err
}
dec.tokenState = tokenObjectColon
return x, nil
}
// Otherwise the string is an ordinary value.
fallthrough
default:
if !dec.tokenValueAllowed() {
return dec.tokenError(c)
}
// Decode updates the token state itself once the value is consumed.
var x any
if err := dec.Decode(&x); err != nil {
return nil, err
}
return x, nil
}
}
}
// tokenError builds the SyntaxError returned by Token for an unexpected
// character c. The message is suffixed with a description of what the
// decoder was expecting in the current token state; states without a
// description add no suffix.
func (dec *Decoder) tokenError(c byte) (Token, error) {
	var where string
	switch dec.tokenState {
	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
		where = " looking for beginning of value"
	case tokenArrayComma:
		where = " after array element"
	case tokenObjectKey:
		where = " looking for beginning of object key string"
	case tokenObjectColon:
		where = " after object key"
	case tokenObjectComma:
		where = " after object key:value pair"
	}
	msg := "invalid character " + quoteChar(c) + where
	return nil, &SyntaxError{msg, dec.InputOffset()}
}
// More reports whether the array or object currently being parsed has
// another element. It returns false when the next non-space byte is ']' or
// '}', or when no further byte can be read.
func (dec *Decoder) More() bool {
	c, err := dec.peek()
	if err != nil {
		return false
	}
	return c != ']' && c != '}'
}
// peek returns the next non-space byte of input without consuming it,
// refilling the buffer as needed. On success dec.scanp is left pointing at
// the returned byte. A read error is reported only after the bytes
// delivered by that read have been examined.
func (dec *Decoder) peek() (byte, error) {
	var readErr error
	for {
		for pos := dec.scanp; pos < len(dec.buf); pos++ {
			if b := dec.buf[pos]; !isSpace(b) {
				dec.scanp = pos
				return b, nil
			}
		}
		// Everything buffered was whitespace; surface a pending read error
		// or fetch more input.
		if readErr != nil {
			return 0, readErr
		}
		readErr = dec.refill()
	}
}
// InputOffset reports the decoder's current position as a byte offset into
// the input stream. The offset marks the end of the most recently returned
// token and the beginning of the next one.
func (dec *Decoder) InputOffset() int64 {
	inBuffer := int64(dec.scanp)
	return dec.scanned + inBuffer
}

View File

@@ -1,220 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import "unicode/utf8"
// safeSet holds the value true if the ASCII character with the given array
// position can be represented inside a JSON string without any further
// escaping.
//
// All values are true except for the ASCII control characters (0-31), the
// double quote ("), and the backslash character ("\").
//
// The table is built once at package initialization: every position from
// the space character up to utf8.RuneSelf-1 is marked safe unless it is one
// of the two exceptions above.
var safeSet = func() (table [utf8.RuneSelf]bool) {
	for c := ' '; c < utf8.RuneSelf; c++ {
		table[c] = c != '"' && c != '\\'
	}
	return table
}()
// htmlSafeSet holds the value true if the ASCII character with the given
// array position can be safely represented inside a JSON string, embedded
// inside of HTML <script> tags, without any additional escaping.
//
// All values are true except for the ASCII control characters (0-31), the
// double quote ("), the backslash character ("\"), HTML opening and closing
// tags ("<" and ">"), and the ampersand ("&").
//
// The table is built once at package initialization: every position from
// the space character up to utf8.RuneSelf-1 is marked safe unless it is one
// of the exceptions above.
var htmlSafeSet = func() (table [utf8.RuneSelf]bool) {
	for c := ' '; c < utf8.RuneSelf; c++ {
		switch c {
		case '"', '\\', '<', '>', '&':
			// needs escaping; leave false
		default:
			table[c] = true
		}
	}
	return table
}()

View File

@@ -1,40 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (
"strings"
)
// tagOptions is the part of a struct field's "json" tag that follows the
// first comma, or the empty string when there is none. The leading comma is
// not included.
type tagOptions string

// parseTag splits a struct field's json tag at the first comma into the
// field name and the remaining comma-separated options.
func parseTag(tag string) (string, tagOptions) {
	name, rest, _ := strings.Cut(tag, ",")
	return name, tagOptions(rest)
}

// Contains reports whether optionName appears as a complete entry in the
// comma-separated option list; a match must be bounded by commas or by the
// ends of the string.
func (o tagOptions) Contains(optionName string) bool {
	for rest := string(o); rest != ""; {
		var name string
		name, rest, _ = strings.Cut(rest, ",")
		if name == optionName {
			return true
		}
	}
	return false
}

View File

@@ -1,253 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Represents JSON data structure using native Go types: booleans, floats,
// strings, arrays, and maps.
package json
import (
"cmp"
"fmt"
"reflect"
"strconv"
"next.orly.dev/pkg/json/internal/jsonwire"
"next.orly.dev/pkg/json/jsontext"
jsonv2 "next.orly.dev/pkg/json/v2"
)
// Unmarshal parses the JSON-encoded data and stores the result
// in the value pointed to by v. If v is nil or not a pointer,
// Unmarshal returns an [InvalidUnmarshalError].
//
// Unmarshal uses the inverse of the encodings that
// [Marshal] uses, allocating maps, slices, and pointers as necessary,
// with the following additional rules:
//
// To unmarshal JSON into a pointer, Unmarshal first handles the case of
// the JSON being the JSON literal null. In that case, Unmarshal sets
// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into
// the value pointed at by the pointer. If the pointer is nil, Unmarshal
// allocates a new value for it to point to.
//
// To unmarshal JSON into a value implementing [Unmarshaler],
// Unmarshal calls that value's [Unmarshaler.UnmarshalJSON] method, including
// when the input is a JSON null.
// Otherwise, if the value implements [encoding.TextUnmarshaler]
// and the input is a JSON quoted string, Unmarshal calls
// [encoding.TextUnmarshaler.UnmarshalText] with the unquoted form of the string.
//
// To unmarshal JSON into a struct, Unmarshal matches incoming object
// keys to the keys used by [Marshal] (either the struct field name or its tag),
// preferring an exact match but also accepting a case-insensitive match. By
// default, object keys which don't have a corresponding struct field are
// ignored (see [Decoder.DisallowUnknownFields] for an alternative).
//
// To unmarshal JSON into an interface value,
// Unmarshal stores one of these in the interface value:
//
// - bool, for JSON booleans
// - float64, for JSON numbers
// - string, for JSON strings
// - []any, for JSON arrays
// - map[string]any, for JSON objects
// - nil for JSON null
//
// To unmarshal a JSON array into a slice, Unmarshal resets the slice length
// to zero and then appends each element to the slice.
// As a special case, to unmarshal an empty JSON array into a slice,
// Unmarshal replaces the slice with a new empty slice.
//
// To unmarshal a JSON array into a Go array, Unmarshal decodes
// JSON array elements into corresponding Go array elements.
// If the Go array is smaller than the JSON array,
// the additional JSON array elements are discarded.
// If the JSON array is smaller than the Go array,
// the additional Go array elements are set to zero values.
//
// To unmarshal a JSON object into a map, Unmarshal first establishes a map to
// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal
// reuses the existing map, keeping existing entries. Unmarshal then stores
// key-value pairs from the JSON object into the map. The map's key type must
// either be any string type, an integer, or implement [encoding.TextUnmarshaler].
//
// If the JSON-encoded data contain a syntax error, Unmarshal returns a [SyntaxError].
//
// If a JSON value is not appropriate for a given target type,
// or if a JSON number overflows the target type, Unmarshal
// skips that field and completes the unmarshaling as best it can.
// If no more serious errors are encountered, Unmarshal returns
// an [UnmarshalTypeError] describing the earliest such error. In any
// case, it's not guaranteed that all the remaining fields following
// the problematic one will be unmarshaled into the target object.
//
// The JSON null value unmarshals into an interface, map, pointer, or slice
// by setting that Go value to nil. Because null is often used in JSON to mean
// “not present,” unmarshaling a JSON null into any other Go type has no effect
// on the value and produces no error.
//
// When unmarshaling quoted strings, invalid UTF-8 or
// invalid UTF-16 surrogate pairs are not treated as an error.
// Instead, they are replaced by the Unicode replacement
// character U+FFFD.
func Unmarshal(data []byte, v any) error {
return jsonv2.Unmarshal(data, v, DefaultOptionsV1())
}
// Unmarshaler is the interface implemented by types
// that can unmarshal a JSON description of themselves.
// The input can be assumed to be a valid encoding of
// a JSON value. UnmarshalJSON must copy the JSON data
// if it wishes to retain the data after returning.
//
// It is an alias for the v2 package's Unmarshaler type, so the two are
// interchangeable.
type Unmarshaler = jsonv2.Unmarshaler
// An UnmarshalTypeError reports a JSON value that could not be stored in a
// value of a particular Go type.
type UnmarshalTypeError struct {
	Value  string       // description of JSON value - "bool", "array", "number -5"
	Type   reflect.Type // type of Go value it could not be assigned to
	Offset int64        // error occurred after reading Offset bytes
	Struct string       // name of the root type containing the field
	Field  string       // the full path from root node to the value
	Err    error        // may be nil
}

// Error assembles the message from whichever fields are set: the JSON value
// description, the Struct.Field path, the Go type, and the underlying
// error. Unset parts are omitted.
func (e *UnmarshalTypeError) Error() string {
	msg := "json: cannot unmarshal"
	if e.Value != "" {
		msg += " JSON " + e.Value
	}
	msg += " into"

	hasField := e.Field != ""
	if hasField {
		msg += " " + e.Struct + "." + e.Field
	}
	if e.Type != nil {
		// "of" only reads naturally when a field path precedes the type.
		if hasField {
			msg += " of"
		}
		msg += " Go type " + e.Type.String()
	}
	if e.Err != nil {
		msg += ": " + e.Err.Error()
	}
	return msg
}

// Unwrap returns the underlying error, which may be nil.
func (e *UnmarshalTypeError) Unwrap() error {
	return e.Err
}
// An UnmarshalFieldError reports a JSON object key that mapped to an
// unexported (and therefore unwritable) struct field.
//
// Deprecated: No longer used; kept for compatibility.
type UnmarshalFieldError struct {
	Key   string
	Type  reflect.Type
	Field reflect.StructField
}

// Error names the offending key (quoted), the unexported field it matched,
// and the struct type.
func (e *UnmarshalFieldError) Error() string {
	quotedKey := strconv.Quote(e.Key)
	fieldName := e.Field.Name
	typeName := e.Type.String()
	return "json: cannot unmarshal object key " + quotedKey + " into unexported field " + fieldName + " of type " + typeName
}
// An InvalidUnmarshalError reports an invalid argument passed to [Unmarshal].
// (The argument to [Unmarshal] must be a non-nil pointer.)
type InvalidUnmarshalError struct {
	Type reflect.Type
}

// Error distinguishes three cases: no type at all, a non-pointer type, and
// a nil pointer of the given type.
func (e *InvalidUnmarshalError) Error() string {
	switch {
	case e.Type == nil:
		return "json: Unmarshal(nil)"
	case e.Type.Kind() != reflect.Pointer:
		return "json: Unmarshal(non-pointer " + e.Type.String() + ")"
	default:
		return "json: Unmarshal(nil " + e.Type.String() + ")"
	}
}
// A Number holds the text of a JSON number literal.
type Number string

// String returns the number's literal text.
func (n Number) String() string {
	return string(n)
}

// Float64 parses the number as a 64-bit floating-point value.
func (n Number) Float64() (float64, error) {
	text := string(n)
	return strconv.ParseFloat(text, 64)
}

// Int64 parses the number as a base-10 signed 64-bit integer.
func (n Number) Int64() (int64, error) {
	text := string(n)
	return strconv.ParseInt(text, 10, 64)
}
// numberType is the reflect.Type of Number; it is reported as the GoType in
// the semantic errors returned by Number.UnmarshalJSONFrom.
var numberType = reflect.TypeFor[Number]()
// MarshalJSONTo implements [jsonv2.MarshalerTo].
//
// The number is written as a quoted string when the StringifyNumbers option
// is set or when the encoder is positioned at an object name; otherwise it
// is written bare. An empty Number is encoded as 0. Text that is not a
// complete, valid JSON number is rejected with an error wrapping
// strconv.ErrSyntax.
func (n Number) MarshalJSONTo(enc *jsontext.Encoder) error {
opts := enc.Options()
stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers)
// Note: k and n here are scoped to the if statement; this n shadows the receiver.
if k, n := enc.StackIndex(enc.StackDepth()); k == '{' && n%2 == 0 {
stringify = true // expecting a JSON object name
}
// Substitute "0" for the empty Number.
n = cmp.Or(n, "0")
var num []byte
// Build the output in the encoder's spare buffer space.
val := enc.AvailableBuffer()
if stringify {
val = append(val, '"')
val = append(val, n...)
val = append(val, '"')
// num is the digits only, without the surrounding quotes.
num = val[len(`"`) : len(val)-len(`"`)]
} else {
val = append(val, n...)
num = val
}
// The whole of num must parse as one JSON number (this n is the consumed
// length and shadows the receiver).
if n, err := jsonwire.ConsumeNumber(num); n != len(num) || err != nil {
return fmt.Errorf("cannot parse %q as JSON number: %w", val, strconv.ErrSyntax)
}
return enc.WriteValue(val)
}
// UnmarshalJSONFrom implements [jsonv2.UnmarshalerFrom].
//
// It reads one value from dec and accepts a JSON null, a JSON string whose
// content is a valid JSON number, or (unless numbers must be stringified) a
// value of kind '0'. Anything else yields a [jsonv2.SemanticError].
func (n *Number) UnmarshalJSONFrom(dec *jsontext.Decoder) error {
opts := dec.Options()
stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers)
// Note: k and n here are scoped to the if statement; this n shadows the receiver.
if k, n := dec.StackIndex(dec.StackDepth()); k == '{' && n%2 == 0 {
stringify = true // expecting a JSON object name
}
val, err := dec.ReadValue()
if err != nil {
return err
}
// Keep the value as read; val is overwritten with the unquoted text in the
// string case, and errors should report the original.
val0 := val
k := val.Kind()
switch k {
case 'n':
// JSON null: clear the Number unless legacy merge semantics are on, in
// which case it is left untouched.
if legacy, _ := jsonv2.GetOption(opts, MergeWithLegacySemantics); !legacy {
*n = ""
}
return nil
case '"':
// Quoted number: unquote, then require the content to be exactly one
// JSON number.
verbatim := jsonwire.ConsumeSimpleString(val) == len(val)
val = jsonwire.UnquoteMayCopy(val, verbatim)
if n, err := jsonwire.ConsumeNumber(val); n != len(val) || err != nil {
return &jsonv2.SemanticError{JSONKind: val0.Kind(), JSONValue: val0.Clone(), GoType: numberType, Err: strconv.ErrSyntax}
}
*n = Number(val)
return nil
case '0':
// NOTE(review): '0' appears to be the jsontext kind for a JSON number — confirm.
// When stringified numbers are required, fall through to the error below.
if stringify {
break
}
*n = Number(val)
return nil
}
return &jsonv2.SemanticError{JSONKind: k, GoType: numberType}
}

View File

@@ -1,251 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package json implements encoding and decoding of JSON as defined in
// RFC 7159. The mapping between JSON and Go values is described
// in the documentation for the Marshal and Unmarshal functions.
//
// See "JSON and Go" for an introduction to this package:
// https://golang.org/doc/articles/json_and_go.html
//
// # Security Considerations
//
// See the "Security Considerations" section in [encoding/json/v2].
//
// For historical reasons, the default behavior of v1 [encoding/json]
// unfortunately operates with less secure defaults.
// New usages of JSON in Go are encouraged to use [encoding/json/v2] instead.
package json
import (
"reflect"
"strconv"
jsonv2 "next.orly.dev/pkg/json/v2"
)
// Marshal returns the JSON encoding of v.
//
// Marshal traverses the value v recursively.
// If an encountered value implements [Marshaler]
// and is not a nil pointer, Marshal calls [Marshaler.MarshalJSON]
// to produce JSON. If no [Marshaler.MarshalJSON] method is present but the
// value implements [encoding.TextMarshaler] instead, Marshal calls
// [encoding.TextMarshaler.MarshalText] and encodes the result as a JSON string.
// The nil pointer exception is not strictly necessary
// but mimics a similar, necessary exception in the behavior of
// [Unmarshaler.UnmarshalJSON].
//
// Otherwise, Marshal uses the following type-dependent default encodings:
//
// Boolean values encode as JSON booleans.
//
// Floating point, integer, and [Number] values encode as JSON numbers.
// NaN and +/-Inf values will return an [UnsupportedValueError].
//
// String values encode as JSON strings coerced to valid UTF-8,
// replacing invalid bytes with the Unicode replacement rune.
// So that the JSON will be safe to embed inside HTML <script> tags,
// the string is encoded using [HTMLEscape],
// which replaces "<", ">", "&", U+2028, and U+2029
// with "\u003c", "\u003e", "\u0026", "\u2028", and "\u2029".
// This replacement can be disabled when using an [Encoder],
// by calling [Encoder.SetEscapeHTML](false).
//
// Array and slice values encode as JSON arrays, except that
// []byte encodes as a base64-encoded string, and a nil slice
// encodes as the null JSON value.
//
// Struct values encode as JSON objects.
// Each exported struct field becomes a member of the object, using the
// field name as the object key, unless the field is omitted for one of the
// reasons given below.
//
// The encoding of each struct field can be customized by the format string
// stored under the "json" key in the struct field's tag.
// The format string gives the name of the field, possibly followed by a
// comma-separated list of options. The name may be empty in order to
// specify options without overriding the default field name.
//
// The "omitempty" option specifies that the field should be omitted
// from the encoding if the field has an empty value, defined as
// false, 0, a nil pointer, a nil interface value, and any array,
// slice, map, or string of length zero.
//
// As a special case, if the field tag is "-", the field is always omitted.
// JSON names containing commas or quotes, or names identical to "" or "-",
// can be specified using a single-quoted string literal, where the syntax
// is identical to the Go grammar for a double-quoted string literal,
// but instead uses single quotes as the delimiters.
//
// Examples of struct field tags and their meanings:
//
// // Field appears in JSON as key "myName".
// Field int `json:"myName"`
//
// // Field appears in JSON as key "myName" and
// // the field is omitted from the object if its value is empty,
// // as defined above.
// Field int `json:"myName,omitempty"`
//
// // Field appears in JSON as key "Field" (the default), but
// // the field is skipped if empty.
// // Note the leading comma.
// Field int `json:",omitempty"`
//
// // Field is ignored by this package.
// Field int `json:"-"`
//
// // Field appears in JSON as key "-".
// Field int `json:"'-'"`
//
// The "omitzero" option specifies that the field should be omitted
// from the encoding if the field has a zero value, according to rules:
//
// 1) If the field type has an "IsZero() bool" method, that will be used to
// determine whether the value is zero.
//
// 2) Otherwise, the value is zero if it is the zero value for its type.
//
// If both "omitempty" and "omitzero" are specified, the field will be omitted
// if the value is either empty or zero (or both).
//
// The "string" option signals that a field is stored as JSON inside a
// JSON-encoded string. It applies only to fields of string, floating point,
// integer, or boolean types. This extra level of encoding is sometimes used
// when communicating with JavaScript programs:
//
// Int64String int64 `json:",string"`
//
// The key name will be used if it's a non-empty string consisting of
// only Unicode letters, digits, and ASCII punctuation except quotation
// marks, backslash, and comma.
//
// Embedded struct fields are usually marshaled as if their inner exported fields
// were fields in the outer struct, subject to the usual Go visibility rules amended
// as described in the next paragraph.
// An anonymous struct field with a name given in its JSON tag is treated as
// having that name, rather than being anonymous.
// An anonymous struct field of interface type is treated the same as having
// that type as its name, rather than being anonymous.
//
// The Go visibility rules for struct fields are amended for JSON when
// deciding which field to marshal or unmarshal. If there are
// multiple fields at the same level, and that level is the least
// nested (and would therefore be the nesting level selected by the
// usual Go rules), the following extra rules apply:
//
// 1) Of those fields, if any are JSON-tagged, only tagged fields are considered,
// even if there are multiple untagged fields that would otherwise conflict.
//
// 2) If there is exactly one field (tagged or not according to the first rule), that is selected.
//
// 3) Otherwise there are multiple fields, and all are ignored; no error occurs.
//
// Handling of anonymous struct fields is new in Go 1.1.
// Prior to Go 1.1, anonymous struct fields were ignored. To force ignoring of
// an anonymous struct field in both current and earlier versions, give the field
// a JSON tag of "-".
//
// Map values encode as JSON objects. The map's key type must either be a
// string, an integer type, or implement [encoding.TextMarshaler]. The map keys
// are sorted and used as JSON object keys by applying the following rules,
// subject to the UTF-8 coercion described for string values above:
// - keys of any string type are used directly
// - keys that implement [encoding.TextMarshaler] are marshaled
// - integer keys are converted to strings
//
// Pointer values encode as the value pointed to.
// A nil pointer encodes as the null JSON value.
//
// Interface values encode as the value contained in the interface.
// A nil interface value encodes as the null JSON value.
//
// Channel, complex, and function values cannot be encoded in JSON.
// Attempting to encode such a value causes Marshal to return
// an [UnsupportedTypeError].
//
// JSON cannot represent cyclic data structures and Marshal does not
// handle them. Passing cyclic structures to Marshal will result in
// an error.
func Marshal(v any) ([]byte, error) {
return jsonv2.Marshal(v, DefaultOptionsV1())
}
// MarshalIndent behaves like [Marshal] and then formats the result with
// [Indent]. Every JSON element in the output starts on its own line, which
// begins with prefix followed by one or more copies of indent according to
// the nesting depth.
func MarshalIndent(v any, prefix, indent string) ([]byte, error) {
	compact, err := Marshal(v)
	if err != nil {
		return nil, err
	}
	pretty, err := appendIndent(nil, compact, prefix, indent)
	if err != nil {
		return nil, err
	}
	return pretty, nil
}
// Marshaler is the interface implemented by types that
// can marshal themselves into valid JSON.
//
// It is an alias for the v2 package's Marshaler type, so the two are
// interchangeable.
type Marshaler = jsonv2.Marshaler
// An UnsupportedTypeError is the error [Marshal] returns when asked to
// encode a value whose type cannot be represented.
type UnsupportedTypeError struct {
	Type reflect.Type
}

// Error names the unsupported type.
func (e *UnsupportedTypeError) Error() string {
	typeName := e.Type.String()
	return "json: unsupported type: " + typeName
}
// An UnsupportedValueError is the error [Marshal] returns when asked to
// encode a value that cannot be represented.
type UnsupportedValueError struct {
	Value reflect.Value
	Str   string
}

// Error reports the textual description stored in Str.
func (e *UnsupportedValueError) Error() string {
	const prefix = "json: unsupported value: "
	return prefix + e.Str
}
// Before Go 1.2, an InvalidUTF8Error was returned by [Marshal] when
// attempting to encode a string value with invalid UTF-8 sequences.
// As of Go 1.2, [Marshal] instead coerces the string to valid UTF-8 by
// replacing invalid bytes with the Unicode replacement rune U+FFFD.
//
// Deprecated: No longer used; kept for compatibility.
type InvalidUTF8Error struct {
	S string // the whole string value that caused the error
}

// Error includes the offending string in Go-quoted form.
func (e *InvalidUTF8Error) Error() string {
	quoted := strconv.Quote(e.S)
	return "json: invalid UTF-8 in string: " + quoted
}
// MarshalerError describes a failure reported by a
// [Marshaler.MarshalJSON] or [encoding.TextMarshaler.MarshalText] method.
type MarshalerError struct {
	Type       reflect.Type
	Err        error
	sourceFunc string // name of the failing method; empty means "MarshalJSON"
}

// Error names the method that failed, the Go type it was called on,
// and the message of the underlying error.
func (e *MarshalerError) Error() string {
	method := "MarshalJSON"
	if e.sourceFunc != "" {
		method = e.sourceFunc
	}
	return "json: error calling " + method +
		" for type " + e.Type.String() +
		": " + e.Err.Error()
}

// Unwrap returns the underlying error.
func (e *MarshalerError) Unwrap() error {
	return e.Err
}

View File

@@ -1,133 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"strings"
"next.orly.dev/pkg/json/jsontext"
)
// HTMLEscape appends the JSON-encoded src to dst, rewriting the characters
// <, >, &, U+2028 and U+2029 inside string literals as
// \u003c, \u003e, \u0026, \u2028 and \u2029 respectively,
// so that the JSON is safe to embed inside HTML <script> tags.
// Web browsers, for historical reasons, do not honor standard HTML
// escaping within <script> tags, so an alternative JSON encoding is needed.
func HTMLEscape(dst *bytes.Buffer, src []byte) {
	// Reserve room for at least the unescaped input up front.
	dst.Grow(len(src))
	escaped := appendHTMLEscape(dst.AvailableBuffer(), src)
	dst.Write(escaped)
}
// appendHTMLEscape appends src to dst, replacing each '<', '>' and '&' byte
// and each UTF-8 encoded U+2028 or U+2029 with its \uXXXX escape.
// It returns the extended slice.
func appendHTMLEscape(dst, src []byte) []byte {
	const hex = "0123456789abcdef"

	// The characters can only appear in string literals,
	// so a plain byte-by-byte scan is sufficient.
	pending := 0 // index of the first src byte not yet copied to dst
	for i := 0; i < len(src); i++ {
		switch c := src[i]; c {
		case '<', '>', '&':
			dst = append(dst, src[pending:i]...)
			dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF])
			pending = i + 1
		case 0xE2:
			// U+2028 and U+2029 are encoded as E2 80 A8 and E2 80 A9.
			if i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 {
				dst = append(dst, src[pending:i]...)
				dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF])
				pending = i + len("\u2029")
			}
		}
	}
	return append(dst, src[pending:]...)
}
// Compact appends the JSON-encoded src to dst with
// insignificant space characters removed.
// On failure nothing is written to dst and the error is returned
// after being passed through transformSyntacticError.
func Compact(dst *bytes.Buffer, src []byte) error {
	dst.Grow(len(src))
	out, err := jsontext.AppendFormat(dst.AvailableBuffer(), src,
		jsontext.AllowDuplicateNames(true),
		jsontext.AllowInvalidUTF8(true),
		jsontext.PreserveRawStrings(true))
	if err != nil {
		return transformSyntacticError(err)
	}
	dst.Write(out)
	return nil
}
// indentGrowthFactor specifies the growth factor of indenting JSON input.
// Empirically, the growth factor was measured to be between 1.4x and 1.8x
// for some set of compacted JSON with the indent being a single tab.
// Specify a growth factor slightly larger than what is observed
// to reduce probability of allocation in appendIndent.
// A factor no higher than 2 ensures that wasted space never exceeds 50%.
const indentGrowthFactor = 2
// Indent appends an indented form of the JSON-encoded src to dst.
// Every element of a JSON object or array starts on a new, indented line
// that begins with prefix followed by one or more copies of indent,
// according to the nesting depth.
// The appended data starts with neither the prefix nor any indentation,
// which makes it easier to embed inside other formatted JSON data.
// Leading space characters (space, tab, carriage return, newline) in src
// are dropped, whereas trailing space characters at the end of src
// are preserved and copied to dst.
// For example, if src has no trailing spaces, neither will dst;
// if src ends in a trailing newline, so will dst.
func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
	dst.Grow(indentGrowthFactor * len(src))
	out, err := appendIndent(dst.AvailableBuffer(), src, prefix, indent)
	// Whatever appendIndent returned is written, even when err is non-nil.
	dst.Write(out)
	return err
}
// appendIndent appends an indented form of the JSON-encoded src to dst and
// returns the extended slice. It layers two v1 behaviors on top of
// jsontext.AppendFormat:
//
//   - prefix and indent may contain arbitrary characters: formatting is done
//     with placeholder spaces of the same length, which are then overwritten
//     with the requested prefix and indent;
//   - trailing whitespace of src is preserved and copied to the output.
//
// On error it returns dst truncated to its original length together with
// the error passed through transformSyntacticError.
func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) {
	dstLen := len(dst)

	// In v2, only spaces and tabs are allowed, while v1 allowed any character.
	// Use placeholder spaces of correct length, and replace afterwards.
	invalidPrefix, invalidIndent := prefix, indent
	usePlaceholders := len(strings.Trim(prefix, " \t"))+len(strings.Trim(indent, " \t")) > 0
	if usePlaceholders {
		prefix = strings.Repeat(" ", len(prefix))
		indent = strings.Repeat(" ", len(indent))
	}

	dst, err := jsontext.AppendFormat(dst, src,
		jsontext.AllowDuplicateNames(true),
		jsontext.AllowInvalidUTF8(true),
		jsontext.PreserveRawStrings(true),
		jsontext.Multiline(true),
		jsontext.WithIndentPrefix(prefix),
		jsontext.WithIndent(indent))
	if err != nil {
		return dst[:dstLen], transformSyntacticError(err)
	}

	if usePlaceholders {
		// After every newline, overwrite the run of placeholder spaces
		// with the requested prefix followed by repeated copies of indent.
		b := dst[dstLen:]
		for i := bytes.IndexByte(b, '\n'); i >= 0; i = bytes.IndexByte(b, '\n') {
			b = b[i+len("\n"):]
			n := len(b) - len(bytes.TrimLeft(b, " ")) // len(prefix)+n*len(indent)
			spaces := b[:n]
			spaces = spaces[copy(spaces, invalidPrefix):]
			for len(spaces) > 0 {
				spaces = spaces[copy(spaces, invalidIndent):]
			}
			b = b[n:]
		}
	}

	// In v2, trailing whitespace is discarded, while v1 preserved it.
	// This must happen before returning (not in a defer): the results are
	// unnamed, so a deferred assignment to dst would not reach the caller.
	// It also runs after the placeholder replacement above so that spaces
	// in the copied trailing whitespace are never rewritten.
	if n := len(src) - len(bytes.TrimRight(src, " \n\r\t")); n > 0 && len(dst) > dstLen {
		dst = append(dst, src[len(src)-n:]...)
	}
	return dst, nil
}

View File

@@ -1,153 +0,0 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"fmt"
"reflect"
"strconv"
"strings"
"next.orly.dev/pkg/json/internal"
"next.orly.dev/pkg/json/jsontext"
jsonv2 "next.orly.dev/pkg/json/v2"
)
// init installs this package's v1-specific constructors and error
// transformers into the hooks exposed by the internal package,
// so that v2 can properly handle v1 types.
func init() {
	// Constructors for the v1 Number type.
	internal.NewRawNumber = func() any { return new(Number) }
	internal.RawNumberOf = func(b []byte) any { return Number(b) }

	// Constructor for the v1 wrapper around errors from marshal methods.
	internal.NewMarshalerError = func(val any, err error, funcName string) error {
		return &MarshalerError{
			Type:       reflect.TypeOf(val),
			Err:        err,
			sourceFunc: funcName,
		}
	}

	// Converters from v2 errors to v1 errors.
	internal.TransformMarshalError = transformMarshalError
	internal.TransformUnmarshalError = transformUnmarshalError
}
// transformMarshalError converts an error produced while marshaling into
// the error types historically returned by v1:
//
//   - A non-nil [*jsonv2.SemanticError] becomes an [*UnsupportedTypeError]
//     when it carries no underlying Err, and an [*UnsupportedValueError]
//     otherwise.
//   - A non-nil [*MarshalerError] is returned as is, after its Err field
//     has been rewritten in place by transformSyntacticError.
//   - Any other error is passed through transformSyntacticError.
//
// The root argument is not used by this function.
func transformMarshalError(root any, err error) error {
// Historically, errors returned from Marshal methods were wrapped
// in a [MarshalerError]. This is directly performed by the v2 package
// via the injected [internal.NewMarshalerError] constructor
// while operating under [ReportErrorsWithLegacySemantics].
// Note that errors from a Marshal method were always wrapped,
// even if wrapped for multiple layers.
if err, ok := err.(*jsonv2.SemanticError); err != nil {
if err.Err == nil {
// Historically, this was only reported for unserializable types
// like complex numbers, channels, functions, and unsafe.Pointers.
return &UnsupportedTypeError{Type: err.GoType}
} else {
// Historically, this was only reported for NaN or ±Inf values
// and cycles detected in the value.
// The Val used to be populated with the reflect.Value,
// but this is no longer supported.
errStr := err.Err.Error()
if err.Err == internal.ErrCycle && err.GoType != nil {
errStr += " via " + err.GoType.String()
}
errStr = strings.TrimPrefix(errStr, "unsupported value: ")
return &UnsupportedValueError{Str: errStr}
}
} else if ok {
// The input held a nil *jsonv2.SemanticError;
// answer with a nil pointer of the corresponding v1 error type.
return (*UnsupportedValueError)(nil)
}
// Note that the MarshalerError is modified in place, not copied.
if err, _ := err.(*MarshalerError); err != nil {
err.Err = transformSyntacticError(err.Err)
return err
}
return transformSyntacticError(err)
}
// transformUnmarshalError converts an error produced while unmarshaling
// into the error types historically returned by v1.
// For a non-nil [*jsonv2.SemanticError]:
//
//   - internal.ErrNonNilReference becomes an [*InvalidUnmarshalError];
//   - jsonv2.ErrUnknownName becomes a plain "json: unknown field" error;
//   - anything else becomes an [*UnmarshalTypeError] whose Value describes
//     the offending JSON value, whose Struct is the name of the root type,
//     and whose Field is the JSON pointer rewritten with '.' separators.
//
// Any other error is passed through transformSyntacticError.
// The root argument is only used to derive the Struct name.
func transformUnmarshalError(root any, err error) error {
// Historically, errors from Unmarshal methods were never wrapped and
// returned verbatim while operating under [ReportErrorsWithLegacySemantics].
if err, ok := err.(*jsonv2.SemanticError); err != nil {
if err.Err == internal.ErrNonNilReference {
return &InvalidUnmarshalError{err.GoType}
}
if err.Err == jsonv2.ErrUnknownName {
return fmt.Errorf("json: unknown field %q", err.JSONPointer.LastToken())
}
// Historically, UnmarshalTypeError has always been inconsistent
// about how it reported position information.
//
// The Struct field now points to the root type,
// rather than some intermediate struct in the path.
// This better matches the original intent of the field based
// on how the Error message was formatted.
//
// For a representation closer to the historical representation,
// we switch the '/'-delimited representation of a JSON pointer
// to use a '.'-delimited representation. This may be ambiguous,
// but the prior representation was always ambiguous as well.
// Users that care about precise positions should use v2 errors
// by disabling [ReportErrorsWithLegacySemantics].
//
// The introduction of a Err field is new to the v1-to-v2 migration
// and allows us to preserve stronger error information
// that may be surfaced by the v2 package.
//
// See https://go.dev/issue/43126
// Describe the kind of the offending JSON value;
// value stays empty for any kind not listed below.
var value string
switch err.JSONKind {
case 'n', '"', '0':
value = err.JSONKind.String()
case 'f', 't':
value = "bool"
case '[', ']':
value = "array"
case '{', '}':
value = "object"
}
if len(err.JSONValue) > 0 {
isStrconvError := err.Err == strconv.ErrRange || err.Err == strconv.ErrSyntax
isNumericKind := func(t reflect.Type) bool {
if t == nil {
return false
}
switch t.Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
reflect.Float32, reflect.Float64:
return true
}
return false
}
// A strconv range or syntax failure for a numeric Go kind is described
// as a "number" (unquoting the value if it was a JSON string),
// and the underlying error is dropped.
if isStrconvError && isNumericKind(err.GoType) {
value = "number"
if err.JSONKind == '"' {
err.JSONValue, _ = jsontext.AppendUnquote(nil, err.JSONValue)
}
err.Err = nil
}
value += " " + string(err.JSONValue)
}
// The root type name is only reported when a JSON pointer is present;
// a single level of pointer indirection on root is removed first.
var rootName string
if t := reflect.TypeOf(root); t != nil && err.JSONPointer != "" {
if t.Kind() == reflect.Pointer {
t = t.Elem()
}
rootName = t.Name()
}
// Convert the '/'-delimited JSON pointer to a '.'-delimited field path.
fieldPath := string(err.JSONPointer)
fieldPath = strings.TrimPrefix(fieldPath, "/")
fieldPath = strings.ReplaceAll(fieldPath, "/", ".")
return &UnmarshalTypeError{
Value: value,
Type: err.GoType,
Offset: err.ByteOffset,
Struct: rootName,
Field: fieldPath,
Err: transformSyntacticError(err.Err),
}
} else if ok {
// The input held a nil *jsonv2.SemanticError;
// answer with a nil pointer of the corresponding v1 error type.
return (*UnmarshalTypeError)(nil)
}
return transformSyntacticError(err)
}

View File

@@ -1,546 +0,0 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Migrating to v2
//
// This package (i.e., [encoding/json]) is now formally known as the v1 package
// since a v2 package now exists at [encoding/json/v2].
// All the behavior of the v1 package is implemented in terms of
// the v2 package with the appropriate set of options specified that
// preserve the historical behavior of v1.
//
// The [jsonv2.Marshal] function is the newer equivalent of v1 [Marshal].
// The [jsonv2.Unmarshal] function is the newer equivalent of v1 [Unmarshal].
// The v2 functions have the same calling signature as the v1 equivalent
// except that they take in variadic [Options] arguments that can be specified
// to alter the behavior of marshal or unmarshal. Both v1 and v2 generally
// behave in similar ways, but there are some notable differences.
//
// The following is a list of differences between v1 and v2:
//
// - In v1, JSON object members are unmarshaled into a Go struct using a
// case-insensitive name match with the JSON name of the fields.
// In contrast, v2 matches fields using an exact, case-sensitive match.
// The [jsonv2.MatchCaseInsensitiveNames] and [MatchCaseSensitiveDelimiter]
// options control this behavior difference. To explicitly specify a Go struct
// field to use a particular name matching scheme, either the `case:ignore`
// or the `case:strict` field option can be specified.
// Field-specified options take precedence over caller-specified options.
//
// - In v1, when marshaling a Go struct, a field marked as `omitempty`
// is omitted if the field value is an "empty" Go value, which is defined as
// false, 0, a nil pointer, a nil interface value, and
// any empty array, slice, map, or string. In contrast, v2 redefines
// `omitempty` to omit a field if it encodes as an "empty" JSON value,
// which is defined as a JSON null, or an empty JSON string, object, or array.
// The [OmitEmptyWithLegacySemantics] option controls this behavior difference.
// Note that `omitempty` behaves identically in both v1 and v2 for a
// Go array, slice, map, or string (assuming no user-defined MarshalJSON method
// overrides the default representation). Existing usages of `omitempty` on a
// Go bool, number, pointer, or interface value should migrate to specifying
// `omitzero` instead (which is identically supported in both v1 and v2).
//
// - In v1, a Go struct field marked as `string` can be used to quote a
// Go string, bool, or number as a JSON string. It does not recursively
// take effect on composite Go types. In contrast, v2 restricts
// the `string` option to only quote a Go number as a JSON string.
// It does recursively take effect on Go numbers within a composite Go type.
// The [StringifyWithLegacySemantics] option controls this behavior difference.
//
// - In v1, a nil Go slice or Go map is marshaled as a JSON null.
// In contrast, v2 marshals a nil Go slice or Go map as
// an empty JSON array or JSON object, respectively.
// The [jsonv2.FormatNilSliceAsNull] and [jsonv2.FormatNilMapAsNull] options
// control this behavior difference. To explicitly specify a Go struct field
// to use a particular representation for nil, either the `format:emitempty`
// or `format:emitnull` field option can be specified.
// Field-specified options take precedence over caller-specified options.
//
// - In v1, a Go array may be unmarshaled from a JSON array of any length.
// In contrast, in v2 a Go array must be unmarshaled from a JSON array
// of the same length, otherwise it results in an error.
// The [UnmarshalArrayFromAnyLength] option controls this behavior difference.
//
// - In v1, a Go byte array is represented as a JSON array of JSON numbers.
// In contrast, in v2 a Go byte array is represented as a Base64-encoded JSON string.
// The [FormatByteArrayAsArray] option controls this behavior difference.
// To explicitly specify a Go struct field to use a particular representation,
// either the `format:array` or `format:base64` field option can be specified.
// Field-specified options take precedence over caller-specified options.
//
// - In v1, MarshalJSON methods declared on a pointer receiver are only called
// if the Go value is addressable. In contrast, in v2 a MarshalJSON method
// is always callable regardless of addressability.
// The [CallMethodsWithLegacySemantics] option controls this behavior difference.
//
// - In v1, MarshalJSON and UnmarshalJSON methods are never called for Go map keys.
// In contrast, in v2 a MarshalJSON or UnmarshalJSON method is eligible for
// being called for Go map keys.
// The [CallMethodsWithLegacySemantics] option controls this behavior difference.
//
// - In v1, a Go map is marshaled in a deterministic order.
// In contrast, in v2 a Go map is marshaled in a non-deterministic order.
// The [jsonv2.Deterministic] option controls this behavior difference.
//
// - In v1, JSON strings are encoded with HTML-specific or JavaScript-specific
// characters being escaped. In contrast, in v2 JSON strings use the minimal
// encoding and only escape if required by the JSON grammar.
// The [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] options
// control this behavior difference.
//
// - In v1, bytes of invalid UTF-8 within a string are silently replaced with
// the Unicode replacement character. In contrast, in v2 the presence of
// invalid UTF-8 results in an error. The [jsontext.AllowInvalidUTF8] option
// controls this behavior difference.
//
// - In v1, a JSON object with duplicate names is permitted.
// In contrast, in v2 a JSON object with duplicate names results in an error.
// The [jsontext.AllowDuplicateNames] option controls this behavior difference.
//
// - In v1, when unmarshaling a JSON null into a non-empty Go value it will
// inconsistently either zero out the value or do nothing.
// In contrast, in v2 unmarshaling a JSON null will consistently and always
// zero out the underlying Go value. The [MergeWithLegacySemantics] option
// controls this behavior difference.
//
// - In v1, when unmarshaling a JSON value into a non-zero Go value,
// it merges into the original Go value for array elements, slice elements,
// struct fields (but not map values),
// pointer values, and interface values (only if a non-nil pointer).
// In contrast, in v2 unmarshal merges into the Go value
// for struct fields, map values, pointer values, and interface values.
// In general, the v2 semantic merges when unmarshaling a JSON object,
// otherwise it replaces the value. The [MergeWithLegacySemantics] option
// controls this behavior difference.
//
// - In v1, a [time.Duration] is represented as a JSON number containing
// the decimal number of nanoseconds. In contrast, in v2 a [time.Duration]
// has no default representation and results in a runtime error.
// The [FormatDurationAsNano] option controls this behavior difference.
// To explicitly specify a Go struct field to use a particular representation,
// either the `format:nano` or `format:units` field option can be specified.
// Field-specified options take precedence over caller-specified options.
//
// - In v1, errors are never reported at runtime for Go struct types
// that have some form of structural error (e.g., a malformed tag option).
// In contrast, v2 reports a runtime error for Go types that are invalid
// as they relate to JSON serialization. For example, a Go struct
// with only unexported fields cannot be serialized.
// The [ReportErrorsWithLegacySemantics] option controls this behavior difference.
//
// As mentioned, the entirety of v1 is implemented in terms of v2,
// where options are implicitly specified to opt into legacy behavior.
// For example, [Marshal] directly calls [jsonv2.Marshal] with [DefaultOptionsV1].
// Similarly, [Unmarshal] directly calls [jsonv2.Unmarshal] with [DefaultOptionsV1].
// The [DefaultOptionsV1] option represents the set of all options that specify
// default v1 behavior.
//
// For many of the behavior differences, there are Go struct field options
// that the author of a Go type can specify to control the behavior such that
// the type is represented identically in JSON under either v1 or v2 semantics.
//
// The availability of [DefaultOptionsV1] and [jsonv2.DefaultOptionsV2],
// where later options take precedence over former options allows for
// a gradual migration from v1 to v2. For example:
//
// - jsonv1.Marshal(v)
// uses default v1 semantics.
//
// - jsonv2.Marshal(v, jsonv1.DefaultOptionsV1())
// is semantically equivalent to jsonv1.Marshal
// and thus uses default v1 semantics.
//
// - jsonv2.Marshal(v, jsonv1.DefaultOptionsV1(), jsontext.AllowDuplicateNames(false))
// uses mostly v1 semantics, but opts into one particular v2-specific behavior.
//
// - jsonv2.Marshal(v, jsonv1.CallMethodsWithLegacySemantics(true))
// uses mostly v2 semantics, but opts into one particular v1-specific behavior.
//
// - jsonv2.Marshal(v, ..., jsonv2.DefaultOptionsV2())
// is semantically equivalent to jsonv2.Marshal since
// jsonv2.DefaultOptionsV2 overrides any options specified earlier
// and thus uses default v2 semantics.
//
// - jsonv2.Marshal(v)
// uses default v2 semantics.
//
// All new usages of "json" in Go should use the v2 package,
// but the v1 package will forever remain supported.
package json
// TODO(https://go.dev/issue/71631): Update the "Migrating to v2" documentation
// with default v2 behavior for [time.Duration].
import (
"encoding"
"next.orly.dev/pkg/json/internal/jsonflags"
"next.orly.dev/pkg/json/internal/jsonopts"
"next.orly.dev/pkg/json/jsontext"
jsonv2 "next.orly.dev/pkg/json/v2"
)
// Reference encoding, jsonv2, and jsontext packages to assist pkgsite
// in being able to hotlink references to those packages.
var (
_ encoding.TextMarshaler
_ encoding.TextUnmarshaler
_ jsonv2.Options
_ jsontext.Options
)
// Options are a set of options to configure the v2 "json" package
// to operate with v1 semantics for particular features.
// Values of this type can be passed to v2 functions like
// [jsonv2.Marshal] or [jsonv2.Unmarshal].
// Instead of referencing this type, use [jsonv2.Options].
//
// See the "Migrating to v2" section for guidance on how to migrate usage
// of "json" from using v1 to using v2 instead.
//
// Options is declared as an alias of jsonopts.Options.
type Options = jsonopts.Options
// DefaultOptionsV1 is the full set of all options that define v1 semantics.
// It is equivalent to the following boolean options being set to true:
//
// - [CallMethodsWithLegacySemantics]
// - [FormatByteArrayAsArray]
// - [FormatBytesWithLegacySemantics]
// - [FormatDurationAsNano]
// - [MatchCaseSensitiveDelimiter]
// - [MergeWithLegacySemantics]
// - [OmitEmptyWithLegacySemantics]
// - [ParseBytesWithLooseRFC4648]
// - [ParseTimeWithLooseRFC3339]
// - [ReportErrorsWithLegacySemantics]
// - [StringifyWithLegacySemantics]
// - [UnmarshalArrayFromAnyLength]
// - [jsonv2.Deterministic]
// - [jsonv2.FormatNilMapAsNull]
// - [jsonv2.FormatNilSliceAsNull]
// - [jsonv2.MatchCaseInsensitiveNames]
// - [jsontext.AllowDuplicateNames]
// - [jsontext.AllowInvalidUTF8]
// - [jsontext.EscapeForHTML]
// - [jsontext.EscapeForJS]
// - [jsontext.PreserveRawStrings]
//
// All other boolean options are set to false.
// All non-boolean options are set to the zero value,
// except for [jsontext.WithIndent], which defaults to "\t".
//
// The [Marshal] and [Unmarshal] functions in this package are
// semantically identical to calling the v2 equivalents with this option:
//
// jsonv2.Marshal(v, jsonv1.DefaultOptionsV1())
// jsonv2.Unmarshal(b, v, jsonv1.DefaultOptionsV1())
func DefaultOptionsV1() Options {
	// Hand out the address of the package-level option set, not a copy of it.
	v1Defaults := &jsonopts.DefaultOptionsV1
	return v1Defaults
}
// CallMethodsWithLegacySemantics specifies that calling of type-provided
// marshal and unmarshal methods follow legacy semantics:
//
// - When marshaling, a marshal method declared on a pointer receiver
// is only called if the Go value is addressable.
// Values obtained from an interface or map element are not addressable.
// Values obtained from a pointer or slice element are addressable.
// Values obtained from an array element or struct field inherit
// the addressability of the parent. In contrast, the v2 semantic
// is to always call marshal methods regardless of addressability.
//
// - When marshaling or unmarshaling, the [Marshaler] or [Unmarshaler]
// methods are ignored for map keys. However, [encoding.TextMarshaler]
// or [encoding.TextUnmarshaler] are still callable.
// In contrast, the v2 semantic is to serialize map keys
// like any other value (with regard to calling methods),
// which may include calling [Marshaler] or [Unmarshaler] methods,
// where it is the implementation's responsibility to represent the
// Go value as a JSON string (as required for JSON object names).
//
// - When marshaling, if a map key value implements a marshal method
// and is a nil pointer, then it is serialized as an empty JSON string.
// In contrast, the v2 semantic is to report an error.
//
// - When marshaling, if an interface type implements a marshal method
// and the interface value is a nil pointer to a concrete type,
// then the marshal method is always called.
// In contrast, the v2 semantic is to never directly call methods
// on interface values and to instead defer evaluation based upon
// the underlying concrete value. Similar to non-interface values,
// marshal methods are not called on nil pointers and
// are instead serialized as a JSON null.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func CallMethodsWithLegacySemantics(v bool) Options {
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.CallMethodsWithLegacySemantics | 0
	}
	return jsonflags.CallMethodsWithLegacySemantics | 1
}
// FormatByteArrayAsArray specifies that a Go [N]byte is
// formatted as a normal Go array in contrast to the v2 default of
// formatting [N]byte as using binary data encoding (RFC 4648).
// If a struct field has a `format` tag option,
// then the specified formatting takes precedence.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func FormatByteArrayAsArray(v bool) Options {
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.FormatByteArrayAsArray | 0
	}
	return jsonflags.FormatByteArrayAsArray | 1
}
// FormatBytesWithLegacySemantics specifies that handling of
// []~byte and [N]~byte types follow legacy semantics:
//
// - A Go []~byte is to be treated as using some form of
// binary data encoding (RFC 4648) in contrast to the v2 default
// of only treating []byte as such. In particular, v2 does not
// treat slices of named byte types as representing binary data.
//
// - When marshaling, if a named byte implements a marshal method,
// then the slice is serialized as a JSON array of elements,
// each of which call the marshal method.
//
// - When unmarshaling, if the input is a JSON array,
// then unmarshal into the []~byte as if it were a normal Go slice.
// In contrast, the v2 default is to report an error unmarshaling
// a JSON array when expecting some form of binary data encoding.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func FormatBytesWithLegacySemantics(v bool) Options {
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.FormatBytesWithLegacySemantics | 0
	}
	return jsonflags.FormatBytesWithLegacySemantics | 1
}
// FormatDurationAsNano specifies that a [time.Duration] is
// formatted as a JSON number representing the number of nanoseconds
// in contrast to the v2 default of reporting an error.
// If a duration field has a `format` tag option,
// then the specified formatting takes precedence.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func FormatDurationAsNano(v bool) Options {
	// TODO(https://go.dev/issue/71631): Update documentation with v2 behavior.
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.FormatDurationAsNano | 0
	}
	return jsonflags.FormatDurationAsNano | 1
}
// MatchCaseSensitiveDelimiter specifies that underscores and dashes are
// not to be ignored when performing case-insensitive name matching which
// occurs under [jsonv2.MatchCaseInsensitiveNames] or the `case:ignore` tag option.
// Thus, case-insensitive name matching is identical to [strings.EqualFold].
// Use of this option diminishes the ability of case-insensitive matching
// to be able to match common case variants (e.g., "foo_bar" with "fooBar").
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func MatchCaseSensitiveDelimiter(v bool) Options {
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.MatchCaseSensitiveDelimiter | 0
	}
	return jsonflags.MatchCaseSensitiveDelimiter | 1
}
// MergeWithLegacySemantics specifies that unmarshaling into a non-zero
// Go value follows legacy semantics:
//
// - When unmarshaling a JSON null, this preserves the original Go value
// if the kind is a bool, int, uint, float, string, array, or struct.
// Otherwise, it zeros the Go value.
// In contrast, the default v2 behavior is to consistently and always
// zero the Go value when unmarshaling a JSON null into it.
//
// - When unmarshaling a JSON value other than null, this merges into
// the original Go value for array elements, slice elements,
// struct fields (but not map values),
// pointer values, and interface values (only if a non-nil pointer).
// In contrast, the default v2 behavior is to merge into the Go value
// for struct fields, map values, pointer values, and interface values.
// In general, the v2 semantic merges when unmarshaling a JSON object,
// otherwise it replaces the original value.
//
// This only affects unmarshaling and is ignored when marshaling.
// The v1 default is true.
func MergeWithLegacySemantics(v bool) Options {
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.MergeWithLegacySemantics | 0
	}
	return jsonflags.MergeWithLegacySemantics | 1
}
// OmitEmptyWithLegacySemantics specifies that the `omitempty` tag option
// follows a definition of empty where a field is omitted if the Go value is
// false, 0, a nil pointer, a nil interface value,
// or any empty array, slice, map, or string.
// This overrides the v2 semantic where a field is empty if the value
// marshals as a JSON null or an empty JSON string, object, or array.
//
// The v1 and v2 definitions of `omitempty` are practically the same for
// Go strings, slices, arrays, and maps. Usages of `omitempty` on
// Go bools, ints, uints, floats, pointers, and interfaces should migrate to use
// the `omitzero` tag option, which omits a field if it is the zero Go value.
//
// This only affects marshaling and is ignored when unmarshaling.
// The v1 default is true.
func OmitEmptyWithLegacySemantics(v bool) Options {
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.OmitEmptyWithLegacySemantics | 0
	}
	return jsonflags.OmitEmptyWithLegacySemantics | 1
}
// ParseBytesWithLooseRFC4648 specifies that when parsing
// binary data encoded as "base32" or "base64",
// to ignore the presence of '\r' and '\n' characters.
// In contrast, the v2 default is to report an error in order to be
// strictly compliant with RFC 4648, section 3.3,
// which specifies that non-alphabet characters must be rejected.
//
// This only affects unmarshaling and is ignored when marshaling.
// The v1 default is true.
func ParseBytesWithLooseRFC4648(v bool) Options {
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.ParseBytesWithLooseRFC4648 | 0
	}
	return jsonflags.ParseBytesWithLooseRFC4648 | 1
}
// ParseTimeWithLooseRFC3339 specifies that a [time.Time]
// parses according to loose adherence to RFC 3339.
// In particular, it permits historically incorrect representations,
// allowing for deviations in hour format, sub-second separator,
// and timezone representation. In contrast, the default v2 behavior
// is to strictly comply with the grammar specified in RFC 3339.
//
// This only affects unmarshaling and is ignored when marshaling.
// The v1 default is true.
func ParseTimeWithLooseRFC3339(v bool) Options {
	// The flag is ORed with 1 when v is true and with 0 when v is false.
	if !v {
		return jsonflags.ParseTimeWithLooseRFC3339 | 0
	}
	return jsonflags.ParseTimeWithLooseRFC3339 | 1
}
// ReportErrorsWithLegacySemantics specifies that Marshal and Unmarshal
// should report errors with legacy semantics:
//
//   - When marshaling or unmarshaling, the returned error values are
//     usually of types such as [SyntaxError], [MarshalerError],
//     [UnsupportedTypeError], [UnsupportedValueError],
//     [InvalidUnmarshalError], or [UnmarshalTypeError].
//     In contrast, the v2 semantic is to always return errors as either
//     [jsonv2.SemanticError] or [jsontext.SyntacticError].
//
//   - When marshaling, if a user-defined marshal method reports an error,
//     it is always wrapped in a [MarshalerError], even if the error itself
//     is already a [MarshalerError], which may lead to multiple redundant
//     layers of wrapping. In contrast, the v2 semantic is to
//     always wrap an error within [jsonv2.SemanticError]
//     unless it is already a semantic error.
//
//   - When unmarshaling, if a user-defined unmarshal method reports an error,
//     it is never wrapped and is reported verbatim. In contrast, the v2
//     semantic is to always wrap an error within [jsonv2.SemanticError]
//     unless it is already a semantic error.
//
//   - When marshaling or unmarshaling, if a Go struct contains type errors
//     (e.g., conflicting names or malformed field tags), then such errors
//     are ignored and the Go struct uses a best-effort representation.
//     In contrast, the v2 semantic is to report a runtime error.
//
//   - When unmarshaling, the syntactic structure of the JSON input
//     is fully validated before performing the semantic unmarshaling
//     of the JSON data into the Go value. Practically speaking,
//     this means that JSON input with syntactic errors does not result
//     in any mutations of the target Go value. In contrast, the v2 semantic
//     is to perform a streaming decode and gradually unmarshal the JSON input
//     into the target Go value, which means that the Go value may be
//     partially mutated when a syntactic error is encountered.
//
//   - When unmarshaling, a semantic error does not immediately terminate the
//     unmarshal procedure, but rather evaluation continues.
//     When unmarshal returns, only the first semantic error is reported.
//     In contrast, the v2 semantic is to terminate unmarshal the moment
//     an error is encountered.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func ReportErrorsWithLegacySemantics(v bool) Options {
	if !v {
		return jsonflags.ReportErrorsWithLegacySemantics | 0
	}
	return jsonflags.ReportErrorsWithLegacySemantics | 1
}
// StringifyWithLegacySemantics specifies that the `string` tag option
// may stringify bools and string values. The option only takes effect on
// fields whose top-level type is a bool, string, numeric kind, or a pointer
// to such a kind. In particular, `string` will not stringify bool, string,
// or numeric kinds nested within a composite data type
// (e.g., array, slice, struct, map, or interface).
//
// When marshaling, such Go values are serialized as their usual
// JSON representation, but quoted within a JSON string.
// When unmarshaling, such Go values must be deserialized from
// a JSON string containing their usual JSON representation.
// A JSON null quoted in a JSON string is a valid substitute for JSON null
// while unmarshaling into a Go value that `string` takes effect on.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func StringifyWithLegacySemantics(v bool) Options {
	if !v {
		return jsonflags.StringifyWithLegacySemantics | 0
	}
	return jsonflags.StringifyWithLegacySemantics | 1
}
// UnmarshalArrayFromAnyLength specifies that Go arrays can be unmarshaled
// from input JSON arrays of any length. When the JSON array is shorter than
// the Go array, the remaining Go array elements are zeroed. When the JSON
// array is longer, the excess JSON array elements are skipped over.
//
// This only affects unmarshaling and is ignored when marshaling.
// The v1 default is true.
func UnmarshalArrayFromAnyLength(v bool) Options {
	if !v {
		return jsonflags.UnmarshalArrayFromAnyLength | 0
	}
	return jsonflags.UnmarshalArrayFromAnyLength | 1
}
// unmarshalAnyWithRawNumber specifies that a JSON number unmarshaled into
// an empty Go interface should be represented by the Number type rather
// than by a float64.
func unmarshalAnyWithRawNumber(v bool) Options {
	if !v {
		return jsonflags.UnmarshalAnyWithRawNumber | 0
	}
	return jsonflags.UnmarshalAnyWithRawNumber | 1
}

View File

@@ -1,86 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"errors"
"io"
"strings"
"next.orly.dev/pkg/json/internal"
"next.orly.dev/pkg/json/internal/jsonflags"
"next.orly.dev/pkg/json/jsontext"
)
// export exposes internal functionality of the "jsontext" package.
// In this file it is used by checkValid to obtain and release a buffered
// decoder and to reach the decoder's internal state, and by
// transformSyntacticError to recognize I/O errors.
var export = jsontext.Internal.Export(&internal.AllowInternalUse)
// Valid reports whether data is a valid JSON encoding.
// It is true exactly when checkValid reports no error for data.
func Valid(data []byte) bool {
	err := checkValid(data)
	return err == nil
}
// checkValid reads a single JSON value from data and then verifies that
// no further input follows it. Duplicate object names and invalid UTF-8
// are permitted while reading. Any failure is passed through
// transformSyntacticError before being returned; nil means data is valid.
func checkValid(data []byte) error {
	dec := export.GetBufferedDecoder(data)
	defer export.PutBufferedDecoder(dec)

	state := export.Decoder(dec)
	state.Struct.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)

	if _, readErr := dec.ReadValue(); readErr != nil {
		if readErr == io.EOF {
			// A bare EOF is reported as an unexpected-EOF syntactic error
			// positioned at the end of the input (consumed plus unread bytes).
			end := dec.InputOffset() + int64(len(dec.UnreadBuffer()))
			readErr = &jsontext.SyntacticError{ByteOffset: end, Err: io.ErrUnexpectedEOF}
		}
		return transformSyntacticError(readErr)
	}

	// Anything after the first value makes the input invalid.
	trailingErr := state.CheckEOF()
	if trailingErr == nil {
		return nil
	}
	return transformSyntacticError(trailingErr)
}
// A SyntaxError describes a JSON syntax error.
// [Unmarshal] returns a SyntaxError when the JSON input cannot be parsed.
type SyntaxError struct {
	msg    string // human-readable description of the error
	Offset int64  // the error occurred after reading Offset bytes
}

// Error returns the description of the syntax error.
func (e *SyntaxError) Error() string {
	return e.msg
}
// errUnexpectedEnd is the v1 error for truncated JSON input.
var errUnexpectedEnd = errors.New("unexpected end of JSON input")

// transformSyntacticError converts an error produced by the jsontext layer
// into the error a v1 caller expects:
//
//   - a non-nil *jsontext.SyntacticError becomes a *SyntaxError carrying the
//     same byte offset and a message rewritten to the v1 wording;
//   - a nil *jsontext.SyntacticError stored in the interface becomes a nil
//     *SyntaxError;
//   - an I/O error is unwrapped one level;
//   - any other error is returned unchanged.
//
// Note that a SyntacticError whose Err is io.ErrUnexpectedEOF has that field
// overwritten in place with errUnexpectedEnd.
func transformSyntacticError(err error) error {
	serr, isSyntactic := err.(*jsontext.SyntacticError)
	if serr != nil {
		if serr.Err == io.ErrUnexpectedEOF {
			serr.Err = errUnexpectedEnd
		}
		text := serr.Err.Error()
		// Trim the trailing " (expecting ...)" hint, except for messages
		// about literals, which keep it.
		cut := strings.Index(text, " (expecting")
		if cut >= 0 && !strings.Contains(text, " in literal") {
			text = text[:cut]
		}
		return &SyntaxError{Offset: serr.ByteOffset, msg: syntaxErrorReplacer.Replace(text)}
	}
	if isSyntactic {
		// err held a typed nil pointer; keep it typed, but as the v1 type.
		return (*SyntaxError)(nil)
	}
	if export.IsIOError(err) {
		return errors.Unwrap(err) // v1 historically did not wrap IO errors
	}
	return err
}
// syntaxErrorReplacer replaces certain string literals in the v2 error
// to better match the historical string rendering of syntax errors.
// In particular, v2 uses the terminology "object name" to match RFC 8259,
// while v1 uses "object key", which is not a term found in JSON literature.
//
// The arguments are (old, new) pairs. It is applied by
// transformSyntacticError to the message of every syntactic error.
var syntaxErrorReplacer = strings.NewReplacer(
"object name", "object key",
"at start of value", "looking for beginning of value",
"at start of string", "looking for beginning of object key string",
"after object value", "after object key:value pair",
"in number", "in numeric literal",
)

View File

@@ -1,231 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"io"
"next.orly.dev/pkg/json/jsontext"
jsonv2 "next.orly.dev/pkg/json/v2"
)
// A Decoder reads and decodes JSON values from an input stream.
// Use [NewDecoder] to create one.
type Decoder struct {
// dec is the underlying jsontext decoder that reads from the input stream.
dec *jsontext.Decoder
// opts holds the options passed to Unmarshal by Decode. It starts as
// DefaultOptionsV1 and is extended by UseNumber and DisallowUnknownFields.
opts jsonv2.Options
// err records a failure to read a value in Decode; once set, every
// later call to Decode returns it without reading further.
err error
}
// NewDecoder returns a new decoder that reads from r, configured with the
// v1 default options.
//
// The decoder introduces its own buffering and may
// read data from r beyond the JSON values requested.
func NewDecoder(r io.Reader) *Decoder {
	// jsontext applies optimizations to a *bytes.Buffer that also restrict
	// how the buffer may be used (for example, it cannot be written to while
	// the jsontext.Decoder is using it). Wrapping the buffer in an anonymous
	// struct hides its concrete type and so disables those optimizations.
	if _, isBuffer := r.(*bytes.Buffer); isBuffer {
		r = struct{ io.Reader }{r}
	}

	opts := DefaultOptionsV1()
	return &Decoder{
		dec:  jsontext.NewDecoder(r, opts),
		opts: opts,
	}
}
// UseNumber causes the Decoder to unmarshal a number into an
// interface value as a [Number] instead of as a float64.
// Calling it when the option is already enabled is a no-op.
func (dec *Decoder) UseNumber() {
	alreadyOn, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber)
	if alreadyOn {
		return
	}
	dec.opts = jsonv2.JoinOptions(dec.opts, unmarshalAnyWithRawNumber(true))
}
// DisallowUnknownFields causes the Decoder to return an error when the
// destination is a struct and the input contains object keys which do not
// match any non-ignored, exported fields in the destination.
// Calling it when the option is already enabled is a no-op.
func (dec *Decoder) DisallowUnknownFields() {
	alreadyOn, _ := jsonv2.GetOption(dec.opts, jsonv2.RejectUnknownMembers)
	if alreadyOn {
		return
	}
	dec.opts = jsonv2.JoinOptions(dec.opts, jsonv2.RejectUnknownMembers(true))
}
// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
// A failure to read the next value is remembered and returned by every
// subsequent call. An error from unmarshaling into v is returned but not
// remembered.
//
// See the documentation for [Unmarshal] for details about
// the conversion of JSON into a Go value.
func (dec *Decoder) Decode(v any) error {
	if dec.err != nil {
		return dec.err
	}

	raw, readErr := dec.dec.ReadValue()
	if readErr == nil {
		return jsonv2.Unmarshal(raw, v, dec.opts)
	}

	dec.err = transformSyntacticError(readErr)
	if dec.err.Error() == errUnexpectedEnd.Error() {
		// NOTE: Decode has always been inconsistent with Unmarshal
		// with regard to the exact error value for truncated input.
		dec.err = io.ErrUnexpectedEOF
	}
	return dec.err
}
// Buffered returns a reader over the data still sitting unread in the
// Decoder's buffer. The reader is valid until the next call to
// [Decoder.Decode].
func (dec *Decoder) Buffered() io.Reader {
	pending := dec.dec.UnreadBuffer()
	return bytes.NewReader(pending)
}
// An Encoder writes JSON values to an output stream.
// Use [NewEncoder] to create one.
type Encoder struct {
// w is the destination that Encode writes each encoded value to.
w io.Writer
// opts holds the marshal options; it starts as DefaultOptionsV1 and is
// adjusted by SetEscapeHTML.
opts jsonv2.Options
// err records a failed write to w; once set, every later call to
// Encode returns it without encoding anything.
err error
// buf is scratch space, reset on each Encode, that receives the
// marshaled value.
buf bytes.Buffer
// indentBuf is scratch space that receives the indented form of buf
// when indentation is enabled.
indentBuf bytes.Buffer
// indentPrefix and indentValue are the arguments given to SetIndent;
// indentation is applied when at least one of them is non-empty.
indentPrefix string
indentValue string
}
// NewEncoder returns a new encoder that writes to w, configured with the
// v1 default options.
func NewEncoder(w io.Writer) *Encoder {
	return &Encoder{
		w:    w,
		opts: DefaultOptionsV1(),
	}
}
// Encode writes the JSON encoding of v to the stream,
// followed by a newline character.
//
// A failed write to the underlying writer is remembered and returned by
// every subsequent call. Errors from marshaling or indenting are returned
// but not remembered.
//
// See the documentation for [Marshal] for details about the
// conversion of Go values to JSON.
func (enc *Encoder) Encode(v any) error {
	if enc.err != nil {
		return enc.err
	}

	out := &enc.buf
	out.Reset()
	if err := jsonv2.MarshalWrite(out, v, enc.opts); err != nil {
		return err
	}

	// Re-render through Indent only when SetIndent supplied something.
	if enc.indentPrefix != "" || enc.indentValue != "" {
		enc.indentBuf.Reset()
		if err := Indent(&enc.indentBuf, out.Bytes(), enc.indentPrefix, enc.indentValue); err != nil {
			return err
		}
		out = &enc.indentBuf
	}
	out.WriteByte('\n')

	_, writeErr := enc.w.Write(out.Bytes())
	if writeErr != nil {
		enc.err = writeErr
	}
	return writeErr
}
// SetIndent instructs the encoder to format each subsequent encoded
// value as if indented by the package-level function
// Indent(dst, src, prefix, indent).
// Calling SetIndent("", "") disables indentation.
func (enc *Encoder) SetIndent(prefix, indent string) {
	enc.indentPrefix, enc.indentValue = prefix, indent
}
// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
// to avoid certain safety problems that can arise when embedding JSON in HTML.
//
// In non-HTML settings where the escaping interferes with the readability
// of the output, SetEscapeHTML(false) disables this behavior.
//
// The options are left untouched when the current setting already equals on.
func (enc *Encoder) SetEscapeHTML(on bool) {
	current, _ := jsonv2.GetOption(enc.opts, jsontext.EscapeForHTML)
	if current == on {
		return
	}
	enc.opts = jsonv2.JoinOptions(enc.opts, jsontext.EscapeForHTML(on))
}
// RawMessage is a raw encoded JSON value.
// It implements [Marshaler] and [Unmarshaler] and can
// be used to delay JSON decoding or precompute a JSON encoding.
//
// RawMessage is declared as an alias, so it is the same type as
// jsontext.Value rather than a distinct type.
type RawMessage = jsontext.Value
// A Token holds a value of one of these types:
//
// - [Delim], for the four JSON delimiters [ ] { }
// - bool, for JSON booleans
// - float64, for JSON numbers
// - [Number], for JSON numbers
// - string, for JSON string literals
// - nil, for JSON null
//
// [Decoder.Token] yields a JSON number as a [Number] when the Decoder's
// raw-number option has been enabled (see [Decoder.UseNumber]) and as a
// float64 otherwise.
type Token any
// A Delim is a JSON array or object delimiter, one of [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	return string(rune(d))
}
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, [io.EOF].
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type [Delim]
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	tok, err := dec.dec.ReadToken()
	if err != nil {
		return nil, transformSyntacticError(err)
	}

	kind := tok.Kind()
	switch kind {
	case '{', '}', '[', ']':
		return Delim(kind), nil
	case '"':
		return tok.String(), nil
	case '0':
		// Numbers are returned as Number only when the raw-number option
		// has been enabled; otherwise they are returned as float64.
		wantNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber)
		if wantNumber {
			return Number(tok.String()), nil
		}
		return tok.Float(), nil
	case 't':
		return true, nil
	case 'f':
		return false, nil
	case 'n':
		return nil, nil
	}
	panic("unreachable")
}
// More reports whether there is another element in the
// current array or object being parsed.
// It peeks at the kind of the next token without consuming it.
func (dec *Decoder) More() bool {
	next := dec.dec.PeekKind()
	// A non-positive kind, or a closing delimiter, means nothing more
	// remains at this nesting level.
	if next <= 0 || next == ']' || next == '}' {
		return false
	}
	return true
}
// InputOffset returns the input stream byte offset of the current decoder
// position. The offset gives the location of the end of the most recently
// returned token and the beginning of the next token.
func (dec *Decoder) InputOffset() int64 {
	offset := dec.dec.InputOffset()
	return offset
}