add forked version of encoding/json with html escape disabled
- modified all local packages to use this fork
This commit is contained in:
41
pkg/encoders/json/internal/internal.go
Normal file
41
pkg/encoders/json/internal/internal.go
Normal file
@@ -0,0 +1,41 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package internal
|
||||
|
||||
import "errors"
|
||||
|
||||
// NotForPublicUse marks an API as intended for internal callers only.
// The marker cannot fully prevent outside use, but it helps restrict it.
// Nothing carrying this marker is covered by the Go compatibility agreement.
type NotForPublicUse struct{}

// AllowInternalUse is the marker value that "json" hands to "jsontext"
// to authenticate that the caller may access internal functionality.
var AllowInternalUse = NotForPublicUse{}
|
||||
|
||||
// Sentinel errors shared internally between jsonv1 and jsonv2.

// ErrCycle is the sentinel whose message is "encountered a cycle".
var ErrCycle = errors.New("encountered a cycle")

// ErrNonNilReference is the sentinel whose message states that
// a value must be passed as a non-nil pointer reference.
var ErrNonNilReference = errors.New("value must be passed as a non-nil pointer reference")
|
||||
|
||||
// Hook variables. Each one is declared here without a value.

// TransformMarshalError converts a v2 error into a v1 error.
// It is called only at the top level of a Marshal function.
var TransformMarshalError func(any, error) error

// NewMarshalerError constructs a jsonv1.MarshalerError.
// It is called after a user-defined Marshal method/function fails.
var NewMarshalerError func(any, error, string) error

// TransformUnmarshalError converts a v2 error into a v1 error.
// It is called only at the top level of an Unmarshal function.
var TransformUnmarshalError func(any, error) error

// NewRawNumber returns new(jsonv1.Number).
var NewRawNumber func() any

// RawNumberOf returns jsonv1.Number(b).
var RawNumberOf func(b []byte) any
|
||||
215
pkg/encoders/json/internal/jsonflags/flags.go
Normal file
215
pkg/encoders/json/internal/jsonflags/flags.go
Normal file
@@ -0,0 +1,215 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
// jsonflags implements all the optional boolean flags.
|
||||
// These flags are shared across the "json", "jsontext", and "jsonopts" packages.
|
||||
package jsonflags
|
||||
|
||||
import "encoding/json/internal"
|
||||
|
||||
// Bools represents zero or more boolean flags, all set to true or false.
|
||||
// The least-significant bit is the boolean value of all flags in the set.
|
||||
// The remaining bits identify which particular flags.
|
||||
//
|
||||
// In common usage, this is OR'd with 0 or 1. For example:
|
||||
// - (AllowInvalidUTF8 | 0) means "AllowInvalidUTF8 is false"
|
||||
// - (Multiline | Indent | 1) means "Multiline and Indent are true"
|
||||
type Bools uint64
|
||||
|
||||
func (Bools) JSONOptions(internal.NotForPublicUse) {}
|
||||
|
||||
const (
|
||||
// AllFlags is the set of all flags.
|
||||
AllFlags = AllCoderFlags | AllArshalV2Flags | AllArshalV1Flags
|
||||
|
||||
// AllCoderFlags is the set of all encoder/decoder flags.
|
||||
AllCoderFlags = (maxCoderFlag - 1) - initFlag
|
||||
|
||||
// AllArshalV2Flags is the set of all v2 marshal/unmarshal flags.
|
||||
AllArshalV2Flags = (maxArshalV2Flag - 1) - (maxCoderFlag - 1)
|
||||
|
||||
// AllArshalV1Flags is the set of all v1 marshal/unmarshal flags.
|
||||
AllArshalV1Flags = (maxArshalV1Flag - 1) - (maxArshalV2Flag - 1)
|
||||
|
||||
// NonBooleanFlags is the set of non-boolean flags,
|
||||
// where the value is some other concrete Go type.
|
||||
// The value of the flag is stored within jsonopts.Struct.
|
||||
NonBooleanFlags = 0 |
|
||||
Indent |
|
||||
IndentPrefix |
|
||||
ByteLimit |
|
||||
DepthLimit |
|
||||
Marshalers |
|
||||
Unmarshalers
|
||||
|
||||
// DefaultV1Flags is the set of booleans flags that default to true under
|
||||
// v1 semantics. None of the non-boolean flags differ between v1 and v2.
|
||||
DefaultV1Flags = 0 |
|
||||
AllowDuplicateNames |
|
||||
AllowInvalidUTF8 |
|
||||
EscapeForHTML |
|
||||
EscapeForJS |
|
||||
PreserveRawStrings |
|
||||
Deterministic |
|
||||
FormatNilMapAsNull |
|
||||
FormatNilSliceAsNull |
|
||||
MatchCaseInsensitiveNames |
|
||||
CallMethodsWithLegacySemantics |
|
||||
FormatByteArrayAsArray |
|
||||
FormatBytesWithLegacySemantics |
|
||||
FormatDurationAsNano |
|
||||
MatchCaseSensitiveDelimiter |
|
||||
MergeWithLegacySemantics |
|
||||
OmitEmptyWithLegacySemantics |
|
||||
ParseBytesWithLooseRFC4648 |
|
||||
ParseTimeWithLooseRFC3339 |
|
||||
ReportErrorsWithLegacySemantics |
|
||||
StringifyWithLegacySemantics |
|
||||
UnmarshalArrayFromAnyLength
|
||||
|
||||
// AnyWhitespace reports whether the encoded output might have any whitespace.
|
||||
AnyWhitespace = Multiline | SpaceAfterColon | SpaceAfterComma
|
||||
|
||||
// WhitespaceFlags is the set of flags related to whitespace formatting.
|
||||
// In contrast to AnyWhitespace, this includes Indent and IndentPrefix
|
||||
// as those settings take no effect if Multiline is false.
|
||||
WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix
|
||||
|
||||
// AnyEscape is the set of flags related to escaping in a JSON string.
|
||||
AnyEscape = EscapeForHTML | EscapeForJS
|
||||
|
||||
// CanonicalizeNumbers is the set of flags related to raw number canonicalization.
|
||||
CanonicalizeNumbers = CanonicalizeRawInts | CanonicalizeRawFloats
|
||||
)
|
||||
|
||||
// Encoder and decoder flags.
|
||||
const (
|
||||
initFlag Bools = 1 << iota // reserved for the boolean value itself
|
||||
|
||||
AllowDuplicateNames // encode or decode
|
||||
AllowInvalidUTF8 // encode or decode
|
||||
WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal
|
||||
OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite
|
||||
PreserveRawStrings // encode only
|
||||
CanonicalizeRawInts // encode only
|
||||
CanonicalizeRawFloats // encode only
|
||||
ReorderRawObjects // encode only
|
||||
EscapeForHTML // encode only
|
||||
EscapeForJS // encode only
|
||||
Multiline // encode only
|
||||
SpaceAfterColon // encode only
|
||||
SpaceAfterComma // encode only
|
||||
Indent // encode only; non-boolean flag
|
||||
IndentPrefix // encode only; non-boolean flag
|
||||
ByteLimit // encode or decode; non-boolean flag
|
||||
DepthLimit // encode or decode; non-boolean flag
|
||||
|
||||
maxCoderFlag
|
||||
)
|
||||
|
||||
// Marshal and Unmarshal flags (for v2).
|
||||
const (
|
||||
_ Bools = (maxCoderFlag >> 1) << iota
|
||||
|
||||
StringifyNumbers // marshal or unmarshal
|
||||
Deterministic // marshal only
|
||||
FormatNilMapAsNull // marshal only
|
||||
FormatNilSliceAsNull // marshal only
|
||||
OmitZeroStructFields // marshal only
|
||||
MatchCaseInsensitiveNames // marshal or unmarshal
|
||||
DiscardUnknownMembers // marshal only
|
||||
RejectUnknownMembers // unmarshal only
|
||||
Marshalers // marshal only; non-boolean flag
|
||||
Unmarshalers // unmarshal only; non-boolean flag
|
||||
|
||||
maxArshalV2Flag
|
||||
)
|
||||
|
||||
// Marshal and Unmarshal flags (for v1).
|
||||
const (
|
||||
_ Bools = (maxArshalV2Flag >> 1) << iota
|
||||
|
||||
CallMethodsWithLegacySemantics // marshal or unmarshal
|
||||
FormatByteArrayAsArray // marshal or unmarshal
|
||||
FormatBytesWithLegacySemantics // marshal or unmarshal
|
||||
FormatDurationAsNano // marshal or unmarshal
|
||||
MatchCaseSensitiveDelimiter // marshal or unmarshal
|
||||
MergeWithLegacySemantics // unmarshal
|
||||
OmitEmptyWithLegacySemantics // marshal
|
||||
ParseBytesWithLooseRFC4648 // unmarshal
|
||||
ParseTimeWithLooseRFC3339 // unmarshal
|
||||
ReportErrorsWithLegacySemantics // marshal or unmarshal
|
||||
StringifyWithLegacySemantics // marshal or unmarshal
|
||||
StringifyBoolsAndStrings // marshal or unmarshal; for internal use by jsonv2.makeStructArshaler
|
||||
UnmarshalAnyWithRawNumber // unmarshal; for internal use by jsonv1.Decoder.UseNumber
|
||||
UnmarshalArrayFromAnyLength // unmarshal
|
||||
|
||||
maxArshalV1Flag
|
||||
)
|
||||
|
||||
// bitsUsed is the number of bits used in the 64-bit boolean flags
|
||||
const bitsUsed = 42
|
||||
|
||||
// Static compile check that bitsUsed and maxArshalV1Flag are in sync.
|
||||
const _ = uint64((1<<bitsUsed)-maxArshalV1Flag) + uint64(maxArshalV1Flag-(1<<bitsUsed))
|
||||
|
||||
// Flags is a set of boolean flags in which each flag carries its own value.
// A flag whose presence bit is zero must also have a zero value bit,
// and the least-significant bit of both fields is always zero.
//
// Contrast this with Bools, where every flag in the set shares one value.
type Flags struct {
	Presence uint64
	Values   uint64
}

// Join merges src into dst; wherever both define a flag, src wins.
func (dst *Flags) Join(src Flags) {
	// Worked example:
	//	dst = Flags{Presence: 0b_1100_0011, Values: 0b_1000_0011}
	//	src = Flags{Presence: 0b_0101_1010, Values: 0b_1001_0010}

	// Drop the destination values of every flag that src defines.
	//	0b_1000_0011 &^ 0b_0101_1010 -> 0b_1000_0001
	kept := dst.Values &^ src.Presence

	// Layer the source values on top.
	//	0b_1000_0001 | 0b_1001_0010 -> 0b_1001_0011
	dst.Values = kept | src.Values

	// Every flag present in either side is present in the result.
	//	0b_1100_0011 | 0b_0101_1010 -> 0b_1101_1011
	dst.Presence |= src.Presence
}
|
||||
|
||||
// Set sets both the presence and value for the provided bool (or set of bools).
|
||||
func (fs *Flags) Set(f Bools) {
|
||||
// Select out the bits for the flag identifiers (everything except LSB),
|
||||
// then set the presence for all the identifier bits (using OR),
|
||||
// then invert the identifier bits to clear out the values (using AND-NOT),
|
||||
// then copy over all the identifier bits to the value if LSB is 1.
|
||||
// e.g., fs := Flags{Presence: 0b_0101_0010, Value: 0b_0001_0010}
|
||||
// e.g., f := 0b_1001_0001
|
||||
id := uint64(f) &^ uint64(1) // e.g., 0b_1001_0001 & 0b_1111_1110 -> 0b_1001_0000
|
||||
fs.Presence |= id // e.g., 0b_0101_0010 | 0b_1001_0000 -> 0b_1101_0011
|
||||
fs.Values &= ^id // e.g., 0b_0001_0010 & 0b_0110_1111 -> 0b_0000_0010
|
||||
fs.Values |= uint64(f&1) * id // e.g., 0b_0000_0010 | 0b_1001_0000 -> 0b_1001_0010
|
||||
}
|
||||
|
||||
// Get reports whether the bool (or any of the bools) is true.
|
||||
// This is generally only used with a singular bool.
|
||||
// The value bit of f (i.e., the LSB) is ignored.
|
||||
func (fs Flags) Get(f Bools) bool {
|
||||
return fs.Values&uint64(f) > 0
|
||||
}
|
||||
|
||||
// Has reports whether the bool (or any of the bools) is set.
|
||||
// The value bit of f (i.e., the LSB) is ignored.
|
||||
func (fs Flags) Has(f Bools) bool {
|
||||
return fs.Presence&uint64(f) > 0
|
||||
}
|
||||
|
||||
// Clear clears both the presence and value for the provided bool or bools.
|
||||
// The value bit of f (i.e., the LSB) is ignored.
|
||||
func (fs *Flags) Clear(f Bools) {
|
||||
// Invert f to produce a mask to clear all bits in f (using AND).
|
||||
// e.g., fs := Flags{Presence: 0b_0101_0010, Value: 0b_0001_0010}
|
||||
// e.g., f := 0b_0001_1000
|
||||
mask := uint64(^f) // e.g., 0b_0001_1000 -> 0b_1110_0111
|
||||
fs.Presence &= mask // e.g., 0b_0101_0010 & 0b_1110_0111 -> 0b_0100_0010
|
||||
fs.Values &= mask // e.g., 0b_0001_0010 & 0b_1110_0111 -> 0b_0000_0010
|
||||
}
|
||||
75
pkg/encoders/json/internal/jsonflags/flags_test.go
Normal file
75
pkg/encoders/json/internal/jsonflags/flags_test.go
Normal file
@@ -0,0 +1,75 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonflags
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestFlags(t *testing.T) {
|
||||
type Check struct{ want Flags }
|
||||
type Join struct{ in Flags }
|
||||
type Set struct{ in Bools }
|
||||
type Clear struct{ in Bools }
|
||||
type Get struct {
|
||||
in Bools
|
||||
want bool
|
||||
wantOk bool
|
||||
}
|
||||
|
||||
calls := []any{
|
||||
Get{in: AllowDuplicateNames, want: false, wantOk: false},
|
||||
Set{in: AllowDuplicateNames | 0},
|
||||
Get{in: AllowDuplicateNames, want: false, wantOk: true},
|
||||
Set{in: AllowDuplicateNames | 1},
|
||||
Get{in: AllowDuplicateNames, want: true, wantOk: true},
|
||||
Check{want: Flags{Presence: uint64(AllowDuplicateNames), Values: uint64(AllowDuplicateNames)}},
|
||||
Get{in: AllowInvalidUTF8, want: false, wantOk: false},
|
||||
Set{in: AllowInvalidUTF8 | 1},
|
||||
Get{in: AllowInvalidUTF8, want: true, wantOk: true},
|
||||
Set{in: AllowInvalidUTF8 | 0},
|
||||
Get{in: AllowInvalidUTF8, want: false, wantOk: true},
|
||||
Get{in: AllowDuplicateNames, want: true, wantOk: true},
|
||||
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}},
|
||||
Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 0},
|
||||
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(0)}},
|
||||
Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 0},
|
||||
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(0)}},
|
||||
Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 1},
|
||||
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames | AllowInvalidUTF8)}},
|
||||
Join{in: Flags{Presence: 0, Values: 0}},
|
||||
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames | AllowInvalidUTF8)}},
|
||||
Join{in: Flags{Presence: uint64(Multiline | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}},
|
||||
Check{want: Flags{Presence: uint64(Multiline | AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}},
|
||||
Clear{in: AllowDuplicateNames | AllowInvalidUTF8},
|
||||
Check{want: Flags{Presence: uint64(Multiline), Values: uint64(0)}},
|
||||
Set{in: AllowInvalidUTF8 | Deterministic | ReportErrorsWithLegacySemantics | 1},
|
||||
Set{in: Multiline | StringifyNumbers | 0},
|
||||
Check{want: Flags{Presence: uint64(AllowInvalidUTF8 | Deterministic | ReportErrorsWithLegacySemantics | Multiline | StringifyNumbers), Values: uint64(AllowInvalidUTF8 | Deterministic | ReportErrorsWithLegacySemantics)}},
|
||||
Clear{in: ^AllCoderFlags},
|
||||
Check{want: Flags{Presence: uint64(AllowInvalidUTF8 | Multiline), Values: uint64(AllowInvalidUTF8)}},
|
||||
}
|
||||
var fs Flags
|
||||
for i, call := range calls {
|
||||
switch call := call.(type) {
|
||||
case Join:
|
||||
fs.Join(call.in)
|
||||
case Set:
|
||||
fs.Set(call.in)
|
||||
case Clear:
|
||||
fs.Clear(call.in)
|
||||
case Get:
|
||||
got := fs.Get(call.in)
|
||||
gotOk := fs.Has(call.in)
|
||||
if got != call.want || gotOk != call.wantOk {
|
||||
t.Fatalf("%d: GetOk = (%v, %v), want (%v, %v)", i, got, gotOk, call.want, call.wantOk)
|
||||
}
|
||||
case Check:
|
||||
if fs != call.want {
|
||||
t.Fatalf("%d: got %x, want %x", i, fs, call.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
202
pkg/encoders/json/internal/jsonopts/options.go
Normal file
202
pkg/encoders/json/internal/jsonopts/options.go
Normal file
@@ -0,0 +1,202 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonopts
|
||||
|
||||
import (
|
||||
"encoding/json/internal"
|
||||
"encoding/json/internal/jsonflags"
|
||||
)
|
||||
|
||||
// Options is the common options type shared across json packages.
|
||||
type Options interface {
|
||||
// JSONOptions is exported so related json packages can implement Options.
|
||||
JSONOptions(internal.NotForPublicUse)
|
||||
}
|
||||
|
||||
// Struct is the combination of all options in struct form.
|
||||
// This is efficient to pass down the call stack and to query.
|
||||
type Struct struct {
|
||||
Flags jsonflags.Flags
|
||||
|
||||
CoderValues
|
||||
ArshalValues
|
||||
}
|
||||
|
||||
// CoderValues holds the option values paired with the
// Indent, IndentPrefix, ByteLimit, and DepthLimit flags of jsonflags.
type CoderValues struct {
	// Indent pairs with jsonflags.Indent.
	Indent string
	// IndentPrefix pairs with jsonflags.IndentPrefix.
	IndentPrefix string
	// ByteLimit pairs with jsonflags.ByteLimit.
	ByteLimit int64
	// DepthLimit pairs with jsonflags.DepthLimit.
	DepthLimit int
}
|
||||
|
||||
// ArshalValues holds the option values used by marshaling and unmarshaling.
type ArshalValues struct {
	// Marshalers and Unmarshalers are typed as any so that this package
	// has no concrete dependency on *json.Marshalers and *json.Unmarshalers;
	// such a dependency would in turn pull in the "reflect" package.

	// Marshalers pairs with jsonflags.Marshalers.
	Marshalers any
	// Unmarshalers pairs with jsonflags.Unmarshalers.
	Unmarshalers any

	Format      string
	FormatDepth int
}
|
||||
|
||||
// DefaultOptionsV2 is the set of all options that define default v2 behavior.
|
||||
var DefaultOptionsV2 = Struct{
|
||||
Flags: jsonflags.Flags{
|
||||
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
|
||||
Values: uint64(0),
|
||||
},
|
||||
}
|
||||
|
||||
// DefaultOptionsV1 is the set of all options that define default v1 behavior.
|
||||
var DefaultOptionsV1 = Struct{
|
||||
Flags: jsonflags.Flags{
|
||||
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
|
||||
Values: uint64(jsonflags.DefaultV1Flags),
|
||||
},
|
||||
}
|
||||
|
||||
// JSONOptions is an empty method that accepts the internal-use marker;
// it gives *Struct the method that the Options interface requires.
func (*Struct) JSONOptions(_ internal.NotForPublicUse) {}
|
||||
|
||||
// GetUnknownOption is injected by the "json" package to handle Options
|
||||
// declared in that package so that "jsonopts" can handle them.
|
||||
var GetUnknownOption = func(Struct, Options) (any, bool) { panic("unknown option") }
|
||||
|
||||
func GetOption[T any](opts Options, setter func(T) Options) (T, bool) {
|
||||
// Collapse the options to *Struct to simplify lookup.
|
||||
structOpts, ok := opts.(*Struct)
|
||||
if !ok {
|
||||
var structOpts2 Struct
|
||||
structOpts2.Join(opts)
|
||||
structOpts = &structOpts2
|
||||
}
|
||||
|
||||
// Lookup the option based on the return value of the setter.
|
||||
var zero T
|
||||
switch opt := setter(zero).(type) {
|
||||
case jsonflags.Bools:
|
||||
v := structOpts.Flags.Get(opt)
|
||||
ok := structOpts.Flags.Has(opt)
|
||||
return any(v).(T), ok
|
||||
case Indent:
|
||||
if !structOpts.Flags.Has(jsonflags.Indent) {
|
||||
return zero, false
|
||||
}
|
||||
return any(structOpts.Indent).(T), true
|
||||
case IndentPrefix:
|
||||
if !structOpts.Flags.Has(jsonflags.IndentPrefix) {
|
||||
return zero, false
|
||||
}
|
||||
return any(structOpts.IndentPrefix).(T), true
|
||||
case ByteLimit:
|
||||
if !structOpts.Flags.Has(jsonflags.ByteLimit) {
|
||||
return zero, false
|
||||
}
|
||||
return any(structOpts.ByteLimit).(T), true
|
||||
case DepthLimit:
|
||||
if !structOpts.Flags.Has(jsonflags.DepthLimit) {
|
||||
return zero, false
|
||||
}
|
||||
return any(structOpts.DepthLimit).(T), true
|
||||
default:
|
||||
v, ok := GetUnknownOption(*structOpts, opt)
|
||||
return v.(T), ok
|
||||
}
|
||||
}
|
||||
|
||||
// JoinUnknownOption is injected by the "json" package to handle Options
|
||||
// declared in that package so that "jsonopts" can handle them.
|
||||
var JoinUnknownOption = func(Struct, Options) Struct { panic("unknown option") }
|
||||
|
||||
func (dst *Struct) Join(srcs ...Options) {
|
||||
dst.join(false, srcs...)
|
||||
}
|
||||
|
||||
func (dst *Struct) JoinWithoutCoderOptions(srcs ...Options) {
|
||||
dst.join(true, srcs...)
|
||||
}
|
||||
|
||||
func (dst *Struct) join(excludeCoderOptions bool, srcs ...Options) {
|
||||
for _, src := range srcs {
|
||||
switch src := src.(type) {
|
||||
case nil:
|
||||
continue
|
||||
case jsonflags.Bools:
|
||||
if excludeCoderOptions {
|
||||
src &= ^jsonflags.AllCoderFlags
|
||||
}
|
||||
dst.Flags.Set(src)
|
||||
case Indent:
|
||||
if excludeCoderOptions {
|
||||
continue
|
||||
}
|
||||
dst.Flags.Set(jsonflags.Multiline | jsonflags.Indent | 1)
|
||||
dst.Indent = string(src)
|
||||
case IndentPrefix:
|
||||
if excludeCoderOptions {
|
||||
continue
|
||||
}
|
||||
dst.Flags.Set(jsonflags.Multiline | jsonflags.IndentPrefix | 1)
|
||||
dst.IndentPrefix = string(src)
|
||||
case ByteLimit:
|
||||
if excludeCoderOptions {
|
||||
continue
|
||||
}
|
||||
dst.Flags.Set(jsonflags.ByteLimit | 1)
|
||||
dst.ByteLimit = int64(src)
|
||||
case DepthLimit:
|
||||
if excludeCoderOptions {
|
||||
continue
|
||||
}
|
||||
dst.Flags.Set(jsonflags.DepthLimit | 1)
|
||||
dst.DepthLimit = int(src)
|
||||
case *Struct:
|
||||
srcFlags := src.Flags // shallow copy the flags
|
||||
if excludeCoderOptions {
|
||||
srcFlags.Clear(jsonflags.AllCoderFlags)
|
||||
}
|
||||
dst.Flags.Join(srcFlags)
|
||||
if srcFlags.Has(jsonflags.NonBooleanFlags) {
|
||||
if srcFlags.Has(jsonflags.Indent) {
|
||||
dst.Indent = src.Indent
|
||||
}
|
||||
if srcFlags.Has(jsonflags.IndentPrefix) {
|
||||
dst.IndentPrefix = src.IndentPrefix
|
||||
}
|
||||
if srcFlags.Has(jsonflags.ByteLimit) {
|
||||
dst.ByteLimit = src.ByteLimit
|
||||
}
|
||||
if srcFlags.Has(jsonflags.DepthLimit) {
|
||||
dst.DepthLimit = src.DepthLimit
|
||||
}
|
||||
if srcFlags.Has(jsonflags.Marshalers) {
|
||||
dst.Marshalers = src.Marshalers
|
||||
}
|
||||
if srcFlags.Has(jsonflags.Unmarshalers) {
|
||||
dst.Unmarshalers = src.Unmarshalers
|
||||
}
|
||||
}
|
||||
default:
|
||||
*dst = JoinUnknownOption(*dst, src)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type (
|
||||
Indent string // jsontext.WithIndent
|
||||
IndentPrefix string // jsontext.WithIndentPrefix
|
||||
ByteLimit int64 // jsontext.WithByteLimit
|
||||
DepthLimit int // jsontext.WithDepthLimit
|
||||
// type for jsonflags.Marshalers declared in "json" package
|
||||
// type for jsonflags.Unmarshalers declared in "json" package
|
||||
)
|
||||
|
||||
func (Indent) JSONOptions(internal.NotForPublicUse) {}
|
||||
func (IndentPrefix) JSONOptions(internal.NotForPublicUse) {}
|
||||
func (ByteLimit) JSONOptions(internal.NotForPublicUse) {}
|
||||
func (DepthLimit) JSONOptions(internal.NotForPublicUse) {}
|
||||
233
pkg/encoders/json/internal/jsonopts/options_test.go
Normal file
233
pkg/encoders/json/internal/jsonopts/options_test.go
Normal file
@@ -0,0 +1,233 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonopts_test
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"encoding/json/internal/jsonflags"
|
||||
. "encoding/json/internal/jsonopts"
|
||||
"encoding/json/jsontext"
|
||||
"encoding/json/v2"
|
||||
)
|
||||
|
||||
func makeFlags(f ...jsonflags.Bools) (fs jsonflags.Flags) {
|
||||
for _, f := range f {
|
||||
fs.Set(f)
|
||||
}
|
||||
return fs
|
||||
}
|
||||
|
||||
func TestJoin(t *testing.T) {
|
||||
tests := []struct {
|
||||
in Options
|
||||
excludeCoders bool
|
||||
want *Struct
|
||||
}{{
|
||||
in: jsonflags.AllowInvalidUTF8 | 1,
|
||||
want: &Struct{Flags: makeFlags(jsonflags.AllowInvalidUTF8 | 1)},
|
||||
}, {
|
||||
in: jsonflags.Multiline | 0,
|
||||
want: &Struct{
|
||||
Flags: makeFlags(jsonflags.AllowInvalidUTF8|1, jsonflags.Multiline|0)},
|
||||
}, {
|
||||
in: Indent("\t"), // implicitly sets Multiline=true
|
||||
want: &Struct{
|
||||
Flags: makeFlags(jsonflags.AllowInvalidUTF8 | jsonflags.Multiline | jsonflags.Indent | 1),
|
||||
CoderValues: CoderValues{Indent: "\t"},
|
||||
},
|
||||
}, {
|
||||
in: &Struct{
|
||||
Flags: makeFlags(jsonflags.Multiline|jsonflags.EscapeForJS|0, jsonflags.AllowInvalidUTF8|1),
|
||||
},
|
||||
want: &Struct{
|
||||
Flags: makeFlags(jsonflags.AllowInvalidUTF8|jsonflags.Indent|1, jsonflags.Multiline|jsonflags.EscapeForJS|0),
|
||||
CoderValues: CoderValues{Indent: "\t"},
|
||||
},
|
||||
}, {
|
||||
in: &DefaultOptionsV1,
|
||||
want: func() *Struct {
|
||||
v1 := DefaultOptionsV1
|
||||
v1.Flags.Set(jsonflags.Indent | 1)
|
||||
v1.Flags.Set(jsonflags.Multiline | 0)
|
||||
v1.Indent = "\t"
|
||||
return &v1
|
||||
}(), // v1 fully replaces before (except for whitespace related flags)
|
||||
}, {
|
||||
in: &DefaultOptionsV2,
|
||||
want: func() *Struct {
|
||||
v2 := DefaultOptionsV2
|
||||
v2.Flags.Set(jsonflags.Indent | 1)
|
||||
v2.Flags.Set(jsonflags.Multiline | 0)
|
||||
v2.Indent = "\t"
|
||||
return &v2
|
||||
}(), // v2 fully replaces before (except for whitespace related flags)
|
||||
}, {
|
||||
in: jsonflags.Deterministic | jsonflags.AllowInvalidUTF8 | 1, excludeCoders: true,
|
||||
want: func() *Struct {
|
||||
v2 := DefaultOptionsV2
|
||||
v2.Flags.Set(jsonflags.Deterministic | 1)
|
||||
v2.Flags.Set(jsonflags.Indent | 1)
|
||||
v2.Flags.Set(jsonflags.Multiline | 0)
|
||||
v2.Indent = "\t"
|
||||
return &v2
|
||||
}(),
|
||||
}, {
|
||||
in: jsontext.WithIndentPrefix(" "), excludeCoders: true,
|
||||
want: func() *Struct {
|
||||
v2 := DefaultOptionsV2
|
||||
v2.Flags.Set(jsonflags.Deterministic | 1)
|
||||
v2.Flags.Set(jsonflags.Indent | 1)
|
||||
v2.Flags.Set(jsonflags.Multiline | 0)
|
||||
v2.Indent = "\t"
|
||||
return &v2
|
||||
}(),
|
||||
}, {
|
||||
in: jsontext.WithIndentPrefix(" "), excludeCoders: false,
|
||||
want: func() *Struct {
|
||||
v2 := DefaultOptionsV2
|
||||
v2.Flags.Set(jsonflags.Deterministic | 1)
|
||||
v2.Flags.Set(jsonflags.Indent | 1)
|
||||
v2.Flags.Set(jsonflags.IndentPrefix | 1)
|
||||
v2.Flags.Set(jsonflags.Multiline | 1)
|
||||
v2.Indent = "\t"
|
||||
v2.IndentPrefix = " "
|
||||
return &v2
|
||||
}(),
|
||||
}, {
|
||||
in: &Struct{
|
||||
Flags: jsonflags.Flags{
|
||||
Presence: uint64(jsonflags.Deterministic | jsonflags.Indent | jsonflags.IndentPrefix),
|
||||
Values: uint64(jsonflags.Indent | jsonflags.IndentPrefix),
|
||||
},
|
||||
CoderValues: CoderValues{Indent: " ", IndentPrefix: " "},
|
||||
},
|
||||
excludeCoders: true,
|
||||
want: func() *Struct {
|
||||
v2 := DefaultOptionsV2
|
||||
v2.Flags.Set(jsonflags.Indent | 1)
|
||||
v2.Flags.Set(jsonflags.IndentPrefix | 1)
|
||||
v2.Flags.Set(jsonflags.Multiline | 1)
|
||||
v2.Indent = "\t"
|
||||
v2.IndentPrefix = " "
|
||||
return &v2
|
||||
}(),
|
||||
}, {
|
||||
in: &Struct{
|
||||
Flags: jsonflags.Flags{
|
||||
Presence: uint64(jsonflags.Deterministic | jsonflags.Indent | jsonflags.IndentPrefix),
|
||||
Values: uint64(jsonflags.Indent | jsonflags.IndentPrefix),
|
||||
},
|
||||
CoderValues: CoderValues{Indent: " ", IndentPrefix: " "},
|
||||
},
|
||||
excludeCoders: false,
|
||||
want: func() *Struct {
|
||||
v2 := DefaultOptionsV2
|
||||
v2.Flags.Set(jsonflags.Indent | 1)
|
||||
v2.Flags.Set(jsonflags.IndentPrefix | 1)
|
||||
v2.Flags.Set(jsonflags.Multiline | 1)
|
||||
v2.Indent = " "
|
||||
v2.IndentPrefix = " "
|
||||
return &v2
|
||||
}(),
|
||||
}}
|
||||
got := new(Struct)
|
||||
for i, tt := range tests {
|
||||
if tt.excludeCoders {
|
||||
got.JoinWithoutCoderOptions(tt.in)
|
||||
} else {
|
||||
got.Join(tt.in)
|
||||
}
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Fatalf("%d: Join:\n\tgot: %+v\n\twant: %+v", i, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGet(t *testing.T) {
|
||||
opts := &Struct{
|
||||
Flags: makeFlags(jsonflags.Indent|jsonflags.Deterministic|jsonflags.Marshalers|1, jsonflags.Multiline|0),
|
||||
CoderValues: CoderValues{Indent: "\t"},
|
||||
ArshalValues: ArshalValues{Marshalers: new(json.Marshalers)},
|
||||
}
|
||||
if v, ok := json.GetOption(nil, jsontext.AllowDuplicateNames); v || ok {
|
||||
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, false)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(jsonflags.AllowInvalidUTF8|0, jsontext.AllowDuplicateNames); v || ok {
|
||||
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, false)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(jsonflags.AllowDuplicateNames|0, jsontext.AllowDuplicateNames); v || !ok {
|
||||
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, true)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(jsonflags.AllowDuplicateNames|1, jsontext.AllowDuplicateNames); !v || !ok {
|
||||
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (true, true)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(Indent(""), jsontext.AllowDuplicateNames); v || ok {
|
||||
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, false)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(Indent(" "), jsontext.WithIndent); v != " " || !ok {
|
||||
t.Errorf(`GetOption(..., WithIndent) = (%q, %v), want (" ", true)`, v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(jsonflags.AllowDuplicateNames|1, jsontext.WithIndent); v != "" || ok {
|
||||
t.Errorf(`GetOption(..., WithIndent) = (%q, %v), want ("", false)`, v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(opts, jsontext.AllowDuplicateNames); v || ok {
|
||||
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, false)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(opts, json.Deterministic); !v || !ok {
|
||||
t.Errorf("GetOption(..., Deterministic) = (%v, %v), want (true, true)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(opts, jsontext.Multiline); v || !ok {
|
||||
t.Errorf("GetOption(..., Multiline) = (%v, %v), want (false, true)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(opts, jsontext.AllowInvalidUTF8); v || ok {
|
||||
t.Errorf("GetOption(..., AllowInvalidUTF8) = (%v, %v), want (false, false)", v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(opts, jsontext.WithIndent); v != "\t" || !ok {
|
||||
t.Errorf(`GetOption(..., WithIndent) = (%q, %v), want ("\t", true)`, v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(opts, jsontext.WithIndentPrefix); v != "" || ok {
|
||||
t.Errorf(`GetOption(..., WithIndentPrefix) = (%q, %v), want ("", false)`, v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(opts, json.WithMarshalers); v == nil || !ok {
|
||||
t.Errorf(`GetOption(..., WithMarshalers) = (%v, %v), want (non-nil, true)`, v, ok)
|
||||
}
|
||||
if v, ok := json.GetOption(opts, json.WithUnmarshalers); v != nil || ok {
|
||||
t.Errorf(`GetOption(..., WithUnmarshalers) = (%v, %v), want (nil, false)`, v, ok)
|
||||
}
|
||||
}
|
||||
|
||||
var sink struct {
|
||||
Bool bool
|
||||
String string
|
||||
Marshalers *json.Marshalers
|
||||
}
|
||||
|
||||
func BenchmarkGetBool(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
opts := json.DefaultOptionsV2()
|
||||
for range b.N {
|
||||
sink.Bool, sink.Bool = json.GetOption(opts, jsontext.AllowDuplicateNames)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGetIndent(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
opts := json.DefaultOptionsV2()
|
||||
for range b.N {
|
||||
sink.String, sink.Bool = json.GetOption(opts, jsontext.WithIndent)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGetMarshalers(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
opts := json.JoinOptions(json.DefaultOptionsV2(), json.WithMarshalers(nil))
|
||||
for range b.N {
|
||||
sink.Marshalers, sink.Bool = json.GetOption(opts, json.WithMarshalers)
|
||||
}
|
||||
}
|
||||
629
pkg/encoders/json/internal/jsonwire/decode.go
Normal file
629
pkg/encoders/json/internal/jsonwire/decode.go
Normal file
@@ -0,0 +1,629 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"io"
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ValueFlags is a bit set describing properties of a consumed JSON value.
// The zero value has no flags set, which reads as "verbatim and canonical".
type ValueFlags uint

const (
	// stringNonVerbatim marks a string that cannot be naively treated as
	// valid UTF-8.
	stringNonVerbatim ValueFlags = 1 << iota
	// stringNonCanonical marks a string that is not formatted according to
	// RFC 8785, section 3.2.2.2.
	stringNonCanonical
	// TODO: Track whether a number is a non-integer?
)

// Join sets in f every flag that is set in f2.
func (f *ValueFlags) Join(f2 ValueFlags) {
	*f = *f | f2
}

// IsVerbatim reports whether the stringNonVerbatim flag is unset.
func (f ValueFlags) IsVerbatim() bool {
	return (f & stringNonVerbatim) == 0
}

// IsCanonical reports whether the stringNonCanonical flag is unset.
func (f ValueFlags) IsCanonical() bool {
	return (f & stringNonCanonical) == 0
}
|
||||
|
||||
// ConsumeWhitespace returns the number of leading bytes of b that are JSON
// whitespace (space, tab, carriage return or line feed) per RFC 7159,
// section 2.
func ConsumeWhitespace(b []byte) (n int) {
	// NOTE: The body is deliberately tiny so that it remains inlinable.
	for ; n < len(b); n++ {
		if c := b[n]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
	}
	return n
}
|
||||
|
||||
// ConsumeNull reports the length of the JSON null literal (RFC 7159,
// section 3) when b starts with it. It returns 0 otherwise, in which case
// ConsumeLiteral should be used to obtain a precise error.
func ConsumeNull(b []byte) int {
	// NOTE: The body is deliberately tiny so that it remains inlinable.
	const lit = "null"
	if len(b) < len(lit) {
		return 0
	}
	if string(b[:len(lit)]) != lit {
		return 0
	}
	return len(lit)
}
|
||||
|
||||
// ConsumeFalse reports the length of the JSON false literal (RFC 7159,
// section 3) when b starts with it. It returns 0 otherwise, in which case
// ConsumeLiteral should be used to obtain a precise error.
func ConsumeFalse(b []byte) int {
	// NOTE: The body is deliberately tiny so that it remains inlinable.
	const lit = "false"
	if len(b) < len(lit) {
		return 0
	}
	if string(b[:len(lit)]) != lit {
		return 0
	}
	return len(lit)
}
|
||||
|
||||
// ConsumeTrue reports the length of the JSON true literal (RFC 7159,
// section 3) when b starts with it. It returns 0 otherwise, in which case
// ConsumeLiteral should be used to obtain a precise error.
func ConsumeTrue(b []byte) int {
	// NOTE: The body is deliberately tiny so that it remains inlinable.
	const lit = "true"
	if len(b) < len(lit) {
		return 0
	}
	if string(b[:len(lit)]) != lit {
		return 0
	}
	return len(lit)
}
|
||||
|
||||
// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
func ConsumeLiteral(b []byte, lit string) (n int, err error) {
|
||||
for i := 0; i < len(b) && i < len(lit); i++ {
|
||||
if b[i] != lit[i] {
|
||||
return i, NewInvalidCharacterError(b[i:], "in literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
|
||||
}
|
||||
}
|
||||
if len(b) < len(lit) {
|
||||
return len(b), io.ErrUnexpectedEOF
|
||||
}
|
||||
return len(lit), nil
|
||||
}
|
||||
|
||||
// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7
|
||||
// but is limited to the grammar for an ASCII string without escape sequences.
|
||||
// It returns 0 if it is invalid or more complicated than a simple string,
|
||||
// in which case consumeString should be called.
|
||||
//
|
||||
// It rejects '<', '>', and '&' for compatibility reasons since these were
|
||||
// always escaped in the v1 implementation. Thus, if this function reports
|
||||
// non-zero then we know that the string would be encoded the same way
|
||||
// under both v1 or v2 escape semantics.
|
||||
func ConsumeSimpleString(b []byte) (n int) {
|
||||
// NOTE: The arguments and logic are kept simple to keep this inlinable.
|
||||
if len(b) > 0 && b[0] == '"' {
|
||||
n++
|
||||
for len(b) > n && b[n] < utf8.RuneSelf && escapeASCII[b[n]] == 0 {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) > uint(n) && b[n] == '"' {
|
||||
n++
|
||||
return n
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// ConsumeString consumes the next JSON string per RFC 7159, section 7.
|
||||
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
|
||||
// characters within the string itself.
|
||||
// It reports the number of bytes consumed and whether an error was encountered.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) {
|
||||
return ConsumeStringResumable(flags, b, 0, validateUTF8)
|
||||
}
|
||||
|
||||
// ConsumeStringResumable is identical to consumeString but supports resuming
|
||||
// from a previous call that returned io.ErrUnexpectedEOF.
|
||||
func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
|
||||
// Consume the leading double quote.
|
||||
switch {
|
||||
case resumeOffset > 0:
|
||||
n = resumeOffset // already handled the leading quote
|
||||
case uint(len(b)) == 0:
|
||||
return n, io.ErrUnexpectedEOF
|
||||
case b[0] == '"':
|
||||
n++
|
||||
default:
|
||||
return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
|
||||
}
|
||||
|
||||
// Consume every character in the string.
|
||||
for uint(len(b)) > uint(n) {
|
||||
// Optimize for long sequences of unescaped characters.
|
||||
noEscape := func(c byte) bool {
|
||||
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
|
||||
}
|
||||
for uint(len(b)) > uint(n) && noEscape(b[n]) {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) <= uint(n) {
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// Check for terminating double quote.
|
||||
if b[n] == '"' {
|
||||
n++
|
||||
return n, nil
|
||||
}
|
||||
|
||||
switch r, rn := utf8.DecodeRune(b[n:]); {
|
||||
// Handle UTF-8 encoded byte sequence.
|
||||
// Due to specialized handling of ASCII above, we know that
|
||||
// all normal sequences at this point must be 2 bytes or larger.
|
||||
case rn > 1:
|
||||
n += rn
|
||||
// Handle escape sequence.
|
||||
case r == '\\':
|
||||
flags.Join(stringNonVerbatim)
|
||||
resumeOffset = n
|
||||
if uint(len(b)) < uint(n+2) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch r := b[n+1]; r {
|
||||
case '/':
|
||||
// Forward slash is the only character with 3 representations.
|
||||
// Per RFC 8785, section 3.2.2.2., this must not be escaped.
|
||||
flags.Join(stringNonCanonical)
|
||||
n += 2
|
||||
case '"', '\\', 'b', 'f', 'n', 'r', 't':
|
||||
n += 2
|
||||
case 'u':
|
||||
if uint(len(b)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(b[n:], false) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n:])
|
||||
}
|
||||
v1, ok := parseHexUint16(b[n+2 : n+6])
|
||||
if !ok {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n : n+6])
|
||||
}
|
||||
// Only certain control characters can use the \uFFFF notation
|
||||
// for canonical formatting (per RFC 8785, section 3.2.2.2.).
|
||||
switch v1 {
|
||||
// \uFFFF notation not permitted for these characters.
|
||||
case '\b', '\f', '\n', '\r', '\t':
|
||||
flags.Join(stringNonCanonical)
|
||||
default:
|
||||
// \uFFFF notation only permitted for control characters.
|
||||
if v1 >= ' ' {
|
||||
flags.Join(stringNonCanonical)
|
||||
} else {
|
||||
// \uFFFF notation must be lower case.
|
||||
for _, c := range b[n+2 : n+6] {
|
||||
if 'A' <= c && c <= 'F' {
|
||||
flags.Join(stringNonCanonical)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
n += 6
|
||||
|
||||
r := rune(v1)
|
||||
if validateUTF8 && utf16.IsSurrogate(r) {
|
||||
if uint(len(b)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(b[n:], true) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6:])
|
||||
} else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
|
||||
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
|
||||
} else {
|
||||
n += 6
|
||||
}
|
||||
}
|
||||
default:
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n : n+2])
|
||||
}
|
||||
// Handle invalid UTF-8.
|
||||
case r == utf8.RuneError:
|
||||
if !utf8.FullRune(b[n:]) {
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonVerbatim | stringNonCanonical)
|
||||
if validateUTF8 {
|
||||
return n, ErrInvalidUTF8
|
||||
}
|
||||
n++
|
||||
// Handle invalid control characters.
|
||||
case r < ' ':
|
||||
flags.Join(stringNonVerbatim | stringNonCanonical)
|
||||
return n, NewInvalidCharacterError(b[n:], "in string (expecting non-control character)")
|
||||
default:
|
||||
panic("BUG: unhandled character " + QuoteRune(b[n:]))
|
||||
}
|
||||
}
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// AppendUnquote appends the unescaped form of a JSON string in src to dst.
|
||||
// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,
|
||||
// but the error will be specified as having encountered such an error.
|
||||
// The input must be an entire JSON string with no surrounding whitespace.
|
||||
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) {
|
||||
dst = slices.Grow(dst, len(src))
|
||||
|
||||
// Consume the leading double quote.
|
||||
var i, n int
|
||||
switch {
|
||||
case uint(len(src)) == 0:
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
case src[0] == '"':
|
||||
i, n = 1, 1
|
||||
default:
|
||||
return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`)
|
||||
}
|
||||
|
||||
// Consume every character in the string.
|
||||
for uint(len(src)) > uint(n) {
|
||||
// Optimize for long sequences of unescaped characters.
|
||||
noEscape := func(c byte) bool {
|
||||
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
|
||||
}
|
||||
for uint(len(src)) > uint(n) && noEscape(src[n]) {
|
||||
n++
|
||||
}
|
||||
if uint(len(src)) <= uint(n) {
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// Check for terminating double quote.
|
||||
if src[n] == '"' {
|
||||
dst = append(dst, src[i:n]...)
|
||||
n++
|
||||
if n < len(src) {
|
||||
err = NewInvalidCharacterError(src[n:], "after string value")
|
||||
}
|
||||
return dst, err
|
||||
}
|
||||
|
||||
switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
|
||||
// Handle UTF-8 encoded byte sequence.
|
||||
// Due to specialized handling of ASCII above, we know that
|
||||
// all normal sequences at this point must be 2 bytes or larger.
|
||||
case rn > 1:
|
||||
n += rn
|
||||
// Handle escape sequence.
|
||||
case r == '\\':
|
||||
dst = append(dst, src[i:n]...)
|
||||
|
||||
// Handle escape sequence.
|
||||
if uint(len(src)) < uint(n+2) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch r := src[n+1]; r {
|
||||
case '"', '\\', '/':
|
||||
dst = append(dst, r)
|
||||
n += 2
|
||||
case 'b':
|
||||
dst = append(dst, '\b')
|
||||
n += 2
|
||||
case 'f':
|
||||
dst = append(dst, '\f')
|
||||
n += 2
|
||||
case 'n':
|
||||
dst = append(dst, '\n')
|
||||
n += 2
|
||||
case 'r':
|
||||
dst = append(dst, '\r')
|
||||
n += 2
|
||||
case 't':
|
||||
dst = append(dst, '\t')
|
||||
n += 2
|
||||
case 'u':
|
||||
if uint(len(src)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(src[n:], false) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
return dst, NewInvalidEscapeSequenceError(src[n:])
|
||||
}
|
||||
v1, ok := parseHexUint16(src[n+2 : n+6])
|
||||
if !ok {
|
||||
return dst, NewInvalidEscapeSequenceError(src[n : n+6])
|
||||
}
|
||||
n += 6
|
||||
|
||||
// Check whether this is a surrogate half.
|
||||
r := rune(v1)
|
||||
if utf16.IsSurrogate(r) {
|
||||
r = utf8.RuneError // assume failure unless the following succeeds
|
||||
if uint(len(src)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(src[n:], true) {
|
||||
return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF
|
||||
}
|
||||
err = NewInvalidEscapeSequenceError(src[n-6:])
|
||||
} else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok {
|
||||
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
|
||||
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
|
||||
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
|
||||
} else {
|
||||
n += 6
|
||||
}
|
||||
}
|
||||
|
||||
dst = utf8.AppendRune(dst, r)
|
||||
default:
|
||||
return dst, NewInvalidEscapeSequenceError(src[n : n+2])
|
||||
}
|
||||
i = n
|
||||
// Handle invalid UTF-8.
|
||||
case r == utf8.RuneError:
|
||||
dst = append(dst, src[i:n]...)
|
||||
if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
// NOTE: An unescaped string may be longer than the escaped string
|
||||
// because invalid UTF-8 bytes are being replaced.
|
||||
dst = append(dst, "\uFFFD"...)
|
||||
n += rn
|
||||
i = n
|
||||
err = ErrInvalidUTF8
|
||||
// Handle invalid control characters.
|
||||
case r < ' ':
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, NewInvalidCharacterError(src[n:], "in string (expecting non-control character)")
|
||||
default:
|
||||
panic("BUG: unhandled character " + QuoteRune(src[n:]))
|
||||
}
|
||||
}
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// hasEscapedUTF16Prefix reports whether b could be the beginning of a \uXXXX
// escape sequence that was cut short. Only the first six bytes are inspected;
// an empty b is a possible prefix. With lowerSurrogateHalf set, the hex
// digits seen so far must additionally be compatible with a value in the
// range ['\uDC00':'\uDFFF'].
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
	isHex := func(c byte) bool {
		return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
	}
	for i := 0; i < len(b); i++ {
		c := b[i]
		switch i {
		case 0:
			if c != '\\' {
				return false
			}
		case 1:
			if c != 'u' {
				return false
			}
		case 2:
			// First hex digit; a lower surrogate half always starts with D.
			if lowerSurrogateHalf && c != 'd' && c != 'D' {
				return false
			}
			if !isHex(c) {
				return false
			}
		case 3:
			// Second hex digit; a lower surrogate half continues with C..F.
			if lowerSurrogateHalf && !('c' <= c && c <= 'f') && !('C' <= c && c <= 'F') {
				return false
			}
			if !isHex(c) {
				return false
			}
		case 4, 5:
			if !isHex(c) {
				return false
			}
		}
	}
	return true
}
|
||||
|
||||
// UnquoteMayCopy returns the unescaped form of b.
|
||||
// If there are no escaped characters, the output is simply a subslice of
|
||||
// the input with the surrounding quotes removed.
|
||||
// Otherwise, a new buffer is allocated for the output.
|
||||
// It assumes the input is valid.
|
||||
func UnquoteMayCopy(b []byte, isVerbatim bool) []byte {
|
||||
// NOTE: The arguments and logic are kept simple to keep this inlinable.
|
||||
if isVerbatim {
|
||||
return b[len(`"`) : len(b)-len(`"`)]
|
||||
}
|
||||
b, _ = AppendUnquote(nil, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// ConsumeSimpleNumber is a fast path for JSON numbers (RFC 7159, section 6)
// that are plain non-negative integers: either a single '0' or a run of
// digits that starts with '1'..'9'. It returns the length of that integer,
// or 0 if the input does not start with one or the integer is followed by
// '.', 'e' or 'E', in which case ConsumeNumber should be called.
func ConsumeSimpleNumber(b []byte) (n int) {
	// NOTE: The body is deliberately tiny so that it remains inlinable.
	if len(b) == 0 {
		return 0
	}
	switch c := b[0]; {
	case c == '0':
		n = 1 // a leading zero is never followed by further integer digits
	case '1' <= c && c <= '9':
		n = 1
		for n < len(b) && '0' <= b[n] && b[n] <= '9' {
			n++
		}
	default:
		return 0
	}
	// A fraction or exponent makes the number non-simple.
	if n < len(b) {
		switch b[n] {
		case '.', 'e', 'E':
			return 0
		}
	}
	return n
}
|
||||
|
||||
// ConsumeNumberState identifies the position within the JSON number grammar
// at which ConsumeNumberResumable stopped, so that a later call can resume.
type ConsumeNumberState uint

// The states are ordered so that each "within" state is immediately followed
// by the "before" state of the next number component.
const (
	consumeNumberInit      ConsumeNumberState = iota // nothing consumed yet
	beforeIntegerDigits                              // integer component not yet seen
	withinIntegerDigits                              // inside the integer digits
	beforeFractionalDigits                           // optional fraction not yet seen
	withinFractionalDigits                           // inside the fraction digits
	beforeExponentDigits                             // optional exponent not yet seen
	withinExponentDigits                             // inside the exponent digits
)
|
||||
|
||||
// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.
|
||||
// It reports the number of bytes consumed and whether an error was encountered.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
//
|
||||
// Note that JSON numbers are not self-terminating.
|
||||
// If the entire input is consumed, then the caller needs to consider whether
|
||||
// there may be subsequent unread data that may still be part of this number.
|
||||
func ConsumeNumber(b []byte) (n int, err error) {
|
||||
n, _, err = ConsumeNumberResumable(b, 0, consumeNumberInit)
|
||||
return n, err
|
||||
}
|
||||
|
||||
// ConsumeNumberResumable is identical to consumeNumber but supports resuming
|
||||
// from a previous call that returned io.ErrUnexpectedEOF.
|
||||
func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) {
|
||||
// Jump to the right state when resuming from a partial consumption.
|
||||
n = resumeOffset
|
||||
if state > consumeNumberInit {
|
||||
switch state {
|
||||
case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
|
||||
// Consume leading digits.
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) <= uint(n) {
|
||||
return n, state, nil // still within the same state
|
||||
}
|
||||
state++ // switches "withinX" to "beforeY" where Y is the state after X
|
||||
}
|
||||
switch state {
|
||||
case beforeIntegerDigits:
|
||||
goto beforeInteger
|
||||
case beforeFractionalDigits:
|
||||
goto beforeFractional
|
||||
case beforeExponentDigits:
|
||||
goto beforeExponent
|
||||
default:
|
||||
return n, state, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Consume required integer component (with optional minus sign).
|
||||
beforeInteger:
|
||||
resumeOffset = n
|
||||
if uint(len(b)) > 0 && b[0] == '-' {
|
||||
n++
|
||||
}
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
|
||||
case b[n] == '0':
|
||||
n++
|
||||
state = beforeFractionalDigits
|
||||
case '1' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinIntegerDigits
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
|
||||
// Consume optional fractional component.
|
||||
beforeFractional:
|
||||
if uint(len(b)) > uint(n) && b[n] == '.' {
|
||||
resumeOffset = n
|
||||
n++
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
|
||||
case '0' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinFractionalDigits
|
||||
}
|
||||
|
||||
// Consume optional exponent component.
|
||||
beforeExponent:
|
||||
if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') {
|
||||
resumeOffset = n
|
||||
n++
|
||||
if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') {
|
||||
n++
|
||||
}
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
|
||||
case '0' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinExponentDigits
|
||||
}
|
||||
|
||||
return n, state, nil
|
||||
}
|
||||
|
||||
// parseHexUint16 parses b as exactly four hexadecimal digits (either case)
// and returns their value. It is similar to strconv.ParseUint, but works on
// both []byte and string input and is specialized for base-16.
// It returns (0, false) if b is not four bytes long or has a non-hex byte.
// See https://go.dev/issue/42429.
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for i := 0; i < 4; i++ {
		var nibble byte
		switch c := b[i]; {
		case '0' <= c && c <= '9':
			nibble = c - '0'
		case 'a' <= c && c <= 'f':
			nibble = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			nibble = c - 'A' + 10
		default:
			return 0, false
		}
		v = v<<4 | uint16(nibble)
	}
	return v, true
}
|
||||
|
||||
// ParseUint parses b as a decimal unsigned integer according to a strict
// subset of the JSON number grammar: digits only, with no leading zero unless
// the number is exactly "0".
// It returns (0, false) on a syntax error and (math.MaxUint64, false) when
// the value does not fit in a uint64.
func ParseUint(b []byte) (v uint64, ok bool) {
	const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64)))

	// Accumulate leading digits; overflow is detected afterwards.
	digits := 0
	for digits < len(b) && '0' <= b[digits] && b[digits] <= '9' {
		v = v*10 + uint64(b[digits]-'0')
		digits++
	}

	// Syntax: at least one digit, nothing but digits, no superfluous zero.
	if digits == 0 || digits != len(b) {
		return 0, false
	}
	if b[0] == '0' && len(b) > 1 {
		return 0, false
	}

	// Only numbers of 20 or more digits can overflow. A 20-digit number fits
	// only if it starts with '1' and the accumulated value did not wrap
	// around (a wrapped value is always below 1e19).
	if digits >= unsafeWidth {
		if digits > unsafeWidth || b[0] != '1' || v < 1e19 {
			return math.MaxUint64, false
		}
	}
	return v, true
}
|
||||
|
||||
// ParseFloat parses b with strconv.ParseFloat at the given bit size, i.e.
// according to the Go float grammar, of which the JSON number grammar is a
// strict subset. ok reports whether strconv.ParseFloat returned no error.
//
// If the result is infinite and bits is 32 or 64, it is replaced with the
// largest finite value of that size carrying the same sign, since any finite
// value is infinitely more accurate at representing another finite value
// than an infinite value is.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	fv, err := strconv.ParseFloat(string(b), bits)
	if math.IsInf(fv, 0) {
		var limit float64
		switch bits {
		case 32:
			limit = math.MaxFloat32
		case 64:
			limit = math.MaxFloat64
		default:
			// Other bit sizes are passed through without clamping.
			return fv, err == nil
		}
		if fv < 0 {
			limit = -limit
		}
		fv = limit
	}
	return fv, err == nil
}
|
||||
443
pkg/encoders/json/internal/jsonwire/decode_test.go
Normal file
443
pkg/encoders/json/internal/jsonwire/decode_test.go
Normal file
@@ -0,0 +1,443 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"math"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestConsumeWhitespace(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want int
|
||||
}{
|
||||
{"", 0},
|
||||
{"a", 0},
|
||||
{" a", 1},
|
||||
{" a ", 1},
|
||||
{" \n\r\ta", 4},
|
||||
{" \n\r\t \n\r\t \n\r\t \n\r\t", 16},
|
||||
{"\u00a0", 0}, // non-breaking space is not JSON whitespace
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
if got := ConsumeWhitespace([]byte(tt.in)); got != tt.want {
|
||||
t.Errorf("ConsumeWhitespace(%q) = %v, want %v", tt.in, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConsumeLiteral(t *testing.T) {
|
||||
tests := []struct {
|
||||
literal string
|
||||
in string
|
||||
want int
|
||||
wantErr error
|
||||
}{
|
||||
{"null", "", 0, io.ErrUnexpectedEOF},
|
||||
{"null", "n", 1, io.ErrUnexpectedEOF},
|
||||
{"null", "nu", 2, io.ErrUnexpectedEOF},
|
||||
{"null", "nul", 3, io.ErrUnexpectedEOF},
|
||||
{"null", "null", 4, nil},
|
||||
{"null", "nullx", 4, nil},
|
||||
{"null", "x", 0, NewInvalidCharacterError("x", "in literal null (expecting 'n')")},
|
||||
{"null", "nuxx", 2, NewInvalidCharacterError("x", "in literal null (expecting 'l')")},
|
||||
|
||||
{"false", "", 0, io.ErrUnexpectedEOF},
|
||||
{"false", "f", 1, io.ErrUnexpectedEOF},
|
||||
{"false", "fa", 2, io.ErrUnexpectedEOF},
|
||||
{"false", "fal", 3, io.ErrUnexpectedEOF},
|
||||
{"false", "fals", 4, io.ErrUnexpectedEOF},
|
||||
{"false", "false", 5, nil},
|
||||
{"false", "falsex", 5, nil},
|
||||
{"false", "x", 0, NewInvalidCharacterError("x", "in literal false (expecting 'f')")},
|
||||
{"false", "falsx", 4, NewInvalidCharacterError("x", "in literal false (expecting 'e')")},
|
||||
|
||||
{"true", "", 0, io.ErrUnexpectedEOF},
|
||||
{"true", "t", 1, io.ErrUnexpectedEOF},
|
||||
{"true", "tr", 2, io.ErrUnexpectedEOF},
|
||||
{"true", "tru", 3, io.ErrUnexpectedEOF},
|
||||
{"true", "true", 4, nil},
|
||||
{"true", "truex", 4, nil},
|
||||
{"true", "x", 0, NewInvalidCharacterError("x", "in literal true (expecting 't')")},
|
||||
{"true", "trux", 3, NewInvalidCharacterError("x", "in literal true (expecting 'e')")},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
var got int
|
||||
switch tt.literal {
|
||||
case "null":
|
||||
got = ConsumeNull([]byte(tt.in))
|
||||
case "false":
|
||||
got = ConsumeFalse([]byte(tt.in))
|
||||
case "true":
|
||||
got = ConsumeTrue([]byte(tt.in))
|
||||
default:
|
||||
t.Errorf("invalid literal: %v", tt.literal)
|
||||
}
|
||||
switch {
|
||||
case tt.wantErr == nil && got != tt.want:
|
||||
t.Errorf("Consume%v(%q) = %v, want %v", strings.Title(tt.literal), tt.in, got, tt.want)
|
||||
case tt.wantErr != nil && got != 0:
|
||||
t.Errorf("Consume%v(%q) = %v, want %v", strings.Title(tt.literal), tt.in, got, 0)
|
||||
}
|
||||
|
||||
got, gotErr := ConsumeLiteral([]byte(tt.in), tt.literal)
|
||||
if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) {
|
||||
t.Errorf("ConsumeLiteral(%q, %q) = (%v, %v), want (%v, %v)", tt.in, tt.literal, got, gotErr, tt.want, tt.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConsumeString(t *testing.T) {
|
||||
var errPrev = errors.New("same as previous error")
|
||||
tests := []struct {
|
||||
in string
|
||||
simple bool
|
||||
want int
|
||||
wantUTF8 int // consumed bytes if validateUTF8 is specified
|
||||
wantFlags ValueFlags
|
||||
wantUnquote string
|
||||
wantErr error
|
||||
wantErrUTF8 error // error if validateUTF8 is specified
|
||||
wantErrUnquote error
|
||||
}{
|
||||
{``, false, 0, 0, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"`, false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`""`, true, 2, 2, 0, "", nil, nil, nil},
|
||||
{`""x`, true, 2, 2, 0, "", nil, nil, NewInvalidCharacterError("x", "after string value")},
|
||||
{` ""x`, false, 0, 0, 0, "", NewInvalidCharacterError(" ", "at start of string (expecting '\"')"), errPrev, errPrev},
|
||||
{`"hello`, false, 6, 6, 0, "hello", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"hello"`, true, 7, 7, 0, "hello", nil, nil, nil},
|
||||
{"\"\x00\"", false, 1, 1, stringNonVerbatim | stringNonCanonical, "", NewInvalidCharacterError("\x00", "in string (expecting non-control character)"), errPrev, errPrev},
|
||||
{`"\u0000"`, false, 8, 8, stringNonVerbatim, "\x00", nil, nil, nil},
|
||||
{"\"\x1f\"", false, 1, 1, stringNonVerbatim | stringNonCanonical, "", NewInvalidCharacterError("\x1f", "in string (expecting non-control character)"), errPrev, errPrev},
|
||||
{`"\u001f"`, false, 8, 8, stringNonVerbatim, "\x1f", nil, nil, nil},
|
||||
{`"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, true, 54, 54, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", nil, nil, nil},
|
||||
{"\" !#$%'()*+,-./0123456789:;=?@[]^_`{|}~\x7f\"", true, 41, 41, 0, " !#$%'()*+,-./0123456789:;=?@[]^_`{|}~\x7f", nil, nil, nil},
|
||||
{`"&"`, false, 3, 3, 0, "&", nil, nil, nil},
|
||||
{`"<"`, false, 3, 3, 0, "<", nil, nil, nil},
|
||||
{`">"`, false, 3, 3, 0, ">", nil, nil, nil},
|
||||
{"\"x\x80\"", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", nil, ErrInvalidUTF8, errPrev},
|
||||
{"\"x\xff\"", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", nil, ErrInvalidUTF8, errPrev},
|
||||
{"\"x\xc0", false, 3, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF},
|
||||
{"\"x\xc0\x80\"", false, 5, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev},
|
||||
{"\"x\xe0", false, 2, 2, 0, "x", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{"\"x\xe0\x80", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF},
|
||||
{"\"x\xe0\x80\x80\"", false, 6, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev},
|
||||
{"\"x\xf0", false, 2, 2, 0, "x", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{"\"x\xf0\x80", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF},
|
||||
{"\"x\xf0\x80\x80", false, 5, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF},
|
||||
{"\"x\xf0\x80\x80\x80\"", false, 7, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev},
|
||||
{"\"x\xed\xba\xad\"", false, 6, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev},
|
||||
{"\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", false, 25, 25, 0, "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, nil, nil},
|
||||
{`"¢"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"¢"`[:3], false, 3, 3, 0, "¢", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote
|
||||
{`"¢"`[:4], false, 4, 4, 0, "¢", nil, nil, nil},
|
||||
{`"€"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"€"`[:3], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"€"`[:4], false, 4, 4, 0, "€", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote
|
||||
{`"€"`[:5], false, 5, 5, 0, "€", nil, nil, nil},
|
||||
{`"𐍈"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"𐍈"`[:3], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"𐍈"`[:4], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"𐍈"`[:5], false, 5, 5, 0, "𐍈", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote
|
||||
{`"𐍈"`[:6], false, 6, 6, 0, "𐍈", nil, nil, nil},
|
||||
{`"x\`, false, 2, 2, stringNonVerbatim, "x", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"x\"`, false, 4, 4, stringNonVerbatim, "x\"", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"x\x"`, false, 2, 2, stringNonVerbatim | stringNonCanonical, "x", NewInvalidEscapeSequenceError(`\x`), errPrev, errPrev},
|
||||
{`"\"\\\b\f\n\r\t"`, false, 16, 16, stringNonVerbatim, "\"\\\b\f\n\r\t", nil, nil, nil},
|
||||
{`"/"`, true, 3, 3, 0, "/", nil, nil, nil},
|
||||
{`"\/"`, false, 4, 4, stringNonVerbatim | stringNonCanonical, "/", nil, nil, nil},
|
||||
{`"\u002f"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "/", nil, nil, nil},
|
||||
{`"\u`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"\uf`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"\uff`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"\ufff`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"\ufffd`, false, 7, 7, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"\ufffd"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "\ufffd", nil, nil, nil},
|
||||
{`"\uABCD"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "\uabcd", nil, nil, nil},
|
||||
{`"\uefX0"`, false, 1, 1, stringNonVerbatim | stringNonCanonical, "", NewInvalidEscapeSequenceError(`\uefX0`), errPrev, errPrev},
|
||||
{`"\uDEAD`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"\uDEAD"`, false, 8, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", nil, NewInvalidEscapeSequenceError(`\uDEAD"`), errPrev},
|
||||
{`"\uDEAD______"`, false, 14, 1, stringNonVerbatim | stringNonCanonical, "\ufffd______", nil, NewInvalidEscapeSequenceError(`\uDEAD______`), errPrev},
|
||||
{`"\uDEAD\uXXXX"`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", NewInvalidEscapeSequenceError(`\uXXXX`), NewInvalidEscapeSequenceError(`\uDEAD\uXXXX`), NewInvalidEscapeSequenceError(`\uXXXX`)},
|
||||
{`"\uDEAD\uBEEF"`, false, 14, 1, stringNonVerbatim | stringNonCanonical, "\ufffd\ubeef", nil, NewInvalidEscapeSequenceError(`\uDEAD\uBEEF`), errPrev},
|
||||
{`"\uD800\udea`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev},
|
||||
{`"\uD800\udb`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, NewInvalidEscapeSequenceError(`\uD800\udb`), io.ErrUnexpectedEOF},
|
||||
{`"\uD800\udead"`, false, 14, 14, stringNonVerbatim | stringNonCanonical, "\U000102ad", nil, nil, nil},
|
||||
{`"\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`, false, 50, 50, stringNonVerbatim | stringNonCanonical, "\"\\/\b\f\n\r\t", nil, nil, nil},
|
||||
{`"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02"`, false, 56, 56, stringNonVerbatim | stringNonCanonical, "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, nil, nil},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
if tt.wantErrUTF8 == errPrev {
|
||||
tt.wantErrUTF8 = tt.wantErr
|
||||
}
|
||||
if tt.wantErrUnquote == errPrev {
|
||||
tt.wantErrUnquote = tt.wantErrUTF8
|
||||
}
|
||||
|
||||
switch got := ConsumeSimpleString([]byte(tt.in)); {
|
||||
case tt.simple && got != tt.want:
|
||||
t.Errorf("consumeSimpleString(%q) = %v, want %v", tt.in, got, tt.want)
|
||||
case !tt.simple && got != 0:
|
||||
t.Errorf("consumeSimpleString(%q) = %v, want %v", tt.in, got, 0)
|
||||
}
|
||||
|
||||
var gotFlags ValueFlags
|
||||
got, gotErr := ConsumeString(&gotFlags, []byte(tt.in), false)
|
||||
if gotFlags != tt.wantFlags {
|
||||
t.Errorf("consumeString(%q, false) flags = %v, want %v", tt.in, gotFlags, tt.wantFlags)
|
||||
}
|
||||
if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) {
|
||||
t.Errorf("consumeString(%q, false) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr)
|
||||
}
|
||||
|
||||
got, gotErr = ConsumeString(&gotFlags, []byte(tt.in), true)
|
||||
if got != tt.wantUTF8 || !reflect.DeepEqual(gotErr, tt.wantErrUTF8) {
|
||||
t.Errorf("consumeString(%q, false) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.wantUTF8, tt.wantErrUTF8)
|
||||
}
|
||||
|
||||
gotUnquote, gotErr := AppendUnquote(nil, tt.in)
|
||||
if string(gotUnquote) != tt.wantUnquote || !reflect.DeepEqual(gotErr, tt.wantErrUnquote) {
|
||||
t.Errorf("AppendUnquote(nil, %q) = (%q, %v), want (%q, %v)", tt.in[:got], gotUnquote, gotErr, tt.wantUnquote, tt.wantErrUnquote)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConsumeNumber(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
simple bool
|
||||
want int
|
||||
wantErr error
|
||||
}{
|
||||
{"", false, 0, io.ErrUnexpectedEOF},
|
||||
{`"NaN"`, false, 0, NewInvalidCharacterError("\"", "in number (expecting digit)")},
|
||||
{`"Infinity"`, false, 0, NewInvalidCharacterError("\"", "in number (expecting digit)")},
|
||||
{`"-Infinity"`, false, 0, NewInvalidCharacterError("\"", "in number (expecting digit)")},
|
||||
{".0", false, 0, NewInvalidCharacterError(".", "in number (expecting digit)")},
|
||||
{"0", true, 1, nil},
|
||||
{"-0", false, 2, nil},
|
||||
{"+0", false, 0, NewInvalidCharacterError("+", "in number (expecting digit)")},
|
||||
{"1", true, 1, nil},
|
||||
{"-1", false, 2, nil},
|
||||
{"00", true, 1, nil},
|
||||
{"-00", false, 2, nil},
|
||||
{"01", true, 1, nil},
|
||||
{"-01", false, 2, nil},
|
||||
{"0i", true, 1, nil},
|
||||
{"-0i", false, 2, nil},
|
||||
{"0f", true, 1, nil},
|
||||
{"-0f", false, 2, nil},
|
||||
{"9876543210", true, 10, nil},
|
||||
{"-9876543210", false, 11, nil},
|
||||
{"9876543210x", true, 10, nil},
|
||||
{"-9876543210x", false, 11, nil},
|
||||
{" 9876543210", true, 0, NewInvalidCharacterError(" ", "in number (expecting digit)")},
|
||||
{"- 9876543210", false, 1, NewInvalidCharacterError(" ", "in number (expecting digit)")},
|
||||
{strings.Repeat("9876543210", 1000), true, 10000, nil},
|
||||
{"-" + strings.Repeat("9876543210", 1000), false, 1 + 10000, nil},
|
||||
{"0.", false, 1, io.ErrUnexpectedEOF},
|
||||
{"-0.", false, 2, io.ErrUnexpectedEOF},
|
||||
{"0e", false, 1, io.ErrUnexpectedEOF},
|
||||
{"-0e", false, 2, io.ErrUnexpectedEOF},
|
||||
{"0E", false, 1, io.ErrUnexpectedEOF},
|
||||
{"-0E", false, 2, io.ErrUnexpectedEOF},
|
||||
{"0.0", false, 3, nil},
|
||||
{"-0.0", false, 4, nil},
|
||||
{"0e0", false, 3, nil},
|
||||
{"-0e0", false, 4, nil},
|
||||
{"0E0", false, 3, nil},
|
||||
{"-0E0", false, 4, nil},
|
||||
{"0.0123456789", false, 12, nil},
|
||||
{"-0.0123456789", false, 13, nil},
|
||||
{"1.f", false, 2, NewInvalidCharacterError("f", "in number (expecting digit)")},
|
||||
{"-1.f", false, 3, NewInvalidCharacterError("f", "in number (expecting digit)")},
|
||||
{"1.e", false, 2, NewInvalidCharacterError("e", "in number (expecting digit)")},
|
||||
{"-1.e", false, 3, NewInvalidCharacterError("e", "in number (expecting digit)")},
|
||||
{"1e0", false, 3, nil},
|
||||
{"-1e0", false, 4, nil},
|
||||
{"1E0", false, 3, nil},
|
||||
{"-1E0", false, 4, nil},
|
||||
{"1Ex", false, 2, NewInvalidCharacterError("x", "in number (expecting digit)")},
|
||||
{"-1Ex", false, 3, NewInvalidCharacterError("x", "in number (expecting digit)")},
|
||||
{"1e-0", false, 4, nil},
|
||||
{"-1e-0", false, 5, nil},
|
||||
{"1e+0", false, 4, nil},
|
||||
{"-1e+0", false, 5, nil},
|
||||
{"1E-0", false, 4, nil},
|
||||
{"-1E-0", false, 5, nil},
|
||||
{"1E+0", false, 4, nil},
|
||||
{"-1E+0", false, 5, nil},
|
||||
{"1E+00500", false, 8, nil},
|
||||
{"-1E+00500", false, 9, nil},
|
||||
{"1E+00500x", false, 8, nil},
|
||||
{"-1E+00500x", false, 9, nil},
|
||||
{"9876543210.0123456789e+01234589x", false, 31, nil},
|
||||
{"-9876543210.0123456789e+01234589x", false, 32, nil},
|
||||
{"1_000_000", true, 1, nil},
|
||||
{"0x12ef", true, 1, nil},
|
||||
{"0x1p-2", true, 1, nil},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
switch got := ConsumeSimpleNumber([]byte(tt.in)); {
|
||||
case tt.simple && got != tt.want:
|
||||
t.Errorf("ConsumeSimpleNumber(%q) = %v, want %v", tt.in, got, tt.want)
|
||||
case !tt.simple && got != 0:
|
||||
t.Errorf("ConsumeSimpleNumber(%q) = %v, want %v", tt.in, got, 0)
|
||||
}
|
||||
|
||||
got, gotErr := ConsumeNumber([]byte(tt.in))
|
||||
if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) {
|
||||
t.Errorf("ConsumeNumber(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseHexUint16(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want uint16
|
||||
wantOk bool
|
||||
}{
|
||||
{"", 0, false},
|
||||
{"a", 0, false},
|
||||
{"ab", 0, false},
|
||||
{"abc", 0, false},
|
||||
{"abcd", 0xabcd, true},
|
||||
{"abcde", 0, false},
|
||||
{"9eA1", 0x9ea1, true},
|
||||
{"gggg", 0, false},
|
||||
{"0000", 0x0000, true},
|
||||
{"1234", 0x1234, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
got, gotOk := parseHexUint16([]byte(tt.in))
|
||||
if got != tt.want || gotOk != tt.wantOk {
|
||||
t.Errorf("parseHexUint16(%q) = (0x%04x, %v), want (0x%04x, %v)", tt.in, got, gotOk, tt.want, tt.wantOk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseUint(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want uint64
|
||||
wantOk bool
|
||||
}{
|
||||
{"", 0, false},
|
||||
{"0", 0, true},
|
||||
{"1", 1, true},
|
||||
{"-1", 0, false},
|
||||
{"1f", 0, false},
|
||||
{"00", 0, false},
|
||||
{"01", 0, false},
|
||||
{"10", 10, true},
|
||||
{"10.9", 0, false},
|
||||
{" 10", 0, false},
|
||||
{"10 ", 0, false},
|
||||
{"123456789", 123456789, true},
|
||||
{"123456789d", 0, false},
|
||||
{"18446744073709551614", math.MaxUint64 - 1, true},
|
||||
{"18446744073709551615", math.MaxUint64, true},
|
||||
{"18446744073709551616", math.MaxUint64, false},
|
||||
{"18446744073709551620", math.MaxUint64, false},
|
||||
{"18446744073709551700", math.MaxUint64, false},
|
||||
{"18446744073709552000", math.MaxUint64, false},
|
||||
{"18446744073709560000", math.MaxUint64, false},
|
||||
{"18446744073709600000", math.MaxUint64, false},
|
||||
{"18446744073710000000", math.MaxUint64, false},
|
||||
{"18446744073800000000", math.MaxUint64, false},
|
||||
{"18446744074000000000", math.MaxUint64, false},
|
||||
{"18446744080000000000", math.MaxUint64, false},
|
||||
{"18446744100000000000", math.MaxUint64, false},
|
||||
{"18446745000000000000", math.MaxUint64, false},
|
||||
{"18446750000000000000", math.MaxUint64, false},
|
||||
{"18446800000000000000", math.MaxUint64, false},
|
||||
{"18447000000000000000", math.MaxUint64, false},
|
||||
{"18450000000000000000", math.MaxUint64, false},
|
||||
{"18500000000000000000", math.MaxUint64, false},
|
||||
{"19000000000000000000", math.MaxUint64, false},
|
||||
{"19999999999999999999", math.MaxUint64, false},
|
||||
{"20000000000000000000", math.MaxUint64, false},
|
||||
{"100000000000000000000", math.MaxUint64, false},
|
||||
{"99999999999999999999999999999999", math.MaxUint64, false},
|
||||
{"99999999999999999999999999999999f", 0, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
got, gotOk := ParseUint([]byte(tt.in))
|
||||
if got != tt.want || gotOk != tt.wantOk {
|
||||
t.Errorf("ParseUint(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotOk, tt.want, tt.wantOk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFloat(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want32 float64
|
||||
want64 float64
|
||||
wantOk bool
|
||||
}{
|
||||
{"0", 0, 0, true},
|
||||
{"-1", -1, -1, true},
|
||||
{"1", 1, 1, true},
|
||||
|
||||
{"-16777215", -16777215, -16777215, true}, // -(1<<24 - 1)
|
||||
{"16777215", 16777215, 16777215, true}, // +(1<<24 - 1)
|
||||
{"-16777216", -16777216, -16777216, true}, // -(1<<24)
|
||||
{"16777216", 16777216, 16777216, true}, // +(1<<24)
|
||||
{"-16777217", -16777216, -16777217, true}, // -(1<<24 + 1)
|
||||
{"16777217", 16777216, 16777217, true}, // +(1<<24 + 1)
|
||||
|
||||
{"-9007199254740991", -9007199254740992, -9007199254740991, true}, // -(1<<53 - 1)
|
||||
{"9007199254740991", 9007199254740992, 9007199254740991, true}, // +(1<<53 - 1)
|
||||
{"-9007199254740992", -9007199254740992, -9007199254740992, true}, // -(1<<53)
|
||||
{"9007199254740992", 9007199254740992, 9007199254740992, true}, // +(1<<53)
|
||||
{"-9007199254740993", -9007199254740992, -9007199254740992, true}, // -(1<<53 + 1)
|
||||
{"9007199254740993", 9007199254740992, 9007199254740992, true}, // +(1<<53 + 1)
|
||||
|
||||
{"-1e1000", -math.MaxFloat32, -math.MaxFloat64, false},
|
||||
{"1e1000", +math.MaxFloat32, +math.MaxFloat64, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
got32, gotOk32 := ParseFloat([]byte(tt.in), 32)
|
||||
if got32 != tt.want32 || gotOk32 != tt.wantOk {
|
||||
t.Errorf("ParseFloat(%q, 32) = (%v, %v), want (%v, %v)", tt.in, got32, gotOk32, tt.want32, tt.wantOk)
|
||||
}
|
||||
|
||||
got64, gotOk64 := ParseFloat([]byte(tt.in), 64)
|
||||
if got64 != tt.want64 || gotOk64 != tt.wantOk {
|
||||
t.Errorf("ParseFloat(%q, 64) = (%v, %v), want (%v, %v)", tt.in, got64, gotOk64, tt.want64, tt.wantOk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
290
pkg/encoders/json/internal/jsonwire/encode.go
Normal file
290
pkg/encoders/json/internal/jsonwire/encode.go
Normal file
@@ -0,0 +1,290 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
|
||||
"encoding/json/internal/jsonflags"
|
||||
)
|
||||
|
||||
// escapeASCII is a lookup table indexed by ASCII byte value.
// A non-zero entry means the character may need to be escaped.
// The table is conservative in that it marks the HTML-sensitive characters
// as if EscapeForHTML were set; callers decide whether that applies.
var escapeASCII = [utf8.RuneSelf]uint8{
	// Control characters (0x00 through 0x1f).
	0x00: 1, 0x01: 1, 0x02: 1, 0x03: 1, 0x04: 1, 0x05: 1, 0x06: 1, 0x07: 1,
	0x08: 1, 0x09: 1, 0x0a: 1, 0x0b: 1, 0x0c: 1, 0x0d: 1, 0x0e: 1, 0x0f: 1,
	0x10: 1, 0x11: 1, 0x12: 1, 0x13: 1, 0x14: 1, 0x15: 1, 0x16: 1, 0x17: 1,
	0x18: 1, 0x19: 1, 0x1a: 1, 0x1b: 1, 0x1c: 1, 0x1d: 1, 0x1e: 1, 0x1f: 1,

	// Characters with special meaning inside a JSON string.
	'"':  1,
	'\\': 1,

	// HTML-sensitive characters.
	'&': 1,
	'<': 1,
	'>': 1,
}
|
||||
|
||||
// NeedEscape reports whether src needs escaping of any characters.
|
||||
// It conservatively assumes EscapeForHTML and EscapeForJS.
|
||||
// It reports true for inputs with invalid UTF-8.
|
||||
func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
|
||||
var i int
|
||||
for uint(len(src)) > uint(i) {
|
||||
if c := src[i]; c < utf8.RuneSelf {
|
||||
if escapeASCII[c] > 0 {
|
||||
return true
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:])))
|
||||
if r == utf8.RuneError || r == '\u2028' || r == '\u2029' {
|
||||
return true
|
||||
}
|
||||
i += rn
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
|
||||
//
|
||||
// It takes in flags and respects the following:
|
||||
// - EscapeForHTML escapes '<', '>', and '&'.
|
||||
// - EscapeForJS escapes '\u2028' and '\u2029'.
|
||||
// - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
|
||||
//
|
||||
// Regardless of whether AllowInvalidUTF8 is specified,
|
||||
// invalid bytes are replaced with the Unicode replacement character ('\ufffd').
|
||||
// If no escape flags are set, then the shortest representable form is used,
|
||||
// which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
|
||||
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
|
||||
var i, n int
|
||||
var hasInvalidUTF8 bool
|
||||
dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
|
||||
dst = append(dst, '"')
|
||||
for uint(len(src)) > uint(n) {
|
||||
if c := src[n]; c < utf8.RuneSelf {
|
||||
// Handle single-byte ASCII.
|
||||
n++
|
||||
if escapeASCII[c] == 0 {
|
||||
continue // no escaping possibly needed
|
||||
}
|
||||
// Handle escaping of single-byte ASCII.
|
||||
if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
|
||||
dst = append(dst, src[i:n-1]...)
|
||||
dst = appendEscapedASCII(dst, c)
|
||||
i = n
|
||||
}
|
||||
} else {
|
||||
// Handle multi-byte Unicode.
|
||||
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
|
||||
n += rn
|
||||
if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
|
||||
continue // no escaping possibly needed
|
||||
}
|
||||
// Handle escaping of multi-byte Unicode.
|
||||
switch {
|
||||
case isInvalidUTF8(r, rn):
|
||||
hasInvalidUTF8 = true
|
||||
dst = append(dst, src[i:n-rn]...)
|
||||
dst = append(dst, "\ufffd"...)
|
||||
i = n
|
||||
case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
|
||||
dst = append(dst, src[i:n-rn]...)
|
||||
dst = appendEscapedUnicode(dst, r)
|
||||
i = n
|
||||
}
|
||||
}
|
||||
}
|
||||
dst = append(dst, src[i:n]...)
|
||||
dst = append(dst, '"')
|
||||
if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
|
||||
return dst, ErrInvalidUTF8
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
func appendEscapedASCII(dst []byte, c byte) []byte {
|
||||
switch c {
|
||||
case '"', '\\':
|
||||
dst = append(dst, '\\', c)
|
||||
case '\b':
|
||||
dst = append(dst, "\\b"...)
|
||||
case '\f':
|
||||
dst = append(dst, "\\f"...)
|
||||
case '\n':
|
||||
dst = append(dst, "\\n"...)
|
||||
case '\r':
|
||||
dst = append(dst, "\\r"...)
|
||||
case '\t':
|
||||
dst = append(dst, "\\t"...)
|
||||
default:
|
||||
dst = appendEscapedUTF16(dst, uint16(c))
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func appendEscapedUnicode(dst []byte, r rune) []byte {
|
||||
if r1, r2 := utf16.EncodeRune(r); r1 != '\ufffd' && r2 != '\ufffd' {
|
||||
dst = appendEscapedUTF16(dst, uint16(r1))
|
||||
dst = appendEscapedUTF16(dst, uint16(r2))
|
||||
} else {
|
||||
dst = appendEscapedUTF16(dst, uint16(r))
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// appendEscapedUTF16 appends x to dst as a six-byte \uXXXX escape sequence
// using four lowercase hexadecimal digits, most significant nibble first.
func appendEscapedUTF16(dst []byte, x uint16) []byte {
	const digits = "0123456789abcdef"
	dst = append(dst, '\\', 'u')
	for shift := 12; shift >= 0; shift -= 4 {
		dst = append(dst, digits[(x>>shift)&0xf])
	}
	return dst
}
|
||||
|
||||
// ReformatString consumes a JSON string from src and appends it to dst,
|
||||
// reformatting it if necessary according to the specified flags.
|
||||
// It returns the appended output and the number of consumed input bytes.
|
||||
func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
|
||||
// TODO: Should this update ValueFlags as input?
|
||||
var valFlags ValueFlags
|
||||
n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
|
||||
if err != nil {
|
||||
return dst, n, err
|
||||
}
|
||||
|
||||
// If the output requires no special escapes, and the input
|
||||
// is already in canonical form or should be preserved verbatim,
|
||||
// then directly copy the input to the output.
|
||||
if !flags.Get(jsonflags.AnyEscape) &&
|
||||
(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
|
||||
dst = append(dst, src[:n]...) // copy the string verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
|
||||
// remain escaped, however we still need to respect the
|
||||
// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
|
||||
if flags.Get(jsonflags.PreserveRawStrings) {
|
||||
var i, lastAppendIndex int
|
||||
for i < n {
|
||||
if c := src[i]; c < utf8.RuneSelf {
|
||||
if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
|
||||
dst = append(dst, src[lastAppendIndex:i]...)
|
||||
dst = appendEscapedASCII(dst, c)
|
||||
lastAppendIndex = i + 1
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
|
||||
if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
|
||||
dst = append(dst, src[lastAppendIndex:i]...)
|
||||
dst = appendEscapedUnicode(dst, r)
|
||||
lastAppendIndex = i + rn
|
||||
}
|
||||
i += rn
|
||||
}
|
||||
}
|
||||
return append(dst, src[lastAppendIndex:n]...), n, nil
|
||||
}
|
||||
|
||||
// The input contains characters that might need escaping,
|
||||
// unnecessary escape sequences, or invalid UTF-8.
|
||||
// Perform a round-trip unquote and quote to properly reformat
|
||||
// these sequences according the current flags.
|
||||
b, _ := AppendUnquote(nil, src[:n])
|
||||
dst, _ = AppendQuote(dst, b, flags)
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// AppendFloat appends src to dst formatted as a JSON number
// (RFC 7159, section 6), in a style similar to the ES6 number-to-string
// conversion. See https://go.dev/issue/14135.
//
// For 64-bit floating-point numbers the output is identical to ECMA-262,
// 6th edition, section 7.1.12.1 and RFC 8785, section 3.2.2.3, except that
// -0 is formatted as -0 rather than 0.
//
// For 32-bit floating-point numbers, src is first rounded to float32 and
// the 32-bit equivalent of the algorithm is applied.
// Note that ECMA-262 specifies no algorithm for 32-bit numbers.
func AppendFloat(dst []byte, src float64, bits int) []byte {
	if bits == 32 {
		src = float64(float32(src))
	}

	// Fixed notation is the default; non-zero magnitudes below 1e-6 or at
	// least 1e21 switch to exponent notation. The comparison is carried
	// out at the requested precision.
	format := byte('f')
	if mag := math.Abs(src); mag != 0 {
		var exponent bool
		switch bits {
		case 64:
			exponent = mag < 1e-6 || mag >= 1e21
		case 32:
			exponent = float32(mag) < 1e-6 || float32(mag) >= 1e21
		}
		if exponent {
			format = 'e'
		}
	}

	dst = strconv.AppendFloat(dst, src, format, -1, bits)
	if format != 'e' {
		return dst
	}

	// Drop the leading zero of a two-digit negative exponent (e-09 => e-9).
	if n := len(dst); n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
		dst[n-2] = dst[n-1]
		dst = dst[:n-1]
	}
	return dst
}
|
||||
|
||||
// ReformatNumber consumes a JSON string from src and appends it to dst,
|
||||
// canonicalizing it if specified.
|
||||
// It returns the appended output and the number of consumed input bytes.
|
||||
func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
|
||||
n, err := ConsumeNumber(src)
|
||||
if err != nil {
|
||||
return dst, n, err
|
||||
}
|
||||
if !flags.Get(jsonflags.CanonicalizeNumbers) {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// Identify the kind of number.
|
||||
var isFloat bool
|
||||
for _, c := range src[:n] {
|
||||
if c == '.' || c == 'e' || c == 'E' {
|
||||
isFloat = true // has fraction or exponent
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Check if need to canonicalize this kind of number.
|
||||
switch {
|
||||
case string(src[:n]) == "-0":
|
||||
break // canonicalize -0 as 0 regardless of kind
|
||||
case isFloat:
|
||||
if !flags.Get(jsonflags.CanonicalizeRawFloats) {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
default:
|
||||
// As an optimization, we can copy integer numbers below 2⁵³ verbatim
|
||||
// since the canonical form is always identical.
|
||||
const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
|
||||
if !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Parse and reformat the number (which uses a canonical format).
|
||||
fv, _ := strconv.ParseFloat(string(src[:n]), 64)
|
||||
switch {
|
||||
case fv == 0:
|
||||
fv = 0 // normalize negative zero as just zero
|
||||
case math.IsInf(fv, +1):
|
||||
fv = +math.MaxFloat64
|
||||
case math.IsInf(fv, -1):
|
||||
fv = -math.MaxFloat64
|
||||
}
|
||||
return AppendFloat(dst, fv, 64), n, nil
|
||||
}
|
||||
332
pkg/encoders/json/internal/jsonwire/encode_test.go
Normal file
332
pkg/encoders/json/internal/jsonwire/encode_test.go
Normal file
@@ -0,0 +1,332 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
"flag"
|
||||
"math"
|
||||
"net/http"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"encoding/json/internal/jsonflags"
|
||||
)
|
||||
|
||||
func TestAppendQuote(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
flags jsonflags.Bools
|
||||
want string
|
||||
wantErr error
|
||||
wantErrUTF8 error
|
||||
}{
|
||||
{"", 0, `""`, nil, nil},
|
||||
{"hello", 0, `"hello"`, nil, nil},
|
||||
{"\x00", 0, `"\u0000"`, nil, nil},
|
||||
{"\x1f", 0, `"\u001f"`, nil, nil},
|
||||
{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 0, `"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, nil, nil},
|
||||
{" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", 0, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil},
|
||||
{" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", jsonflags.EscapeForHTML, "\" !#$%\\u0026'()*+,-./0123456789:;\\u003c=\\u003e?@[]^_`{|}~\x7f\"", nil, nil},
|
||||
{" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", jsonflags.EscapeForJS, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil},
|
||||
{"\u2027\u2028\u2029\u2030", 0, "\"\u2027\u2028\u2029\u2030\"", nil, nil},
|
||||
{"\u2027\u2028\u2029\u2030", jsonflags.EscapeForHTML, "\"\u2027\u2028\u2029\u2030\"", nil, nil},
|
||||
{"\u2027\u2028\u2029\u2030", jsonflags.EscapeForJS, "\"\u2027\\u2028\\u2029\u2030\"", nil, nil},
|
||||
{"x\x80\ufffd", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xff\ufffd", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xc0", 0, "\"x\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xc0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xe0", 0, "\"x\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xe0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xe0\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xf0", 0, "\"x\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xf0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xf0\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xf0\x80\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"x\xed\xba\xad", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8},
|
||||
{"\"\\/\b\f\n\r\t", 0, `"\"\\/\b\f\n\r\t"`, nil, nil},
|
||||
{"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", 0, `"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃)."`, nil, nil},
|
||||
{"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", 0, "\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", nil, nil},
|
||||
{"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", 0, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
var flags jsonflags.Flags
|
||||
flags.Set(tt.flags | 1)
|
||||
|
||||
flags.Set(jsonflags.AllowInvalidUTF8 | 1)
|
||||
got, gotErr := AppendQuote(nil, tt.in, &flags)
|
||||
if string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) {
|
||||
t.Errorf("AppendQuote(nil, %q, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr)
|
||||
}
|
||||
flags.Set(jsonflags.AllowInvalidUTF8 | 0)
|
||||
switch got, gotErr := AppendQuote(nil, tt.in, &flags); {
|
||||
case tt.wantErrUTF8 == nil && (string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr)):
|
||||
t.Errorf("AppendQuote(nil, %q, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr)
|
||||
case tt.wantErrUTF8 != nil && (!strings.HasPrefix(tt.want, string(got)) || !reflect.DeepEqual(gotErr, tt.wantErrUTF8)):
|
||||
t.Errorf("AppendQuote(nil, %q, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErrUTF8)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppendNumber(t *testing.T) {
|
||||
tests := []struct {
|
||||
in float64
|
||||
want32 string
|
||||
want64 string
|
||||
}{
|
||||
{math.E, "2.7182817", "2.718281828459045"},
|
||||
{math.Pi, "3.1415927", "3.141592653589793"},
|
||||
{math.SmallestNonzeroFloat32, "1e-45", "1.401298464324817e-45"},
|
||||
{math.SmallestNonzeroFloat64, "0", "5e-324"},
|
||||
{math.MaxFloat32, "3.4028235e+38", "3.4028234663852886e+38"},
|
||||
{math.MaxFloat64, "", "1.7976931348623157e+308"},
|
||||
{0.1111111111111111, "0.11111111", "0.1111111111111111"},
|
||||
{0.2222222222222222, "0.22222222", "0.2222222222222222"},
|
||||
{0.3333333333333333, "0.33333334", "0.3333333333333333"},
|
||||
{0.4444444444444444, "0.44444445", "0.4444444444444444"},
|
||||
{0.5555555555555555, "0.5555556", "0.5555555555555555"},
|
||||
{0.6666666666666666, "0.6666667", "0.6666666666666666"},
|
||||
{0.7777777777777777, "0.7777778", "0.7777777777777777"},
|
||||
{0.8888888888888888, "0.8888889", "0.8888888888888888"},
|
||||
{0.9999999999999999, "1", "0.9999999999999999"},
|
||||
|
||||
// The following entries are from RFC 8785, appendix B
|
||||
// which are designed to ensure repeatable formatting of 64-bit floats.
|
||||
{math.Float64frombits(0x0000000000000000), "0", "0"},
|
||||
{math.Float64frombits(0x8000000000000000), "-0", "-0"}, // differs from RFC 8785
|
||||
{math.Float64frombits(0x0000000000000001), "0", "5e-324"},
|
||||
{math.Float64frombits(0x8000000000000001), "-0", "-5e-324"},
|
||||
{math.Float64frombits(0x7fefffffffffffff), "", "1.7976931348623157e+308"},
|
||||
{math.Float64frombits(0xffefffffffffffff), "", "-1.7976931348623157e+308"},
|
||||
{math.Float64frombits(0x4340000000000000), "9007199000000000", "9007199254740992"},
|
||||
{math.Float64frombits(0xc340000000000000), "-9007199000000000", "-9007199254740992"},
|
||||
{math.Float64frombits(0x4430000000000000), "295147900000000000000", "295147905179352830000"},
|
||||
{math.Float64frombits(0x44b52d02c7e14af5), "1e+23", "9.999999999999997e+22"},
|
||||
{math.Float64frombits(0x44b52d02c7e14af6), "1e+23", "1e+23"},
|
||||
{math.Float64frombits(0x44b52d02c7e14af7), "1e+23", "1.0000000000000001e+23"},
|
||||
{math.Float64frombits(0x444b1ae4d6e2ef4e), "1e+21", "999999999999999700000"},
|
||||
{math.Float64frombits(0x444b1ae4d6e2ef4f), "1e+21", "999999999999999900000"},
|
||||
{math.Float64frombits(0x444b1ae4d6e2ef50), "1e+21", "1e+21"},
|
||||
{math.Float64frombits(0x3eb0c6f7a0b5ed8c), "0.000001", "9.999999999999997e-7"},
|
||||
{math.Float64frombits(0x3eb0c6f7a0b5ed8d), "0.000001", "0.000001"},
|
||||
{math.Float64frombits(0x41b3de4355555553), "333333340", "333333333.3333332"},
|
||||
{math.Float64frombits(0x41b3de4355555554), "333333340", "333333333.33333325"},
|
||||
{math.Float64frombits(0x41b3de4355555555), "333333340", "333333333.3333333"},
|
||||
{math.Float64frombits(0x41b3de4355555556), "333333340", "333333333.3333334"},
|
||||
{math.Float64frombits(0x41b3de4355555557), "333333340", "333333333.33333343"},
|
||||
{math.Float64frombits(0xbecbf647612f3696), "-0.0000033333333", "-0.0000033333333333333333"},
|
||||
{math.Float64frombits(0x43143ff3c1cb0959), "1424953900000000", "1424953923781206.2"},
|
||||
|
||||
// The following are select entries from RFC 8785, appendix B,
|
||||
// but modified for equivalent 32-bit behavior.
|
||||
{float64(math.Float32frombits(0x65a96815)), "9.999999e+22", "9.999998877476383e+22"},
|
||||
{float64(math.Float32frombits(0x65a96816)), "1e+23", "9.999999778196308e+22"},
|
||||
{float64(math.Float32frombits(0x65a96817)), "1.0000001e+23", "1.0000000678916234e+23"},
|
||||
{float64(math.Float32frombits(0x6258d725)), "999999900000000000000", "999999879303389000000"},
|
||||
{float64(math.Float32frombits(0x6258d726)), "999999950000000000000", "999999949672133200000"},
|
||||
{float64(math.Float32frombits(0x6258d727)), "1e+21", "1.0000000200408773e+21"},
|
||||
{float64(math.Float32frombits(0x6258d728)), "1.0000001e+21", "1.0000000904096215e+21"},
|
||||
{float64(math.Float32frombits(0x358637bc)), "9.999999e-7", "9.99999883788405e-7"},
|
||||
{float64(math.Float32frombits(0x358637bd)), "0.000001", "9.999999974752427e-7"},
|
||||
{float64(math.Float32frombits(0x358637be)), "0.0000010000001", "0.0000010000001111620804"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
if got32 := string(AppendFloat(nil, tt.in, 32)); got32 != tt.want32 && tt.want32 != "" {
|
||||
t.Errorf("AppendFloat(nil, %v, 32) = %v, want %v", tt.in, got32, tt.want32)
|
||||
}
|
||||
if got64 := string(AppendFloat(nil, tt.in, 64)); got64 != tt.want64 && tt.want64 != "" {
|
||||
t.Errorf("AppendFloat(nil, %v, 64) = %v, want %v", tt.in, got64, tt.want64)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// testCanonicalNumberLines is the number of lines of the canonical numbers
// testdata to check, settable through the -canonical-number-lines flag.
//
// The default of 1e4 lines is large enough to include test numbers from all
// three categories (i.e., static, series, and random), yet small enough to
// execute quickly relative to other tests.
//
// Processing 1e8 lines takes a minute and processes about 4GiB worth of text.
var testCanonicalNumberLines = flag.Float64(
	"canonical-number-lines",
	1e4,
	"specify the number of lines to check from the canonical numbers testdata",
)
|
||||
|
||||
// TestCanonicalNumber verifies that appendNumber complies with RFC 8785
|
||||
// according to the testdata provided by the reference implementation.
|
||||
// See https://github.com/cyberphone/json-canonicalization/tree/master/testdata#es6-numbers.
|
||||
func TestCanonicalNumber(t *testing.T) {
|
||||
const testfileURL = "https://github.com/cyberphone/json-canonicalization/releases/download/es6testfile/es6testfile100m.txt.gz"
|
||||
hashes := map[float64]string{
|
||||
1e3: "be18b62b6f69cdab33a7e0dae0d9cfa869fda80ddc712221570f9f40a5878687",
|
||||
1e4: "b9f7a8e75ef22a835685a52ccba7f7d6bdc99e34b010992cbc5864cd12be6892",
|
||||
1e5: "22776e6d4b49fa294a0d0f349268e5c28808fe7e0cb2bcbe28f63894e494d4c7",
|
||||
1e6: "49415fee2c56c77864931bd3624faad425c3c577d6d74e89a83bc725506dad16",
|
||||
1e7: "b9f8a44a91d46813b21b9602e72f112613c91408db0b8341fb94603d9db135e0",
|
||||
1e8: "0f7dda6b0837dde083c5d6b896f7d62340c8a2415b0c7121d83145e08a755272",
|
||||
}
|
||||
wantHash := hashes[*testCanonicalNumberLines]
|
||||
if wantHash == "" {
|
||||
t.Fatalf("canonical-number-lines must be one of the following values: 1e3, 1e4, 1e5, 1e6, 1e7, 1e8")
|
||||
}
|
||||
numLines := int(*testCanonicalNumberLines)
|
||||
|
||||
// generator returns a function that generates the next float64 to format.
|
||||
// This implements the algorithm specified in the reference implementation.
|
||||
generator := func() func() float64 {
|
||||
static := [...]uint64{
|
||||
0x0000000000000000, 0x8000000000000000, 0x0000000000000001, 0x8000000000000001,
|
||||
0xc46696695dbd1cc3, 0xc43211ede4974a35, 0xc3fce97ca0f21056, 0xc3c7213080c1a6ac,
|
||||
0xc39280f39a348556, 0xc35d9b1f5d20d557, 0xc327af4c4a80aaac, 0xc2f2f2a36ecd5556,
|
||||
0xc2be51057e155558, 0xc28840d131aaaaac, 0xc253670dc1555557, 0xc21f0b4935555557,
|
||||
0xc1e8d5d42aaaaaac, 0xc1b3de4355555556, 0xc17fca0555555556, 0xc1496e6aaaaaaaab,
|
||||
0xc114585555555555, 0xc0e046aaaaaaaaab, 0xc0aa0aaaaaaaaaaa, 0xc074d55555555555,
|
||||
0xc040aaaaaaaaaaab, 0xc00aaaaaaaaaaaab, 0xbfd5555555555555, 0xbfa1111111111111,
|
||||
0xbf6b4e81b4e81b4f, 0xbf35d867c3ece2a5, 0xbf0179ec9cbd821e, 0xbecbf647612f3696,
|
||||
0xbe965e9f80f29212, 0xbe61e54c672874db, 0xbe2ca213d840baf8, 0xbdf6e80fe033c8c6,
|
||||
0xbdc2533fe68fd3d2, 0xbd8d51ffd74c861c, 0xbd5774ccac3d3817, 0xbd22c3d6f030f9ac,
|
||||
0xbcee0624b3818f79, 0xbcb804ea293472c7, 0xbc833721ba905bd3, 0xbc4ebe9c5db3c61e,
|
||||
0xbc18987d17c304e5, 0xbbe3ad30dfcf371d, 0xbbaf7b816618582f, 0xbb792f9ab81379bf,
|
||||
0xbb442615600f9499, 0xbb101e77800c76e1, 0xbad9ca58cce0be35, 0xbaa4a1e0a3e6fe90,
|
||||
0xba708180831f320d, 0xba3a68cd9e985016, 0x446696695dbd1cc3, 0x443211ede4974a35,
|
||||
0x43fce97ca0f21056, 0x43c7213080c1a6ac, 0x439280f39a348556, 0x435d9b1f5d20d557,
|
||||
0x4327af4c4a80aaac, 0x42f2f2a36ecd5556, 0x42be51057e155558, 0x428840d131aaaaac,
|
||||
0x4253670dc1555557, 0x421f0b4935555557, 0x41e8d5d42aaaaaac, 0x41b3de4355555556,
|
||||
0x417fca0555555556, 0x41496e6aaaaaaaab, 0x4114585555555555, 0x40e046aaaaaaaaab,
|
||||
0x40aa0aaaaaaaaaaa, 0x4074d55555555555, 0x4040aaaaaaaaaaab, 0x400aaaaaaaaaaaab,
|
||||
0x3fd5555555555555, 0x3fa1111111111111, 0x3f6b4e81b4e81b4f, 0x3f35d867c3ece2a5,
|
||||
0x3f0179ec9cbd821e, 0x3ecbf647612f3696, 0x3e965e9f80f29212, 0x3e61e54c672874db,
|
||||
0x3e2ca213d840baf8, 0x3df6e80fe033c8c6, 0x3dc2533fe68fd3d2, 0x3d8d51ffd74c861c,
|
||||
0x3d5774ccac3d3817, 0x3d22c3d6f030f9ac, 0x3cee0624b3818f79, 0x3cb804ea293472c7,
|
||||
0x3c833721ba905bd3, 0x3c4ebe9c5db3c61e, 0x3c18987d17c304e5, 0x3be3ad30dfcf371d,
|
||||
0x3baf7b816618582f, 0x3b792f9ab81379bf, 0x3b442615600f9499, 0x3b101e77800c76e1,
|
||||
0x3ad9ca58cce0be35, 0x3aa4a1e0a3e6fe90, 0x3a708180831f320d, 0x3a3a68cd9e985016,
|
||||
0x4024000000000000, 0x4014000000000000, 0x3fe0000000000000, 0x3fa999999999999a,
|
||||
0x3f747ae147ae147b, 0x3f40624dd2f1a9fc, 0x3f0a36e2eb1c432d, 0x3ed4f8b588e368f1,
|
||||
0x3ea0c6f7a0b5ed8d, 0x3e6ad7f29abcaf48, 0x3e35798ee2308c3a, 0x3ed539223589fa95,
|
||||
0x3ed4ff26cd5a7781, 0x3ed4f95a762283ff, 0x3ed4f8c60703520c, 0x3ed4f8b72f19cd0d,
|
||||
0x3ed4f8b5b31c0c8d, 0x3ed4f8b58d1c461a, 0x3ed4f8b5894f7f0e, 0x3ed4f8b588ee37f3,
|
||||
0x3ed4f8b588e47da4, 0x3ed4f8b588e3849c, 0x3ed4f8b588e36bb5, 0x3ed4f8b588e36937,
|
||||
0x3ed4f8b588e368f8, 0x3ed4f8b588e368f1, 0x3ff0000000000000, 0xbff0000000000000,
|
||||
0xbfeffffffffffffa, 0xbfeffffffffffffb, 0x3feffffffffffffa, 0x3feffffffffffffb,
|
||||
0x3feffffffffffffc, 0x3feffffffffffffe, 0xbfefffffffffffff, 0xbfefffffffffffff,
|
||||
0x3fefffffffffffff, 0x3fefffffffffffff, 0x3fd3333333333332, 0x3fd3333333333333,
|
||||
0x3fd3333333333334, 0x0010000000000000, 0x000ffffffffffffd, 0x000fffffffffffff,
|
||||
0x7fefffffffffffff, 0xffefffffffffffff, 0x4340000000000000, 0xc340000000000000,
|
||||
0x4430000000000000, 0x44b52d02c7e14af5, 0x44b52d02c7e14af6, 0x44b52d02c7e14af7,
|
||||
0x444b1ae4d6e2ef4e, 0x444b1ae4d6e2ef4f, 0x444b1ae4d6e2ef50, 0x3eb0c6f7a0b5ed8c,
|
||||
0x3eb0c6f7a0b5ed8d, 0x41b3de4355555553, 0x41b3de4355555554, 0x41b3de4355555555,
|
||||
0x41b3de4355555556, 0x41b3de4355555557, 0xbecbf647612f3696, 0x43143ff3c1cb0959,
|
||||
}
|
||||
var state struct {
|
||||
idx int
|
||||
data []byte
|
||||
block [sha256.Size]byte
|
||||
}
|
||||
return func() float64 {
|
||||
const numSerial = 2000
|
||||
var f float64
|
||||
switch {
|
||||
case state.idx < len(static):
|
||||
f = math.Float64frombits(static[state.idx])
|
||||
case state.idx < len(static)+numSerial:
|
||||
f = math.Float64frombits(0x0010000000000000 + uint64(state.idx-len(static)))
|
||||
default:
|
||||
for f == 0 || math.IsNaN(f) || math.IsInf(f, 0) {
|
||||
if len(state.data) == 0 {
|
||||
state.block = sha256.Sum256(state.block[:])
|
||||
state.data = state.block[:]
|
||||
}
|
||||
f = math.Float64frombits(binary.LittleEndian.Uint64(state.data))
|
||||
state.data = state.data[8:]
|
||||
}
|
||||
}
|
||||
state.idx++
|
||||
return f
|
||||
}
|
||||
}
|
||||
|
||||
// Pass through the test twice. In the first pass we only hash the output,
|
||||
// while in the second pass we check every line against the golden testdata.
|
||||
// If the hashes match in the first pass, then we skip the second pass.
|
||||
for _, checkGolden := range []bool{false, true} {
|
||||
var br *bufio.Reader // for line-by-line reading of es6testfile100m.txt
|
||||
if checkGolden {
|
||||
resp, err := http.Get(testfileURL)
|
||||
if err != nil {
|
||||
t.Fatalf("http.Get error: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
zr, err := gzip.NewReader(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("gzip.NewReader error: %v", err)
|
||||
}
|
||||
|
||||
br = bufio.NewReader(zr)
|
||||
}
|
||||
|
||||
// appendNumberJCS differs from appendNumber only for -0.
|
||||
appendNumberJCS := func(b []byte, f float64) []byte {
|
||||
if math.Signbit(f) && f == 0 {
|
||||
return append(b, '0')
|
||||
}
|
||||
return AppendFloat(b, f, 64)
|
||||
}
|
||||
|
||||
var gotLine []byte
|
||||
next := generator()
|
||||
hash := sha256.New()
|
||||
start := time.Now()
|
||||
lastPrint := start
|
||||
for n := 1; n <= numLines; n++ {
|
||||
// Generate the formatted line for this number.
|
||||
f := next()
|
||||
gotLine = gotLine[:0] // reset from previous usage
|
||||
gotLine = strconv.AppendUint(gotLine, math.Float64bits(f), 16)
|
||||
gotLine = append(gotLine, ',')
|
||||
gotLine = appendNumberJCS(gotLine, f)
|
||||
gotLine = append(gotLine, '\n')
|
||||
hash.Write(gotLine)
|
||||
|
||||
// Check that the formatted line matches.
|
||||
if checkGolden {
|
||||
wantLine, err := br.ReadBytes('\n')
|
||||
if err != nil {
|
||||
t.Fatalf("bufio.Reader.ReadBytes error: %v", err)
|
||||
}
|
||||
if !bytes.Equal(gotLine, wantLine) {
|
||||
t.Errorf("mismatch on line %d:\n\tgot %v\n\twant %v",
|
||||
n, strings.TrimSpace(string(gotLine)), strings.TrimSpace(string(wantLine)))
|
||||
}
|
||||
}
|
||||
|
||||
// Print progress.
|
||||
if now := time.Now(); now.Sub(lastPrint) > time.Second || n == numLines {
|
||||
remaining := float64(now.Sub(start)) * float64(numLines-n) / float64(n)
|
||||
t.Logf("%0.3f%% (%v remaining)",
|
||||
100.0*float64(n)/float64(numLines),
|
||||
time.Duration(remaining).Round(time.Second))
|
||||
lastPrint = now
|
||||
}
|
||||
}
|
||||
|
||||
gotHash := hex.EncodeToString(hash.Sum(nil))
|
||||
if gotHash == wantHash {
|
||||
return // hashes match, no need to check golden testdata
|
||||
}
|
||||
}
|
||||
}
|
||||
217
pkg/encoders/json/internal/jsonwire/wire.go
Normal file
217
pkg/encoders/json/internal/jsonwire/wire.go
Normal file
@@ -0,0 +1,217 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
// Package jsonwire implements stateless functionality for handling JSON text.
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TrimSuffixWhitespace trims JSON whitespace (space, tab, carriage return,
// and newline) from the end of b and returns the shortened slice.
func TrimSuffixWhitespace(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	end := len(b)
	for ; end > 0; end-- {
		if c := b[end-1]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
	}
	return b[:end]
}
|
||||
|
||||
// TrimSuffixString removes a valid JSON string from the end of b.
// The behavior is undefined if b does not end with a valid JSON string.
func TrimSuffixString(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	n := len(b)

	// Drop the closing quote.
	if n > 0 && b[n-1] == '"' {
		n--
	}
	// Walk backwards until the byte at the end is a quote that is not
	// preceded by a backslash, i.e. the opening quote.
	for n >= 2 && (b[n-1] != '"' || b[n-2] == '\\') {
		n--
	}
	// Drop the opening quote.
	if n > 0 && b[n-1] == '"' {
		n--
	}
	return b[:n]
}
|
||||
|
||||
// HasSuffixByte reports whether the last byte of b is c.
// It reports false for an empty b.
func HasSuffixByte(b []byte, c byte) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	n := len(b)
	if n == 0 {
		return false
	}
	return b[n-1] == c
}
|
||||
|
||||
// TrimSuffixByte returns b without its final byte if that byte is c;
// otherwise it returns b unchanged. At most one byte is removed.
func TrimSuffixByte(b []byte, c byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	last := len(b) - 1
	if last < 0 || b[last] != c {
		return b
	}
	return b[:last]
}
|
||||
|
||||
// QuoteRune quotes the first rune in the input.
|
||||
func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string {
|
||||
r, n := utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
|
||||
if r == utf8.RuneError && n == 1 {
|
||||
return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'`
|
||||
}
|
||||
return strconv.QuoteRune(r)
|
||||
}
|
||||
|
||||
// CompareUTF16 lexicographically compares x to y according
|
||||
// to the UTF-16 codepoints of the UTF-8 encoded input strings.
|
||||
// This implements the ordering specified in RFC 8785, section 3.2.3.
|
||||
func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
|
||||
// NOTE: This is an optimized, mostly allocation-free implementation
|
||||
// of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the
|
||||
// two implementations agree on the result of comparing any two strings.
|
||||
isUTF16Self := func(r rune) bool {
|
||||
return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF')
|
||||
}
|
||||
|
||||
for {
|
||||
if len(x) == 0 || len(y) == 0 {
|
||||
return cmp.Compare(len(x), len(y))
|
||||
}
|
||||
|
||||
// ASCII fast-path.
|
||||
if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf {
|
||||
if x[0] != y[0] {
|
||||
return cmp.Compare(x[0], y[0])
|
||||
}
|
||||
x, y = x[1:], y[1:]
|
||||
continue
|
||||
}
|
||||
|
||||
// Decode next pair of runes as UTF-8.
|
||||
rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x)))
|
||||
ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y)))
|
||||
|
||||
selfx := isUTF16Self(rx)
|
||||
selfy := isUTF16Self(ry)
|
||||
switch {
|
||||
// The x rune is a single UTF-16 codepoint, while
|
||||
// the y rune is a surrogate pair of UTF-16 codepoints.
|
||||
case selfx && !selfy:
|
||||
ry, _ = utf16.EncodeRune(ry)
|
||||
// The y rune is a single UTF-16 codepoint, while
|
||||
// the x rune is a surrogate pair of UTF-16 codepoints.
|
||||
case selfy && !selfx:
|
||||
rx, _ = utf16.EncodeRune(rx)
|
||||
}
|
||||
if rx != ry {
|
||||
return cmp.Compare(rx, ry)
|
||||
}
|
||||
|
||||
// Check for invalid UTF-8, in which case,
|
||||
// we just perform a byte-for-byte comparison.
|
||||
if isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny) {
|
||||
if x[0] != y[0] {
|
||||
return cmp.Compare(x[0], y[0])
|
||||
}
|
||||
}
|
||||
x, y = x[nx:], y[ny:]
|
||||
}
|
||||
}
|
||||
|
||||
// truncateMaxUTF8 truncates b to at most utf8.UTFMax bytes, which is
// long enough to still hold the first rune of b.
//
// The utf8 package currently lacks generic variants, which complicates
// generic functions that operate on either []byte or string.
// As a hack, we always call the utf8 function operating on strings,
// but always truncate the input such that the result is identical.
//
// Example usage:
//
//	utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
//
// Converting a []byte to a string is stack allocated since
// truncateMaxUTF8 guarantees that the []byte is short.
func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
	// TODO(https://go.dev/issue/56948): Remove this function and
	// instead directly call generic utf8 functions wherever used.
	if len(b) <= utf8.UTFMax {
		return b
	}
	return b[:utf8.UTFMax]
}
|
||||
|
||||
// TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead.
|
||||
// ErrInvalidUTF8 is a package-level error value whose message is "invalid UTF-8".
var ErrInvalidUTF8 = errors.New("invalid UTF-8")
|
||||
|
||||
func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error {
|
||||
what := QuoteRune(prefix)
|
||||
return errors.New("invalid character " + what + " " + where)
|
||||
}
|
||||
|
||||
// NewInvalidEscapeSequenceError returns an error reporting that what is an
// invalid escape sequence within a JSON string. Inputs longer than 6 bytes
// are labeled as a "surrogate pair" rather than an "escape sequence".
//
// The offending text is shown verbatim between backticks unless it contains
// a backtick, utf8.RuneError, whitespace, or a non-printable rune, in which
// case it is shown using strconv.Quote.
func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
	text := string(what)

	kind := "escape sequence"
	if len(what) > 6 {
		kind = "surrogate pair"
	}

	// mustQuote reports whether r cannot be shown verbatim inside backticks.
	mustQuote := func(r rune) bool {
		switch {
		case r == '`', r == utf8.RuneError:
			return true
		case unicode.IsSpace(r):
			return true
		default:
			return !unicode.IsPrint(r)
		}
	}
	if strings.IndexFunc(text, mustQuote) < 0 {
		return errors.New("invalid " + kind + " `" + text + "` in string")
	}
	return errors.New("invalid " + kind + " " + strconv.Quote(text) + " in string")
}
|
||||
|
||||
// TruncatePointer optionally truncates the JSON pointer,
|
||||
// enforcing that the length roughly does not exceed n.
|
||||
func TruncatePointer(s string, n int) string {
|
||||
if len(s) <= n {
|
||||
return s
|
||||
}
|
||||
i := n / 2
|
||||
j := len(s) - n/2
|
||||
|
||||
// Avoid truncating a name if there are multiple names present.
|
||||
if k := strings.LastIndexByte(s[:i], '/'); k > 0 {
|
||||
i = k
|
||||
}
|
||||
if k := strings.IndexByte(s[j:], '/'); k >= 0 {
|
||||
j += k + len("/")
|
||||
}
|
||||
|
||||
// Avoid truncation in the middle of a UTF-8 rune.
|
||||
for i > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:i])) {
|
||||
i--
|
||||
}
|
||||
for j < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[j:])) {
|
||||
j++
|
||||
}
|
||||
|
||||
// Determine the right middle fragment to use.
|
||||
var middle string
|
||||
switch strings.Count(s[i:j], "/") {
|
||||
case 0:
|
||||
middle = "…"
|
||||
case 1:
|
||||
middle = "…/…"
|
||||
default:
|
||||
middle = "…/…/…"
|
||||
}
|
||||
if strings.HasPrefix(s[i:j], "/") && middle != "…" {
|
||||
middle = strings.TrimPrefix(middle, "…")
|
||||
}
|
||||
if strings.HasSuffix(s[i:j], "/") && middle != "…" {
|
||||
middle = strings.TrimSuffix(middle, "…")
|
||||
}
|
||||
return s[:i] + middle + s[j:]
|
||||
}
|
||||
|
||||
// isInvalidUTF8 reports whether the pair (r, rn) is utf8.RuneError with a
// width of 1. It takes its arguments in the same order as the results of
// the utf8 decode functions so that such a call can be passed in directly.
func isInvalidUTF8(r rune, rn int) bool {
	if rn != 1 {
		return false
	}
	return r == utf8.RuneError
}
|
||||
98
pkg/encoders/json/internal/jsonwire/wire_test.go
Normal file
98
pkg/encoders/json/internal/jsonwire/wire_test.go
Normal file
@@ -0,0 +1,98 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"slices"
|
||||
"testing"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestQuoteRune(t *testing.T) {
|
||||
tests := []struct{ in, want string }{
|
||||
{"x", `'x'`},
|
||||
{"\n", `'\n'`},
|
||||
{"'", `'\''`},
|
||||
{"\xff", `'\xff'`},
|
||||
{"💩", `'💩'`},
|
||||
{"💩"[:1], `'\xf0'`},
|
||||
{"\uffff", `'\uffff'`},
|
||||
{"\U00101234", `'\U00101234'`},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := QuoteRune([]byte(tt.in))
|
||||
if got != tt.want {
|
||||
t.Errorf("quoteRune(%q) = %s, want %s", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// compareUTF16Testdata is listed in the order that CompareUTF16 is expected
// to produce: TestCompareUTF16 requires that comparing entries i and j yields
// cmp.Compare(i, j). FuzzCompareUTF16 seeds its corpus with every pair of entries.
var compareUTF16Testdata = []string{"", "\r", "1", "f\xfe", "f\xfe\xff", "f\xff", "\u0080", "\u00f6", "\u20ac", "\U0001f600", "\ufb33"}
|
||||
|
||||
func TestCompareUTF16(t *testing.T) {
|
||||
for i, si := range compareUTF16Testdata {
|
||||
for j, sj := range compareUTF16Testdata {
|
||||
got := CompareUTF16([]byte(si), []byte(sj))
|
||||
want := cmp.Compare(i, j)
|
||||
if got != want {
|
||||
t.Errorf("CompareUTF16(%q, %q) = %v, want %v", si, sj, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func FuzzCompareUTF16(f *testing.F) {
|
||||
for _, td1 := range compareUTF16Testdata {
|
||||
for _, td2 := range compareUTF16Testdata {
|
||||
f.Add([]byte(td1), []byte(td2))
|
||||
}
|
||||
}
|
||||
|
||||
// CompareUTF16Simple is identical to CompareUTF16,
|
||||
// but relies on naively converting a string to a []uint16 codepoints.
|
||||
// It is easy to verify as correct, but is slow.
|
||||
CompareUTF16Simple := func(x, y []byte) int {
|
||||
ux := utf16.Encode([]rune(string(x)))
|
||||
uy := utf16.Encode([]rune(string(y)))
|
||||
return slices.Compare(ux, uy)
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, s1, s2 []byte) {
|
||||
// Compare the optimized and simplified implementations.
|
||||
got := CompareUTF16(s1, s2)
|
||||
want := CompareUTF16Simple(s1, s2)
|
||||
if got != want && utf8.Valid(s1) && utf8.Valid(s2) {
|
||||
t.Errorf("CompareUTF16(%q, %q) = %v, want %v", s1, s2, got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestTruncatePointer(t *testing.T) {
|
||||
tests := []struct{ in, want string }{
|
||||
{"hello", "hello"},
|
||||
{"/a/b/c", "/a/b/c"},
|
||||
{"/a/b/c/d/e/f/g", "/a/b/…/f/g"},
|
||||
{"supercalifragilisticexpialidocious", "super…cious"},
|
||||
{"/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious", "/supe…/…cious"},
|
||||
{"/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious", "/supe…/…/…cious"},
|
||||
{"/a/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious", "/a/…/…cious"},
|
||||
{"/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious/b", "/supe…/…/b"},
|
||||
{"/fizz/buzz/bazz", "/fizz/…/bazz"},
|
||||
{"/fizz/buzz/bazz/razz", "/fizz/…/razz"},
|
||||
{"/////////////////////////////", "/////…/////"},
|
||||
{"/🎄❤️✨/🎁✅😊/🎅🔥⭐", "/🎄…/…/…⭐"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := TruncatePointer(tt.in, 10)
|
||||
if got != tt.want {
|
||||
t.Errorf("TruncatePointer(%q) = %q, want %q", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user