add forked version of encoding/json with HTML escaping disabled

- modified all local packages to use this fork
commit c958a7d9ed
parent f15db4313b
2025-08-23 10:34:22 +01:00

72 changed files with 31429 additions and 10 deletions
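For context, the upstream encoding/json escapes '<', '>', and '&' inside JSON strings by default; this fork makes the unescaped output the default. A minimal sketch of the behavioral difference using only the standard library (the fork's import path is not shown in this diff, so it is omitted here):

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

func main() {
	v := map[string]string{"html": `<a href="x">&amp;</a>`}

	// Upstream default: '<', '>', and '&' are escaped as \u003c, \u003e, \u0026.
	escaped, _ := json.Marshal(v)
	fmt.Println(string(escaped))

	// The behavior this fork makes the default, approximated here with the
	// standard library's explicit opt-out:
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	enc.SetEscapeHTML(false)
	_ = enc.Encode(v)
	fmt.Print(buf.String())
}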


@@ -0,0 +1,856 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2

package jsontext
import (
"bytes"
"errors"
"io"
"math"
"math/rand"
"path"
"reflect"
"strings"
"testing"
"encoding/json/internal/jsontest"
"encoding/json/internal/jsonwire"
)
func E(err error) *SyntacticError {
return &SyntacticError{Err: err}
}
func newInvalidCharacterError(prefix, where string) *SyntacticError {
return E(jsonwire.NewInvalidCharacterError(prefix, where))
}
func newInvalidEscapeSequenceError(what string) *SyntacticError {
return E(jsonwire.NewInvalidEscapeSequenceError(what))
}
func (e *SyntacticError) withPos(prefix string, pointer Pointer) *SyntacticError {
e.ByteOffset = int64(len(prefix))
e.JSONPointer = pointer
return e
}
func equalError(x, y error) bool {
return reflect.DeepEqual(x, y)
}
var (
zeroToken Token
zeroValue Value
)
// tokOrVal is either a Token or a Value.
type tokOrVal interface{ Kind() Kind }
type coderTestdataEntry struct {
name jsontest.CaseName
in string
outCompacted string
outEscaped string // outCompacted if empty; escapes all runes in a string
outIndented string // outCompacted if empty; uses " " for indent prefix and "\t" for indent
outCanonicalized string // outCompacted if empty
tokens []Token
pointers []Pointer
}
var coderTestdata = []coderTestdataEntry{{
name: jsontest.Name("Null"),
in: ` null `,
outCompacted: `null`,
tokens: []Token{Null},
pointers: []Pointer{""},
}, {
name: jsontest.Name("False"),
in: ` false `,
outCompacted: `false`,
tokens: []Token{False},
}, {
name: jsontest.Name("True"),
in: ` true `,
outCompacted: `true`,
tokens: []Token{True},
}, {
name: jsontest.Name("EmptyString"),
in: ` "" `,
outCompacted: `""`,
tokens: []Token{String("")},
}, {
name: jsontest.Name("SimpleString"),
in: ` "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" `,
outCompacted: `"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"`,
outEscaped: `"\u0061\u0062\u0063\u0064\u0065\u0066\u0067\u0068\u0069\u006a\u006b\u006c\u006d\u006e\u006f\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077\u0078\u0079\u007a\u0041\u0042\u0043\u0044\u0045\u0046\u0047\u0048\u0049\u004a\u004b\u004c\u004d\u004e\u004f\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057\u0058\u0059\u005a"`,
tokens: []Token{String("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")},
}, {
name: jsontest.Name("ComplicatedString"),
in: " \"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + ` \ud800\udead \"\\\/\b\f\n\r\t \u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009" `,
outCompacted: "\"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + " 𐊭 \\\"\\\\/\\b\\f\\n\\r\\t \\\"\\\\/\\b\\f\\n\\r\\t\"",
outEscaped: `"\u0048\u0065\u006c\u006c\u006f\u002c\u0020\u4e16\u754c\u0020\ud83c\udf1f\u2605\u2606\u2729\ud83c\udf20\u0020\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02\u0020\ud800\udead\u0020\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009\u0020\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`,
outCanonicalized: `"Hello, 世界 🌟★☆✩🌠 €ö€힙דּ<EE8080>😂 𐊭 \"\\/\b\f\n\r\t \"\\/\b\f\n\r\t"`,
tokens: []Token{rawToken("\"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + " 𐊭 \\\"\\\\/\\b\\f\\n\\r\\t \\\"\\\\/\\b\\f\\n\\r\\t\"")},
}, {
name: jsontest.Name("ZeroNumber"),
in: ` 0 `,
outCompacted: `0`,
tokens: []Token{Uint(0)},
}, {
name: jsontest.Name("SimpleNumber"),
in: ` 123456789 `,
outCompacted: `123456789`,
tokens: []Token{Uint(123456789)},
}, {
name: jsontest.Name("NegativeNumber"),
in: ` -123456789 `,
outCompacted: `-123456789`,
tokens: []Token{Int(-123456789)},
}, {
name: jsontest.Name("FractionalNumber"),
in: " 0.123456789 ",
outCompacted: `0.123456789`,
tokens: []Token{Float(0.123456789)},
}, {
name: jsontest.Name("ExponentNumber"),
in: " 0e12456789 ",
outCompacted: `0e12456789`,
outCanonicalized: `0`,
tokens: []Token{rawToken(`0e12456789`)},
}, {
name: jsontest.Name("ExponentNumberP"),
in: " 0e+12456789 ",
outCompacted: `0e+12456789`,
outCanonicalized: `0`,
tokens: []Token{rawToken(`0e+12456789`)},
}, {
name: jsontest.Name("ExponentNumberN"),
in: " 0e-12456789 ",
outCompacted: `0e-12456789`,
outCanonicalized: `0`,
tokens: []Token{rawToken(`0e-12456789`)},
}, {
name: jsontest.Name("ComplicatedNumber"),
in: ` -123456789.987654321E+0123456789 `,
outCompacted: `-123456789.987654321E+0123456789`,
outCanonicalized: `-1.7976931348623157e+308`,
tokens: []Token{rawToken(`-123456789.987654321E+0123456789`)},
}, {
name: jsontest.Name("Numbers"),
in: ` [
0, -0, 0.0, -0.0, 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001, 1e1000,
-5e-324, 1e+100, 1.7976931348623157e+308,
9007199254740990, 9007199254740991, 9007199254740992, 9007199254740993, 9007199254740994,
-9223372036854775808, 9223372036854775807, 0, 18446744073709551615
] `,
outCompacted: "[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,-9223372036854775808,9223372036854775807,0,18446744073709551615]",
outIndented: `[
0,
-0,
0.0,
-0.0,
1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,
1e1000,
-5e-324,
1e+100,
1.7976931348623157e+308,
9007199254740990,
9007199254740991,
9007199254740992,
9007199254740993,
9007199254740994,
-9223372036854775808,
9223372036854775807,
0,
18446744073709551615
]`,
outCanonicalized: `[0,0,0,0,1,1.7976931348623157e+308,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740992,9007199254740994,-9223372036854776000,9223372036854776000,0,18446744073709552000]`,
tokens: []Token{
BeginArray,
Float(0), Float(math.Copysign(0, -1)), rawToken(`0.0`), rawToken(`-0.0`), rawToken(`1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001`), rawToken(`1e1000`),
Float(-5e-324), Float(1e100), Float(1.7976931348623157e+308),
Float(9007199254740990), Float(9007199254740991), Float(9007199254740992), rawToken(`9007199254740993`), rawToken(`9007199254740994`),
Int(minInt64), Int(maxInt64), Uint(minUint64), Uint(maxUint64),
EndArray,
},
pointers: []Pointer{
"", "/0", "/1", "/2", "/3", "/4", "/5", "/6", "/7", "/8", "/9", "/10", "/11", "/12", "/13", "/14", "/15", "/16", "/17", "",
},
}, {
name: jsontest.Name("ObjectN0"),
in: ` { } `,
outCompacted: `{}`,
tokens: []Token{BeginObject, EndObject},
pointers: []Pointer{"", ""},
}, {
name: jsontest.Name("ObjectN1"),
in: ` { "0" : 0 } `,
outCompacted: `{"0":0}`,
outEscaped: `{"\u0030":0}`,
outIndented: `{
"0": 0
}`,
tokens: []Token{BeginObject, String("0"), Uint(0), EndObject},
pointers: []Pointer{"", "/0", "/0", ""},
}, {
name: jsontest.Name("ObjectN2"),
in: ` { "0" : 0 , "1" : 1 } `,
outCompacted: `{"0":0,"1":1}`,
outEscaped: `{"\u0030":0,"\u0031":1}`,
outIndented: `{
"0": 0,
"1": 1
}`,
tokens: []Token{BeginObject, String("0"), Uint(0), String("1"), Uint(1), EndObject},
pointers: []Pointer{"", "/0", "/0", "/1", "/1", ""},
}, {
name: jsontest.Name("ObjectNested"),
in: ` { "0" : { "1" : { "2" : { "3" : { "4" : { } } } } } } `,
outCompacted: `{"0":{"1":{"2":{"3":{"4":{}}}}}}`,
outEscaped: `{"\u0030":{"\u0031":{"\u0032":{"\u0033":{"\u0034":{}}}}}}`,
outIndented: `{
"0": {
"1": {
"2": {
"3": {
"4": {}
}
}
}
}
}`,
tokens: []Token{BeginObject, String("0"), BeginObject, String("1"), BeginObject, String("2"), BeginObject, String("3"), BeginObject, String("4"), BeginObject, EndObject, EndObject, EndObject, EndObject, EndObject, EndObject},
pointers: []Pointer{
"",
"/0", "/0",
"/0/1", "/0/1",
"/0/1/2", "/0/1/2",
"/0/1/2/3", "/0/1/2/3",
"/0/1/2/3/4", "/0/1/2/3/4",
"/0/1/2/3/4",
"/0/1/2/3",
"/0/1/2",
"/0/1",
"/0",
"",
},
}, {
name: jsontest.Name("ObjectSuperNested"),
in: `{"": {
"44444": {
"6666666": "ccccccc",
"77777777": "bb",
"555555": "aaaa"
},
"0": {
"3333": "bbb",
"11": "",
"222": "aaaaa"
}
}}`,
outCompacted: `{"":{"44444":{"6666666":"ccccccc","77777777":"bb","555555":"aaaa"},"0":{"3333":"bbb","11":"","222":"aaaaa"}}}`,
outEscaped: `{"":{"\u0034\u0034\u0034\u0034\u0034":{"\u0036\u0036\u0036\u0036\u0036\u0036\u0036":"\u0063\u0063\u0063\u0063\u0063\u0063\u0063","\u0037\u0037\u0037\u0037\u0037\u0037\u0037\u0037":"\u0062\u0062","\u0035\u0035\u0035\u0035\u0035\u0035":"\u0061\u0061\u0061\u0061"},"\u0030":{"\u0033\u0033\u0033\u0033":"\u0062\u0062\u0062","\u0031\u0031":"","\u0032\u0032\u0032":"\u0061\u0061\u0061\u0061\u0061"}}}`,
outIndented: `{
"": {
"44444": {
"6666666": "ccccccc",
"77777777": "bb",
"555555": "aaaa"
},
"0": {
"3333": "bbb",
"11": "",
"222": "aaaaa"
}
}
}`,
outCanonicalized: `{"":{"0":{"11":"","222":"aaaaa","3333":"bbb"},"44444":{"555555":"aaaa","6666666":"ccccccc","77777777":"bb"}}}`,
tokens: []Token{
BeginObject,
String(""),
BeginObject,
String("44444"),
BeginObject,
String("6666666"), String("ccccccc"),
String("77777777"), String("bb"),
String("555555"), String("aaaa"),
EndObject,
String("0"),
BeginObject,
String("3333"), String("bbb"),
String("11"), String(""),
String("222"), String("aaaaa"),
EndObject,
EndObject,
EndObject,
},
pointers: []Pointer{
"",
"/", "/",
"//44444", "//44444",
"//44444/6666666", "//44444/6666666",
"//44444/77777777", "//44444/77777777",
"//44444/555555", "//44444/555555",
"//44444",
"//0", "//0",
"//0/3333", "//0/3333",
"//0/11", "//0/11",
"//0/222", "//0/222",
"//0",
"/",
"",
},
}, {
name: jsontest.Name("ArrayN0"),
in: ` [ ] `,
outCompacted: `[]`,
tokens: []Token{BeginArray, EndArray},
pointers: []Pointer{"", ""},
}, {
name: jsontest.Name("ArrayN1"),
in: ` [ 0 ] `,
outCompacted: `[0]`,
outIndented: `[
0
]`,
tokens: []Token{BeginArray, Uint(0), EndArray},
pointers: []Pointer{"", "/0", ""},
}, {
name: jsontest.Name("ArrayN2"),
in: ` [ 0 , 1 ] `,
outCompacted: `[0,1]`,
outIndented: `[
0,
1
]`,
tokens: []Token{BeginArray, Uint(0), Uint(1), EndArray},
}, {
name: jsontest.Name("ArrayNested"),
in: ` [ [ [ [ [ ] ] ] ] ] `,
outCompacted: `[[[[[]]]]]`,
outIndented: `[
[
[
[
[]
]
]
]
]`,
tokens: []Token{BeginArray, BeginArray, BeginArray, BeginArray, BeginArray, EndArray, EndArray, EndArray, EndArray, EndArray},
pointers: []Pointer{
"",
"/0",
"/0/0",
"/0/0/0",
"/0/0/0/0",
"/0/0/0/0",
"/0/0/0",
"/0/0",
"/0",
"",
},
}, {
name: jsontest.Name("Everything"),
in: ` {
"literals" : [ null , false , true ],
"string" : "Hello, 世界" ,
"number" : 3.14159 ,
"arrayN0" : [ ] ,
"arrayN1" : [ 0 ] ,
"arrayN2" : [ 0 , 1 ] ,
"objectN0" : { } ,
"objectN1" : { "0" : 0 } ,
"objectN2" : { "0" : 0 , "1" : 1 }
} `,
outCompacted: `{"literals":[null,false,true],"string":"Hello, 世界","number":3.14159,"arrayN0":[],"arrayN1":[0],"arrayN2":[0,1],"objectN0":{},"objectN1":{"0":0},"objectN2":{"0":0,"1":1}}`,
outEscaped: `{"\u006c\u0069\u0074\u0065\u0072\u0061\u006c\u0073":[null,false,true],"\u0073\u0074\u0072\u0069\u006e\u0067":"\u0048\u0065\u006c\u006c\u006f\u002c\u0020\u4e16\u754c","\u006e\u0075\u006d\u0062\u0065\u0072":3.14159,"\u0061\u0072\u0072\u0061\u0079\u004e\u0030":[],"\u0061\u0072\u0072\u0061\u0079\u004e\u0031":[0],"\u0061\u0072\u0072\u0061\u0079\u004e\u0032":[0,1],"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0030":{},"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0031":{"\u0030":0},"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0032":{"\u0030":0,"\u0031":1}}`,
outIndented: `{
"literals": [
null,
false,
true
],
"string": "Hello, 世界",
"number": 3.14159,
"arrayN0": [],
"arrayN1": [
0
],
"arrayN2": [
0,
1
],
"objectN0": {},
"objectN1": {
"0": 0
},
"objectN2": {
"0": 0,
"1": 1
}
}`,
outCanonicalized: `{"arrayN0":[],"arrayN1":[0],"arrayN2":[0,1],"literals":[null,false,true],"number":3.14159,"objectN0":{},"objectN1":{"0":0},"objectN2":{"0":0,"1":1},"string":"Hello, 世界"}`,
tokens: []Token{
BeginObject,
String("literals"), BeginArray, Null, False, True, EndArray,
String("string"), String("Hello, 世界"),
String("number"), Float(3.14159),
String("arrayN0"), BeginArray, EndArray,
String("arrayN1"), BeginArray, Uint(0), EndArray,
String("arrayN2"), BeginArray, Uint(0), Uint(1), EndArray,
String("objectN0"), BeginObject, EndObject,
String("objectN1"), BeginObject, String("0"), Uint(0), EndObject,
String("objectN2"), BeginObject, String("0"), Uint(0), String("1"), Uint(1), EndObject,
EndObject,
},
pointers: []Pointer{
"",
"/literals", "/literals",
"/literals/0",
"/literals/1",
"/literals/2",
"/literals",
"/string", "/string",
"/number", "/number",
"/arrayN0", "/arrayN0", "/arrayN0",
"/arrayN1", "/arrayN1",
"/arrayN1/0",
"/arrayN1",
"/arrayN2", "/arrayN2",
"/arrayN2/0",
"/arrayN2/1",
"/arrayN2",
"/objectN0", "/objectN0", "/objectN0",
"/objectN1", "/objectN1",
"/objectN1/0", "/objectN1/0",
"/objectN1",
"/objectN2", "/objectN2",
"/objectN2/0", "/objectN2/0",
"/objectN2/1", "/objectN2/1",
"/objectN2",
"",
},
}}
// TestCoderInterleaved tests that we can interleave calls that operate on
// tokens and raw values. The only error condition is trying to operate on a
// raw value when the next token is an end of object or array.
func TestCoderInterleaved(t *testing.T) {
for _, td := range coderTestdata {
// In TokenFirst and ValueFirst, alternate between tokens and values.
// In TokenDelims, only use tokens for object and array delimiters.
for _, modeName := range []string{"TokenFirst", "ValueFirst", "TokenDelims"} {
t.Run(path.Join(td.name.Name, modeName), func(t *testing.T) {
testCoderInterleaved(t, td.name.Where, modeName, td)
})
}
}
}
func testCoderInterleaved(t *testing.T, where jsontest.CasePos, modeName string, td coderTestdataEntry) {
src := strings.NewReader(td.in)
dst := new(bytes.Buffer)
dec := NewDecoder(src)
enc := NewEncoder(dst)
tickTock := modeName == "TokenFirst"
for {
if modeName == "TokenDelims" {
switch dec.PeekKind() {
case '{', '}', '[', ']':
tickTock = true // as token
default:
tickTock = false // as value
}
}
if tickTock {
tok, err := dec.ReadToken()
if err != nil {
if err == io.EOF {
break
}
t.Fatalf("%s: Decoder.ReadToken error: %v", where, err)
}
if err := enc.WriteToken(tok); err != nil {
t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
}
} else {
val, err := dec.ReadValue()
if err != nil {
// It is a syntactic error to call ReadValue
// at the end of an object or array.
// Retry as a ReadToken call.
expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']'
if expectError {
if !errors.As(err, new(*SyntacticError)) {
t.Fatalf("%s: Decoder.ReadToken error is %T, want %T", where, err, new(SyntacticError))
}
tickTock = !tickTock
continue
}
if err == io.EOF {
break
}
t.Fatalf("%s: Decoder.ReadValue error: %v", where, err)
}
if err := enc.WriteValue(val); err != nil {
t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
}
}
tickTock = !tickTock
}
got := dst.String()
want := td.outCompacted + "\n"
if got != want {
t.Fatalf("%s: output mismatch:\ngot %q\nwant %q", where, got, want)
}
}
func TestCoderStackPointer(t *testing.T) {
tests := []struct {
token Token
want Pointer
}{
{Null, ""},
{BeginArray, ""},
{EndArray, ""},
{BeginArray, ""},
{Bool(true), "/0"},
{EndArray, ""},
{BeginArray, ""},
{String("hello"), "/0"},
{String("goodbye"), "/1"},
{EndArray, ""},
{BeginObject, ""},
{EndObject, ""},
{BeginObject, ""},
{String("hello"), "/hello"},
{String("goodbye"), "/hello"},
{EndObject, ""},
{BeginObject, ""},
{String(""), "/"},
{Null, "/"},
{String("0"), "/0"},
{Null, "/0"},
{String("~"), "/~0"},
{Null, "/~0"},
{String("/"), "/~1"},
{Null, "/~1"},
{String("a//b~/c/~d~~e"), "/a~1~1b~0~1c~1~0d~0~0e"},
{Null, "/a~1~1b~0~1c~1~0d~0~0e"},
{String(" \r\n\t"), "/ \r\n\t"},
{Null, "/ \r\n\t"},
{EndObject, ""},
{BeginArray, ""},
{BeginObject, "/0"},
{String(""), "/0/"},
{BeginArray, "/0/"},
{BeginObject, "/0//0"},
{String("#"), "/0//0/#"},
{Null, "/0//0/#"},
{EndObject, "/0//0"},
{EndArray, "/0/"},
{EndObject, "/0"},
{EndArray, ""},
}
for _, allowDupes := range []bool{false, true} {
var name string
switch allowDupes {
case false:
name = "RejectDuplicateNames"
case true:
name = "AllowDuplicateNames"
}
t.Run(name, func(t *testing.T) {
bb := new(bytes.Buffer)
enc := NewEncoder(bb, AllowDuplicateNames(allowDupes))
for i, tt := range tests {
if err := enc.WriteToken(tt.token); err != nil {
t.Fatalf("%d: Encoder.WriteToken error: %v", i, err)
}
if got := enc.StackPointer(); got != tests[i].want {
t.Fatalf("%d: Encoder.StackPointer = %v, want %v", i, got, tests[i].want)
}
}
dec := NewDecoder(bb, AllowDuplicateNames(allowDupes))
for i := range tests {
if _, err := dec.ReadToken(); err != nil {
t.Fatalf("%d: Decoder.ReadToken error: %v", i, err)
}
if got := dec.StackPointer(); got != tests[i].want {
t.Fatalf("%d: Decoder.StackPointer = %v, want %v", i, got, tests[i].want)
}
}
})
}
}
func TestCoderMaxDepth(t *testing.T) {
trimArray := func(b []byte) []byte { return b[len(`[`) : len(b)-len(`]`)] }
maxArrays := []byte(strings.Repeat(`[`, maxNestingDepth+1) + strings.Repeat(`]`, maxNestingDepth+1))
trimObject := func(b []byte) []byte { return b[len(`{"":`) : len(b)-len(`}`)] }
maxObjects := []byte(strings.Repeat(`{"":`, maxNestingDepth+1) + `""` + strings.Repeat(`}`, maxNestingDepth+1))
t.Run("Decoder", func(t *testing.T) {
var dec Decoder
checkReadToken := func(t *testing.T, wantKind Kind, wantErr error) {
t.Helper()
if tok, err := dec.ReadToken(); tok.Kind() != wantKind || !equalError(err, wantErr) {
t.Fatalf("Decoder.ReadToken = (%q, %v), want (%q, %v)", byte(tok.Kind()), err, byte(wantKind), wantErr)
}
}
checkReadValue := func(t *testing.T, wantLen int, wantErr error) {
t.Helper()
if val, err := dec.ReadValue(); len(val) != wantLen || !equalError(err, wantErr) {
t.Fatalf("Decoder.ReadValue = (%d, %v), want (%d, %v)", len(val), err, wantLen, wantErr)
}
}
t.Run("ArraysValid/SingleValue", func(t *testing.T) {
dec.s.reset(trimArray(maxArrays), nil)
checkReadValue(t, maxNestingDepth*len(`[]`), nil)
})
t.Run("ArraysValid/TokenThenValue", func(t *testing.T) {
dec.s.reset(trimArray(maxArrays), nil)
checkReadToken(t, '[', nil)
checkReadValue(t, (maxNestingDepth-1)*len(`[]`), nil)
checkReadToken(t, ']', nil)
})
t.Run("ArraysValid/AllTokens", func(t *testing.T) {
dec.s.reset(trimArray(maxArrays), nil)
for range maxNestingDepth {
checkReadToken(t, '[', nil)
}
for range maxNestingDepth {
checkReadToken(t, ']', nil)
}
})
wantErr := &SyntacticError{
ByteOffset: maxNestingDepth,
JSONPointer: Pointer(strings.Repeat("/0", maxNestingDepth)),
Err: errMaxDepth,
}
t.Run("ArraysInvalid/SingleValue", func(t *testing.T) {
dec.s.reset(maxArrays, nil)
checkReadValue(t, 0, wantErr)
})
t.Run("ArraysInvalid/TokenThenValue", func(t *testing.T) {
dec.s.reset(maxArrays, nil)
checkReadToken(t, '[', nil)
checkReadValue(t, 0, wantErr)
})
t.Run("ArraysInvalid/AllTokens", func(t *testing.T) {
dec.s.reset(maxArrays, nil)
for range maxNestingDepth {
checkReadToken(t, '[', nil)
}
checkReadValue(t, 0, wantErr)
})
t.Run("ObjectsValid/SingleValue", func(t *testing.T) {
dec.s.reset(trimObject(maxObjects), nil)
checkReadValue(t, maxNestingDepth*len(`{"":}`)+len(`""`), nil)
})
t.Run("ObjectsValid/TokenThenValue", func(t *testing.T) {
dec.s.reset(trimObject(maxObjects), nil)
checkReadToken(t, '{', nil)
checkReadToken(t, '"', nil)
checkReadValue(t, (maxNestingDepth-1)*len(`{"":}`)+len(`""`), nil)
checkReadToken(t, '}', nil)
})
t.Run("ObjectsValid/AllTokens", func(t *testing.T) {
dec.s.reset(trimObject(maxObjects), nil)
for range maxNestingDepth {
checkReadToken(t, '{', nil)
checkReadToken(t, '"', nil)
}
checkReadToken(t, '"', nil)
for range maxNestingDepth {
checkReadToken(t, '}', nil)
}
})
wantErr = &SyntacticError{
ByteOffset: maxNestingDepth * int64(len(`{"":`)),
JSONPointer: Pointer(strings.Repeat("/", maxNestingDepth)),
Err: errMaxDepth,
}
t.Run("ObjectsInvalid/SingleValue", func(t *testing.T) {
dec.s.reset(maxObjects, nil)
checkReadValue(t, 0, wantErr)
})
t.Run("ObjectsInvalid/TokenThenValue", func(t *testing.T) {
dec.s.reset(maxObjects, nil)
checkReadToken(t, '{', nil)
checkReadToken(t, '"', nil)
checkReadValue(t, 0, wantErr)
})
t.Run("ObjectsInvalid/AllTokens", func(t *testing.T) {
dec.s.reset(maxObjects, nil)
for range maxNestingDepth {
checkReadToken(t, '{', nil)
checkReadToken(t, '"', nil)
}
checkReadToken(t, 0, wantErr)
})
})
t.Run("Encoder", func(t *testing.T) {
var enc Encoder
checkWriteToken := func(t *testing.T, tok Token, wantErr error) {
t.Helper()
if err := enc.WriteToken(tok); !equalError(err, wantErr) {
t.Fatalf("Encoder.WriteToken = %v, want %v", err, wantErr)
}
}
checkWriteValue := func(t *testing.T, val Value, wantErr error) {
t.Helper()
if err := enc.WriteValue(val); !equalError(err, wantErr) {
t.Fatalf("Encoder.WriteValue = %v, want %v", err, wantErr)
}
}
wantErr := &SyntacticError{
ByteOffset: maxNestingDepth,
JSONPointer: Pointer(strings.Repeat("/0", maxNestingDepth)),
Err: errMaxDepth,
}
t.Run("Arrays/SingleValue", func(t *testing.T) {
enc.s.reset(enc.s.Buf[:0], nil)
checkWriteValue(t, maxArrays, wantErr)
checkWriteValue(t, trimArray(maxArrays), nil)
})
t.Run("Arrays/TokenThenValue", func(t *testing.T) {
enc.s.reset(enc.s.Buf[:0], nil)
checkWriteToken(t, BeginArray, nil)
checkWriteValue(t, trimArray(maxArrays), wantErr)
checkWriteValue(t, trimArray(trimArray(maxArrays)), nil)
checkWriteToken(t, EndArray, nil)
})
t.Run("Arrays/AllTokens", func(t *testing.T) {
enc.s.reset(enc.s.Buf[:0], nil)
for range maxNestingDepth {
checkWriteToken(t, BeginArray, nil)
}
checkWriteToken(t, BeginArray, wantErr)
for range maxNestingDepth {
checkWriteToken(t, EndArray, nil)
}
})
wantErr = &SyntacticError{
ByteOffset: maxNestingDepth * int64(len(`{"":`)),
JSONPointer: Pointer(strings.Repeat("/", maxNestingDepth)),
Err: errMaxDepth,
}
t.Run("Objects/SingleValue", func(t *testing.T) {
enc.s.reset(enc.s.Buf[:0], nil)
checkWriteValue(t, maxObjects, wantErr)
checkWriteValue(t, trimObject(maxObjects), nil)
})
t.Run("Objects/TokenThenValue", func(t *testing.T) {
enc.s.reset(enc.s.Buf[:0], nil)
checkWriteToken(t, BeginObject, nil)
checkWriteToken(t, String(""), nil)
checkWriteValue(t, trimObject(maxObjects), wantErr)
checkWriteValue(t, trimObject(trimObject(maxObjects)), nil)
checkWriteToken(t, EndObject, nil)
})
t.Run("Objects/AllTokens", func(t *testing.T) {
enc.s.reset(enc.s.Buf[:0], nil)
for range maxNestingDepth - 1 {
checkWriteToken(t, BeginObject, nil)
checkWriteToken(t, String(""), nil)
}
checkWriteToken(t, BeginObject, nil)
checkWriteToken(t, String(""), nil)
checkWriteToken(t, BeginObject, wantErr)
checkWriteToken(t, String(""), nil)
for range maxNestingDepth {
checkWriteToken(t, EndObject, nil)
}
})
})
}
// FaultyBuffer implements io.Reader and io.Writer.
// It may process fewer bytes than the provided buffer
// and may randomly return an error.
type FaultyBuffer struct {
B []byte
// MaxBytes is the maximum number of bytes read/written.
// A random number of bytes within [0, MaxBytes] are processed.
// A non-positive value is treated as infinity.
MaxBytes int
// MayError specifies whether to randomly provide this error.
// Even if an error is returned, no bytes are dropped.
MayError error
// Rand to use for pseudo-random behavior.
// If nil, it will be initialized with rand.NewSource(0).
Rand rand.Source
}
func (p *FaultyBuffer) Read(b []byte) (int, error) {
b = b[:copy(b[:p.mayTruncate(len(b))], p.B)]
p.B = p.B[len(b):]
if len(p.B) == 0 && (len(b) == 0 || p.randN(2) == 0) {
return len(b), io.EOF
}
return len(b), p.mayError()
}
func (p *FaultyBuffer) Write(b []byte) (int, error) {
b2 := b[:p.mayTruncate(len(b))]
p.B = append(p.B, b2...)
if len(b2) < len(b) {
return len(b2), io.ErrShortWrite
}
return len(b2), p.mayError()
}
// mayTruncate may return a value between [0, n].
func (p *FaultyBuffer) mayTruncate(n int) int {
if p.MaxBytes > 0 {
if n > p.MaxBytes {
n = p.MaxBytes
}
return p.randN(n + 1)
}
return n
}
// mayError may return a non-nil error.
func (p *FaultyBuffer) mayError() error {
if p.MayError != nil && p.randN(2) == 0 {
return p.MayError
}
return nil
}
func (p *FaultyBuffer) randN(n int) int {
if p.Rand == nil {
p.Rand = rand.NewSource(0)
}
return int(p.Rand.Int63() % int64(n))
}
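FaultyBuffer exists to stress the coders against short reads/writes and injected errors. A hypothetical usage sketch (not part of this commit) that drives the Decoder through tiny reads; it assumes the Decoder tolerates short reads, which the fault-injection tests in this package rely on:

func TestFaultyBufferShortReads(t *testing.T) {
	src := &FaultyBuffer{
		B:        []byte(` {"name":"value","array":[null,false,true]} `),
		MaxBytes: 3, // each Read call returns at most 3 bytes
	}
	dec := NewDecoder(src)
	val, err := dec.ReadValue()
	if err != nil {
		t.Fatalf("Decoder.ReadValue error: %v", err)
	}
	if want := `{"name":"value","array":[null,false,true]}`; string(val) != want {
		t.Fatalf("Decoder.ReadValue = %s, want %s", val, want)
	}
}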

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,116 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2

// Package jsontext implements syntactic processing of JSON
// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785.
// JSON is a simple data interchange format that can represent
// primitive data types such as booleans, strings, and numbers,
// in addition to structured data types such as objects and arrays.
//
// This package (encoding/json/jsontext) is experimental,
// and not subject to the Go 1 compatibility promise.
// It only exists when building with the GOEXPERIMENT=jsonv2 environment variable set.
// Most users should use [encoding/json].
//
// The [Encoder] and [Decoder] types are used to encode or decode
// a stream of JSON tokens or values.
//
// # Tokens and Values
//
// A JSON token refers to the basic structural elements of JSON:
//
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - a begin or end delimiter for a JSON object (i.e., '{' or '}')
// - a begin or end delimiter for a JSON array (i.e., '[' or ']')
//
// A JSON token is represented by the [Token] type in Go. Technically,
// there are two additional structural characters (i.e., ':' and ','),
// but there is no [Token] representation for them since their presence
// can be inferred by the structure of the JSON grammar itself.
// For example, there must always be an implicit colon between
// the name and value of a JSON object member.
//
// A JSON value refers to a complete unit of JSON data:
//
// - a JSON literal, string, or number
// - a JSON object (e.g., `{"name":"value"}`)
// - a JSON array (e.g., `[1,2,3]`)
//
// A JSON value is represented by the [Value] type in Go and is a []byte
// containing the raw textual representation of the value. There is some overlap
// between tokens and values as both contain literals, strings, and numbers.
// However, only a value can represent the entirety of a JSON object or array.
//
// The [Encoder] and [Decoder] types contain methods to read or write the next
// [Token] or [Value] in a sequence. They maintain a state machine to validate
// whether the sequence of JSON tokens and/or values produces valid JSON.
// [Options] may be passed to the [NewEncoder] or [NewDecoder] constructors
// to configure the syntactic behavior of encoding and decoding.
//
// # Terminology
//
// The terms "encode" and "decode" are used for syntactic functionality
// that is concerned with processing JSON based on its grammar, and
// the terms "marshal" and "unmarshal" are used for semantic functionality
// that determines the meaning of JSON values as Go values and vice-versa.
// This package (i.e., [jsontext]) deals with JSON at a syntactic layer,
// while [encoding/json/v2] deals with JSON at a semantic layer.
// The goal is to provide a clear distinction between functionality that
// is purely concerned with encoding versus that of marshaling.
// For example, one can directly encode a stream of JSON tokens without
// needing to marshal a concrete Go value representing them.
// Similarly, one can decode a stream of JSON tokens without
// needing to unmarshal them into a concrete Go value.
//
// This package uses JSON terminology when discussing JSON, which may differ
// from related concepts in Go or elsewhere in computing literature.
//
// - a JSON "object" refers to an unordered collection of name/value members.
// - a JSON "array" refers to an ordered sequence of elements.
// - a JSON "value" refers to either a literal (i.e., null, false, or true),
// string, number, object, or array.
//
// See RFC 8259 for more information.
//
// # Specifications
//
// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259,
// and RFC 8785. Each RFC is generally a stricter subset of another RFC.
// In increasing order of strictness:
//
// - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8
// and also do not require (but recommend) that object names be unique.
// - RFC 8259 requires the use of UTF-8,
// but does not require (but recommends) that object names be unique.
// - RFC 7493 requires the use of UTF-8
// and also requires that object names be unique.
// - RFC 8785 defines a canonical representation. It requires the use of UTF-8
// and also requires that object names be unique and in a specific ordering.
// It specifies exactly how strings and numbers must be formatted.
//
// The primary difference between RFC 4627 and RFC 7159 is that the former
// restricted top-level values to only JSON objects and arrays, while
// RFC 7159 and subsequent RFCs permit top-level values to additionally be
// JSON nulls, booleans, strings, or numbers.
//
// By default, this package operates on RFC 7493, but can be configured
// to operate according to the other RFC specifications.
// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it.
// In particular, it makes specific choices about behavior that RFC 8259
// leaves as undefined in order to ensure greater interoperability.
//
// # Security Considerations
//
// See the "Security Considerations" section in [encoding/json/v2].
package jsontext
// requireKeyedLiterals can be embedded in a struct to require keyed literals.
type requireKeyedLiterals struct{}
// nonComparable can be embedded in a struct to prevent comparability.
type nonComparable [0]func()
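A minimal sketch of reading a JSON value as a stream of tokens with the API documented above; it assumes a build with GOEXPERIMENT=jsonv2 set and uses the upstream import path (the fork's path may differ):

package main

import (
	"encoding/json/jsontext"
	"fmt"
	"io"
	"strings"
)

func main() {
	in := `{"name":"value","array":[1,2,3]}`
	dec := jsontext.NewDecoder(strings.NewReader(in))
	for {
		tok, err := dec.ReadToken() // one structural element at a time
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		// Kind is a single byte ('{', '"', '0', ...); String reports the token text.
		fmt.Printf("%c\t%s\n", tok.Kind(), tok.String())
	}
}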


@@ -0,0 +1,972 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2

package jsontext
import (
"bytes"
"io"
"math/bits"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
)
// Encoder is a streaming encoder from raw JSON tokens and values.
// It is used to write a stream of top-level JSON values,
// each terminated with a newline character.
//
// [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved.
// For example, the following JSON value:
//
// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be composed with the following calls (ignoring errors for brevity):
//
// e.WriteToken(BeginObject) // {
// e.WriteToken(String("name")) // "name"
// e.WriteToken(String("value")) // "value"
// e.WriteValue(Value(`"array"`)) // "array"
// e.WriteToken(BeginArray) // [
// e.WriteToken(Null) // null
// e.WriteToken(False) // false
// e.WriteValue(Value("true")) // true
// e.WriteToken(Float(3.14159)) // 3.14159
// e.WriteToken(EndArray) // ]
// e.WriteValue(Value(`"object"`)) // "object"
// e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"}
// e.WriteToken(EndObject) // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call [Encoder.WriteToken] with a string
// for object names.
type Encoder struct {
s encoderState
}
// encoderState is the low-level state of Encoder.
// It has exported fields and methods for use by the "json" package.
type encoderState struct {
state
encodeBuffer
jsonopts.Struct
SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers
}
// encodeBuffer is a buffer split into 2 segments:
//
// - buf[0:len(buf)] // written (but unflushed) portion of the buffer
// - buf[len(buf):cap(buf)] // unused portion of the buffer
type encodeBuffer struct {
Buf []byte // may alias wr if it is a bytes.Buffer
// baseOffset is added to len(buf) to obtain the absolute offset
// relative to the start of io.Writer stream.
baseOffset int64
wr io.Writer
// maxValue is the approximate maximum Value size passed to WriteValue.
maxValue int
// availBuffer is the buffer returned by the AvailableBuffer method.
availBuffer []byte // always has zero length
// bufStats is statistics about buffer utilization.
// It is only used with pooled encoders in pools.go.
bufStats bufferStatistics
}
// NewEncoder constructs a new streaming encoder writing to w
// configured with the provided options.
// It flushes the internal buffer when the buffer is sufficiently full or
// when a top-level value has been written.
//
// If w is a [bytes.Buffer], then the encoder appends directly into the buffer
// without copying the contents from an intermediate buffer.
func NewEncoder(w io.Writer, opts ...Options) *Encoder {
e := new(Encoder)
e.Reset(w, opts...)
return e
}
// Reset resets an encoder such that it is writing afresh to w and
// configured with the provided options. Reset must not be called on
an Encoder passed to the [encoding/json/v2.MarshalerTo.MarshalJSONTo] method
// or the [encoding/json/v2.MarshalToFunc] function.
func (e *Encoder) Reset(w io.Writer, opts ...Options) {
switch {
case e == nil:
panic("jsontext: invalid nil Encoder")
case w == nil:
panic("jsontext: invalid nil io.Writer")
case e.s.Flags.Get(jsonflags.WithinArshalCall):
panic("jsontext: cannot reset Encoder passed to json.MarshalerTo")
}
e.s.reset(nil, w, opts...)
}
func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) {
e.state.reset()
e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: e.bufStats}
if bb, ok := w.(*bytes.Buffer); ok && bb != nil {
e.Buf = bb.AvailableBuffer() // alias the unused buffer of bb
}
opts2 := jsonopts.Struct{} // avoid mutating e.Struct in case it is part of opts
opts2.Join(opts...)
e.Struct = opts2
if e.Flags.Get(jsonflags.Multiline) {
if !e.Flags.Has(jsonflags.SpaceAfterColon) {
e.Flags.Set(jsonflags.SpaceAfterColon | 1)
}
if !e.Flags.Has(jsonflags.SpaceAfterComma) {
e.Flags.Set(jsonflags.SpaceAfterComma | 0)
}
if !e.Flags.Has(jsonflags.Indent) {
e.Flags.Set(jsonflags.Indent | 1)
e.Indent = "\t"
}
}
}
// Options returns the options used to construct the encoder and
// may additionally contain semantic options passed to a
// [encoding/json/v2.MarshalEncode] call.
//
// If operating within
// a [encoding/json/v2.MarshalerTo.MarshalJSONTo] method call or
// a [encoding/json/v2.MarshalToFunc] function call,
// then the returned options are only valid within the call.
func (e *Encoder) Options() Options {
return &e.s.Struct
}
// NeedFlush determines whether to flush at this point.
func (e *encoderState) NeedFlush() bool {
// NOTE: This function is carefully written to be inlinable.
// Avoid flushing if e.wr is nil since there is no underlying writer.
// Flush if less than 25% of the capacity remains.
// Flushing at some constant fraction ensures that the buffer stops growing
// so long as the largest Token or Value fits within that unused capacity.
return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4)
}
// Flush flushes the buffer to the underlying io.Writer.
// It may append a trailing newline after the top-level value.
func (e *encoderState) Flush() error {
if e.wr == nil || e.avoidFlush() {
return nil
}
// In streaming mode, always emit a newline after the top-level value.
if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) {
e.Buf = append(e.Buf, '\n')
}
// Inform objectNameStack that we are about to flush the buffer content.
e.Names.copyQuotedBuffer(e.Buf)
// Specialize bytes.Buffer for better performance.
if bb, ok := e.wr.(*bytes.Buffer); ok {
// If e.buf already aliases the internal buffer of bb,
// then the Write call simply increments the internal offset,
// otherwise Write operates as expected.
// See https://go.dev/issue/42986.
n, _ := bb.Write(e.Buf) // never fails unless bb is nil
e.baseOffset += int64(n)
// If the internal buffer of bytes.Buffer is too small,
// append operations elsewhere in the Encoder may grow the buffer.
// This would be semantically correct, but hurts performance.
// As such, ensure 25% of the current length is always available
// to reduce the probability that other appends must allocate.
if avail := bb.Available(); avail < bb.Len()/4 {
bb.Grow(avail + 1)
}
e.Buf = bb.AvailableBuffer()
return nil
}
// Flush the internal buffer to the underlying io.Writer.
n, err := e.wr.Write(e.Buf)
e.baseOffset += int64(n)
if err != nil {
// In the event of an error, preserve the unflushed portion.
// Thus, write errors aren't fatal so long as the io.Writer
// maintains consistent state after errors.
if n > 0 {
e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])]
}
return &ioError{action: "write", err: err}
}
e.Buf = e.Buf[:0]
// Check whether to grow the buffer.
// Note that cap(e.buf) may already exceed maxBufferSize since
// an append elsewhere already grew it to store a large token.
const maxBufferSize = 4 << 10
const growthSizeFactor = 2 // higher value is faster
const growthRateFactor = 2 // higher value is slower
// By default, grow if below the maximum buffer size.
grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor
// Growing can be expensive, so only grow
// if a sufficient number of bytes have been processed.
grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor
if grow {
e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor)
}
return nil
}
func (d *encodeBuffer) offsetAt(pos int) int64 { return d.baseOffset + int64(pos) }
func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) }
func (e *encodeBuffer) unflushedBuffer() []byte { return e.Buf }
// avoidFlush indicates whether to avoid flushing to ensure there is always
// enough in the buffer to unwrite the last object member if it were empty.
func (e *encoderState) avoidFlush() bool {
switch {
case e.Tokens.Last.Length() == 0:
// Never flush after BeginObject or BeginArray since we don't know yet
// if the object or array will end up being empty.
return true
case e.Tokens.Last.needObjectValue():
// Never flush before the object value since we don't know yet
// if the object value will end up being empty.
return true
case e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2:
// Never flush after the object value if it does turn out to be empty.
switch string(e.Buf[len(e.Buf)-2:]) {
case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value
return true
}
}
return false
}
// UnwriteEmptyObjectMember unwrites the last object member if it is empty
// and reports whether it performed an unwrite operation.
func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool {
if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 {
panic("BUG: must be called on an object after writing a value")
}
// The flushing logic is modified to never flush a trailing empty value.
// The encoder never writes trailing whitespace eagerly.
b := e.unflushedBuffer()
// Detect whether the last value was empty.
var n int
if len(b) >= 3 {
switch string(b[len(b)-2:]) {
case "ll": // last two bytes of `null`
n = len(`null`)
case `""`:
// It is possible for a non-empty string to have `""` as a suffix
// if the second to the last quote was escaped.
if b[len(b)-3] == '\\' {
return false // e.g., `"\""` is not empty
}
n = len(`""`)
case `{}`:
n = len(`{}`)
case `[]`:
n = len(`[]`)
}
}
if n == 0 {
return false
}
// Unwrite the value, whitespace, colon, name, whitespace, and comma.
b = b[:len(b)-n]
b = jsonwire.TrimSuffixWhitespace(b)
b = jsonwire.TrimSuffixByte(b, ':')
b = jsonwire.TrimSuffixString(b)
b = jsonwire.TrimSuffixWhitespace(b)
b = jsonwire.TrimSuffixByte(b, ',')
e.Buf = b // store back truncated unflushed buffer
// Undo state changes.
e.Tokens.Last.decrement() // for object member value
e.Tokens.Last.decrement() // for object member name
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
if e.Tokens.Last.isActiveNamespace() {
e.Namespaces.Last().removeLast()
}
}
e.Names.clearLast()
if prevName != nil {
e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName
e.Names.replaceLastUnquotedName(*prevName)
}
return true
}
// UnwriteOnlyObjectMemberName unwrites the only object member name
// and returns the unquoted name.
func (e *encoderState) UnwriteOnlyObjectMemberName() string {
if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 {
panic("BUG: must be called on an object after writing first name")
}
// Unwrite the name and whitespace.
b := jsonwire.TrimSuffixString(e.Buf)
isVerbatim := bytes.IndexByte(e.Buf[len(b):], '\\') < 0
name := string(jsonwire.UnquoteMayCopy(e.Buf[len(b):], isVerbatim))
e.Buf = jsonwire.TrimSuffixWhitespace(b)
// Undo state changes.
e.Tokens.Last.decrement()
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
if e.Tokens.Last.isActiveNamespace() {
e.Namespaces.Last().removeLast()
}
}
e.Names.clearLast()
return name
}
// WriteToken writes the next token and advances the internal write offset.
//
// The provided token kind must be consistent with the JSON grammar.
// For example, it is an error to provide a number when the encoder
// is expecting an object name (which is always a string), or
// to provide an end object delimiter when the encoder is finishing an array.
// If the provided token is invalid, then it reports a [SyntacticError] and
// the internal state remains unchanged. The offset reported
// in [SyntacticError] will be relative to the [Encoder.OutputOffset].
func (e *Encoder) WriteToken(t Token) error {
return e.s.WriteToken(t)
}
func (e *encoderState) WriteToken(t Token) error {
k := t.Kind()
b := e.Buf // use local variable to avoid mutating e in case of error
// Append any delimiters or optional whitespace.
b = e.Tokens.MayAppendDelim(b, k)
if e.Flags.Get(jsonflags.AnyWhitespace) {
b = e.appendWhitespace(b, k)
}
pos := len(b) // offset before the token
// Append the token to the output and to the state machine.
var err error
switch k {
case 'n':
b = append(b, "null"...)
err = e.Tokens.appendLiteral()
case 'f':
b = append(b, "false"...)
err = e.Tokens.appendLiteral()
case 't':
b = append(b, "true"...)
err = e.Tokens.appendLiteral()
case '"':
if b, err = t.appendString(b, &e.Flags); err != nil {
break
}
if e.Tokens.Last.NeedObjectName() {
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
if !e.Tokens.Last.isValidNamespace() {
err = errInvalidNamespace
break
}
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
break
}
}
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
}
err = e.Tokens.appendString()
case '0':
if b, err = t.appendNumber(b, &e.Flags); err != nil {
break
}
err = e.Tokens.appendNumber()
case '{':
b = append(b, '{')
if err = e.Tokens.pushObject(); err != nil {
break
}
e.Names.push()
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
e.Namespaces.push()
}
case '}':
b = append(b, '}')
if err = e.Tokens.popObject(); err != nil {
break
}
e.Names.pop()
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
e.Namespaces.pop()
}
case '[':
b = append(b, '[')
err = e.Tokens.pushArray()
case ']':
b = append(b, ']')
err = e.Tokens.popArray()
default:
err = errInvalidToken
}
if err != nil {
return wrapSyntacticError(e, err, pos, +1)
}
// Finish off the buffer and store it back into e.
e.Buf = b
if e.NeedFlush() {
return e.Flush()
}
return nil
}
// AppendRaw appends either a raw string (without double quotes) or number.
// Specify safeASCII if the string output is guaranteed to be ASCII
// without any characters (including '<', '>', and '&') that need escaping,
// otherwise this will validate whether the string needs escaping.
// The appended bytes for a JSON number must be valid.
//
// This is a specialized implementation of Encoder.WriteValue
// that allows appending directly into the buffer.
// It is only called from marshal logic in the "json" package.
func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error {
b := e.Buf // use local variable to avoid mutating e in case of error
// Append any delimiters or optional whitespace.
b = e.Tokens.MayAppendDelim(b, k)
if e.Flags.Get(jsonflags.AnyWhitespace) {
b = e.appendWhitespace(b, k)
}
pos := len(b) // offset before the token
var err error
switch k {
case '"':
// Append directly into the encoder buffer by assuming that
// most of the time none of the characters need escaping.
b = append(b, '"')
if b, err = appendFn(b); err != nil {
return err
}
b = append(b, '"')
// Check whether we need to escape the string and if necessary
// copy it to a scratch buffer and then escape it back.
isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)])
if !isVerbatim {
var err error
b2 := append(e.availBuffer, b[pos+len(`"`):len(b)-len(`"`)]...)
b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags)
e.availBuffer = b2[:0]
if err != nil {
return wrapSyntacticError(e, err, pos, +1)
}
}
// Update the state machine.
if e.Tokens.Last.NeedObjectName() {
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
if !e.Tokens.Last.isValidNamespace() {
return wrapSyntacticError(e, errInvalidNamespace, pos, +1)
}
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) {
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
return wrapSyntacticError(e, err, pos, +1)
}
}
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
}
if err := e.Tokens.appendString(); err != nil {
return wrapSyntacticError(e, err, pos, +1)
}
case '0':
if b, err = appendFn(b); err != nil {
return err
}
if err := e.Tokens.appendNumber(); err != nil {
return wrapSyntacticError(e, err, pos, +1)
}
default:
panic("BUG: invalid kind")
}
// Finish off the buffer and store it back into e.
e.Buf = b
if e.NeedFlush() {
return e.Flush()
}
return nil
}
// WriteValue writes the next raw value and advances the internal write offset.
// The Encoder does not simply copy the provided value verbatim, but
// parses it to ensure that it is syntactically valid and reformats it
// according to how the Encoder is configured to format whitespace and strings.
// If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled
// as the Unicode replacement character, U+FFFD.
//
// The provided value kind must be consistent with the JSON grammar
// (see examples on [Encoder.WriteToken]). If the provided value is invalid,
// then it reports a [SyntacticError] and the internal state remains unchanged.
// The offset reported in [SyntacticError] will be relative to the
// [Encoder.OutputOffset] plus the offset into v of any encountered syntax error.
func (e *Encoder) WriteValue(v Value) error {
return e.s.WriteValue(v)
}
func (e *encoderState) WriteValue(v Value) error {
e.maxValue |= len(v) // bitwise OR is a fast approximation of max
k := v.Kind()
b := e.Buf // use local variable to avoid mutating e in case of error
// Append any delimiters or optional whitespace.
b = e.Tokens.MayAppendDelim(b, k)
if e.Flags.Get(jsonflags.AnyWhitespace) {
b = e.appendWhitespace(b, k)
}
pos := len(b) // offset before the value
// Append the value to the output.
var n int
n += jsonwire.ConsumeWhitespace(v[n:])
b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth())
if err != nil {
return wrapSyntacticError(e, err, pos+n+m, +1)
}
n += m
n += jsonwire.ConsumeWhitespace(v[n:])
if len(v) > n {
err = jsonwire.NewInvalidCharacterError(v[n:], "after top-level value")
return wrapSyntacticError(e, err, pos+n, 0)
}
// Append the kind to the state machine.
switch k {
case 'n', 'f', 't':
err = e.Tokens.appendLiteral()
case '"':
if e.Tokens.Last.NeedObjectName() {
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
if !e.Tokens.Last.isValidNamespace() {
err = errInvalidNamespace
break
}
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
break
}
}
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
}
err = e.Tokens.appendString()
case '0':
err = e.Tokens.appendNumber()
case '{':
if err = e.Tokens.pushObject(); err != nil {
break
}
if err = e.Tokens.popObject(); err != nil {
panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
}
if e.Flags.Get(jsonflags.ReorderRawObjects) {
mustReorderObjects(b[pos:])
}
case '[':
if err = e.Tokens.pushArray(); err != nil {
break
}
if err = e.Tokens.popArray(); err != nil {
panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
}
if e.Flags.Get(jsonflags.ReorderRawObjects) {
mustReorderObjects(b[pos:])
}
}
if err != nil {
return wrapSyntacticError(e, err, pos, +1)
}
// Finish off the buffer and store it back into e.
e.Buf = b
if e.NeedFlush() {
return e.Flush()
}
return nil
}
// CountNextDelimWhitespace counts the number of delimiter and whitespace
// bytes assuming the upcoming token is a JSON value.
// This method is used for error reporting at the semantic layer.
func (e *encoderState) CountNextDelimWhitespace() (n int) {
const next = Kind('"') // arbitrary kind as next JSON value
delim := e.Tokens.needDelim(next)
if delim > 0 {
n += len(",") | len(":")
}
if delim == ':' {
if e.Flags.Get(jsonflags.SpaceAfterColon) {
n += len(" ")
}
} else {
if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
n += len(" ")
}
if e.Flags.Get(jsonflags.Multiline) {
if m := e.Tokens.NeedIndent(next); m > 0 {
n += len("\n") + len(e.IndentPrefix) + (m-1)*len(e.Indent)
}
}
}
return n
}
// appendWhitespace appends whitespace that immediately precedes the next token.
func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte {
if delim := e.Tokens.needDelim(next); delim == ':' {
if e.Flags.Get(jsonflags.SpaceAfterColon) {
b = append(b, ' ')
}
} else {
if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
b = append(b, ' ')
}
if e.Flags.Get(jsonflags.Multiline) {
b = e.AppendIndent(b, e.Tokens.NeedIndent(next))
}
}
return b
}
// AppendIndent appends the appropriate number of indentation characters
// for the current nested level, n.
func (e *encoderState) AppendIndent(b []byte, n int) []byte {
if n == 0 {
return b
}
b = append(b, '\n')
b = append(b, e.IndentPrefix...)
for ; n > 1; n-- {
b = append(b, e.Indent...)
}
return b
}
// reformatValue parses a JSON value from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) {
// TODO: Should this update ValueFlags as input?
if len(src) == 0 {
return dst, 0, io.ErrUnexpectedEOF
}
switch k := Kind(src[0]).normalize(); k {
case 'n':
if jsonwire.ConsumeNull(src) == 0 {
n, err := jsonwire.ConsumeLiteral(src, "null")
return dst, n, err
}
return append(dst, "null"...), len("null"), nil
case 'f':
if jsonwire.ConsumeFalse(src) == 0 {
n, err := jsonwire.ConsumeLiteral(src, "false")
return dst, n, err
}
return append(dst, "false"...), len("false"), nil
case 't':
if jsonwire.ConsumeTrue(src) == 0 {
n, err := jsonwire.ConsumeLiteral(src, "true")
return dst, n, err
}
return append(dst, "true"...), len("true"), nil
case '"':
if n := jsonwire.ConsumeSimpleString(src); n > 0 {
dst = append(dst, src[:n]...) // copy simple strings verbatim
return dst, n, nil
}
return jsonwire.ReformatString(dst, src, &e.Flags)
case '0':
if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) {
dst = append(dst, src[:n]...) // copy simple numbers verbatim
return dst, n, nil
}
return jsonwire.ReformatNumber(dst, src, &e.Flags)
case '{':
return e.reformatObject(dst, src, depth)
case '[':
return e.reformatArray(dst, src, depth)
default:
return dst, 0, jsonwire.NewInvalidCharacterError(src, "at start of value")
}
}
// reformatObject parses a JSON object from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) {
// Append object begin.
if len(src) == 0 || src[0] != '{' {
panic("BUG: reformatObject must be called with a buffer that starts with '{'")
} else if depth == maxNestingDepth+1 {
return dst, 0, errMaxDepth
}
dst = append(dst, '{')
n := len("{")
// Append (possible) object end.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
if src[n] == '}' {
dst = append(dst, '}')
n += len("}")
return dst, n, nil
}
var err error
var names *objectNamespace
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
e.Namespaces.push()
defer e.Namespaces.pop()
names = e.Namespaces.Last()
}
depth++
for {
// Append optional newline and indentation.
if e.Flags.Get(jsonflags.Multiline) {
dst = e.AppendIndent(dst, depth)
}
// Append object name.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
m := jsonwire.ConsumeSimpleString(src[n:])
isVerbatim := m > 0
if isVerbatim {
dst = append(dst, src[n:n+m]...)
} else {
dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags)
if err != nil {
return dst, n + m, err
}
}
quotedName := src[n : n+m]
if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, isVerbatim) {
return dst, n, wrapWithObjectName(ErrDuplicateName, quotedName)
}
n += m
// Append colon.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
}
if src[n] != ':' {
err = jsonwire.NewInvalidCharacterError(src[n:], "after object name (expecting ':')")
return dst, n, wrapWithObjectName(err, quotedName)
}
dst = append(dst, ':')
n += len(":")
if e.Flags.Get(jsonflags.SpaceAfterColon) {
dst = append(dst, ' ')
}
// Append object value.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
}
dst, m, err = e.reformatValue(dst, src[n:], depth)
if err != nil {
return dst, n + m, wrapWithObjectName(err, quotedName)
}
n += m
// Append comma or object end.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
switch src[n] {
case ',':
dst = append(dst, ',')
if e.Flags.Get(jsonflags.SpaceAfterComma) {
dst = append(dst, ' ')
}
n += len(",")
continue
case '}':
if e.Flags.Get(jsonflags.Multiline) {
dst = e.AppendIndent(dst, depth-1)
}
dst = append(dst, '}')
n += len("}")
return dst, n, nil
default:
return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after object value (expecting ',' or '}')")
}
}
}
// reformatArray parses a JSON array from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) {
// Append array begin.
if len(src) == 0 || src[0] != '[' {
panic("BUG: reformatArray must be called with a buffer that starts with '['")
} else if depth == maxNestingDepth+1 {
return dst, 0, errMaxDepth
}
dst = append(dst, '[')
n := len("[")
// Append (possible) array end.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
if src[n] == ']' {
dst = append(dst, ']')
n += len("]")
return dst, n, nil
}
var idx int64
var err error
depth++
for {
// Append optional newline and indentation.
if e.Flags.Get(jsonflags.Multiline) {
dst = e.AppendIndent(dst, depth)
}
// Append array value.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
var m int
dst, m, err = e.reformatValue(dst, src[n:], depth)
if err != nil {
return dst, n + m, wrapWithArrayIndex(err, idx)
}
n += m
// Append comma or array end.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
switch src[n] {
case ',':
dst = append(dst, ',')
if e.Flags.Get(jsonflags.SpaceAfterComma) {
dst = append(dst, ' ')
}
n += len(",")
idx++
continue
case ']':
if e.Flags.Get(jsonflags.Multiline) {
dst = e.AppendIndent(dst, depth-1)
}
dst = append(dst, ']')
n += len("]")
return dst, n, nil
default:
return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after array value (expecting ',' or ']')")
}
}
}
// OutputOffset returns the current output byte offset. It gives the location
// of the next byte immediately after the most recently written token or value.
// The number of bytes actually written to the underlying [io.Writer] may be less
// than this offset due to internal buffering effects.
func (e *Encoder) OutputOffset() int64 {
return e.s.previousOffsetEnd()
}
// AvailableBuffer returns a zero-length buffer with a possible non-zero capacity.
// This buffer is intended to be used to populate a [Value]
// being passed to an immediately succeeding [Encoder.WriteValue] call.
//
// Example usage:
//
// b := d.AvailableBuffer()
// b = append(b, '"')
// b = appendString(b, v) // append the string formatting of v
// b = append(b, '"')
// ... := d.WriteValue(b)
//
// It is the user's responsibility to ensure that the value is valid JSON.
func (e *Encoder) AvailableBuffer() []byte {
// NOTE: We don't return e.buf[len(e.buf):cap(e.buf)] since WriteValue would
// need to take special care to avoid mangling the data while reformatting.
// WriteValue can't easily identify whether the input Value aliases e.buf
// without using unsafe.Pointer. Thus, we just return a different buffer.
// Should this ever alias e.buf, we need to consider how it operates with
// the specialized performance optimization for bytes.Buffer.
n := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length
if cap(e.s.availBuffer) < n {
e.s.availBuffer = make([]byte, 0, n)
}
return e.s.availBuffer
}
// StackDepth returns the depth of the state machine for written JSON data.
// Each level on the stack represents a nested JSON object or array.
// It is incremented whenever a [BeginObject] or [BeginArray] token is encountered
// and decremented whenever an [EndObject] or [EndArray] token is encountered.
// The depth is zero-indexed, where zero represents the top-level JSON value.
func (e *Encoder) StackDepth() int {
// NOTE: Keep in sync with Decoder.StackDepth.
return e.s.Tokens.Depth() - 1
}
// StackIndex returns information about the specified stack level.
// It must be a number between 0 and [Encoder.StackDepth], inclusive.
// For each level, it reports the kind:
//
// - 0 for a level of zero,
// - '{' for a level representing a JSON object, and
// - '[' for a level representing a JSON array.
//
// It also reports the length of that JSON object or array.
// Each name and value in a JSON object is counted separately,
// so the effective number of members would be half the length.
// A complete JSON object must have an even length.
func (e *Encoder) StackIndex(i int) (Kind, int64) {
// NOTE: Keep in sync with Decoder.StackIndex.
switch s := e.s.Tokens.index(i); {
case i > 0 && s.isObject():
return '{', s.Length()
case i > 0 && s.isArray():
return '[', s.Length()
default:
return 0, s.Length()
}
}
// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
func (e *Encoder) StackPointer() Pointer {
return Pointer(e.s.AppendStackPointer(nil, -1))
}
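// Example use of [Encoder.StackPointer] (an illustrative sketch; the reported
// pointers mirror those asserted by the encoder tests in this change):
//
//	enc := NewEncoder(new(bytes.Buffer))
//	_ = enc.WriteToken(BeginObject)
//	_ = enc.WriteToken(String("a1")) // StackPointer() == "/a1"
//	_ = enc.WriteToken(String("b1")) // StackPointer() == "/a1"
//	_ = enc.WriteToken(String("c1")) // StackPointer() == "/c1"
//	_ = enc.WriteToken(EndObject)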
func (e *encoderState) AppendStackPointer(b []byte, where int) []byte {
e.Names.copyQuotedBuffer(e.Buf)
return e.state.appendStackPointer(b, where)
}


@@ -0,0 +1,737 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"io"
"path"
"slices"
"testing"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsontest"
"encoding/json/internal/jsonwire"
)
// TestEncoder tests whether we can produce JSON with either tokens or raw values.
func TestEncoder(t *testing.T) {
for _, td := range coderTestdata {
for _, formatName := range []string{"Compact", "Indented"} {
for _, typeName := range []string{"Token", "Value", "TokenDelims"} {
t.Run(path.Join(td.name.Name, typeName, formatName), func(t *testing.T) {
testEncoder(t, td.name.Where, formatName, typeName, td)
})
}
}
}
}
func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName string, td coderTestdataEntry) {
var want string
var opts []Options
dst := new(bytes.Buffer)
opts = append(opts, jsonflags.OmitTopLevelNewline|1)
want = td.outCompacted
switch formatName {
case "Indented":
opts = append(opts, Multiline(true))
opts = append(opts, WithIndentPrefix("\t"))
opts = append(opts, WithIndent(" "))
if td.outIndented != "" {
want = td.outIndented
}
}
enc := NewEncoder(dst, opts...)
switch typeName {
case "Token":
var pointers []Pointer
for _, tok := range td.tokens {
if err := enc.WriteToken(tok); err != nil {
t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
}
if td.pointers != nil {
pointers = append(pointers, enc.StackPointer())
}
}
if !slices.Equal(pointers, td.pointers) {
t.Fatalf("%s: pointers mismatch:\ngot %q\nwant %q", where, pointers, td.pointers)
}
case "Value":
if err := enc.WriteValue(Value(td.in)); err != nil {
t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
}
case "TokenDelims":
// Use WriteToken for object/array delimiters, WriteValue otherwise.
for _, tok := range td.tokens {
switch tok.Kind() {
case '{', '}', '[', ']':
if err := enc.WriteToken(tok); err != nil {
t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
}
default:
val := Value(tok.String())
if tok.Kind() == '"' {
val, _ = jsonwire.AppendQuote(nil, tok.String(), &jsonflags.Flags{})
}
if err := enc.WriteValue(val); err != nil {
t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
}
}
}
}
got := dst.String()
if got != want {
t.Errorf("%s: output mismatch:\ngot %q\nwant %q", where, got, want)
}
}
// TestFaultyEncoder tests that temporary I/O errors are not fatal.
func TestFaultyEncoder(t *testing.T) {
for _, td := range coderTestdata {
for _, typeName := range []string{"Token", "Value"} {
t.Run(path.Join(td.name.Name, typeName), func(t *testing.T) {
testFaultyEncoder(t, td.name.Where, typeName, td)
})
}
}
}
func testFaultyEncoder(t *testing.T, where jsontest.CasePos, typeName string, td coderTestdataEntry) {
b := &FaultyBuffer{
MaxBytes: 1,
MayError: io.ErrShortWrite,
}
// Write all the tokens.
// Even if the underlying io.Writer is faulty,
// a valid token or value is guaranteed to at least
// be appended to the internal buffer.
// In other words, syntactic errors occur before I/O errors.
enc := NewEncoder(b)
switch typeName {
case "Token":
for i, tok := range td.tokens {
err := enc.WriteToken(tok)
if err != nil && !errors.Is(err, io.ErrShortWrite) {
t.Fatalf("%s: %d: Encoder.WriteToken error: %v", where, i, err)
}
}
case "Value":
err := enc.WriteValue(Value(td.in))
if err != nil && !errors.Is(err, io.ErrShortWrite) {
t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
}
}
gotOutput := string(append(b.B, enc.s.unflushedBuffer()...))
wantOutput := td.outCompacted + "\n"
if gotOutput != wantOutput {
t.Fatalf("%s: output mismatch:\ngot %s\nwant %s", where, gotOutput, wantOutput)
}
}
type encoderMethodCall struct {
in tokOrVal
wantErr error
wantPointer Pointer
}
var encoderErrorTestdata = []struct {
name jsontest.CaseName
opts []Options
calls []encoderMethodCall
wantOut string
}{{
name: jsontest.Name("InvalidToken"),
calls: []encoderMethodCall{
{zeroToken, E(errInvalidToken), ""},
},
}, {
name: jsontest.Name("InvalidValue"),
calls: []encoderMethodCall{
{Value(`#`), newInvalidCharacterError("#", "at start of value"), ""},
},
}, {
name: jsontest.Name("InvalidValue/DoubleZero"),
calls: []encoderMethodCall{
{Value(`00`), newInvalidCharacterError("0", "after top-level value").withPos(`0`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedValue"),
calls: []encoderMethodCall{
{zeroValue, E(io.ErrUnexpectedEOF).withPos("", ""), ""},
},
}, {
name: jsontest.Name("TruncatedNull"),
calls: []encoderMethodCall{
{Value(`nul`), E(io.ErrUnexpectedEOF).withPos("nul", ""), ""},
},
}, {
name: jsontest.Name("InvalidNull"),
calls: []encoderMethodCall{
{Value(`nulL`), newInvalidCharacterError("L", "in literal null (expecting 'l')").withPos(`nul`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedFalse"),
calls: []encoderMethodCall{
{Value(`fals`), E(io.ErrUnexpectedEOF).withPos("fals", ""), ""},
},
}, {
name: jsontest.Name("InvalidFalse"),
calls: []encoderMethodCall{
{Value(`falsE`), newInvalidCharacterError("E", "in literal false (expecting 'e')").withPos(`fals`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedTrue"),
calls: []encoderMethodCall{
{Value(`tru`), E(io.ErrUnexpectedEOF).withPos(`tru`, ""), ""},
},
}, {
name: jsontest.Name("InvalidTrue"),
calls: []encoderMethodCall{
{Value(`truE`), newInvalidCharacterError("E", "in literal true (expecting 'e')").withPos(`tru`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedString"),
calls: []encoderMethodCall{
{Value(`"star`), E(io.ErrUnexpectedEOF).withPos(`"star`, ""), ""},
},
}, {
name: jsontest.Name("InvalidString"),
calls: []encoderMethodCall{
{Value(`"ok` + "\x00"), newInvalidCharacterError("\x00", `in string (expecting non-control character)`).withPos(`"ok`, ""), ""},
},
}, {
name: jsontest.Name("ValidString/AllowInvalidUTF8/Token"),
opts: []Options{AllowInvalidUTF8(true)},
calls: []encoderMethodCall{
{String("living\xde\xad\xbe\xef"), nil, ""},
},
wantOut: "\"living\xde\xad\ufffd\ufffd\"\n",
}, {
name: jsontest.Name("ValidString/AllowInvalidUTF8/Value"),
opts: []Options{AllowInvalidUTF8(true)},
calls: []encoderMethodCall{
{Value("\"living\xde\xad\xbe\xef\""), nil, ""},
},
wantOut: "\"living\xde\xad\ufffd\ufffd\"\n",
}, {
name: jsontest.Name("InvalidString/RejectInvalidUTF8"),
opts: []Options{AllowInvalidUTF8(false)},
calls: []encoderMethodCall{
{String("living\xde\xad\xbe\xef"), E(jsonwire.ErrInvalidUTF8), ""},
{Value("\"living\xde\xad\xbe\xef\""), E(jsonwire.ErrInvalidUTF8).withPos("\"living\xde\xad", ""), ""},
{BeginObject, nil, ""},
{String("name"), nil, ""},
{BeginArray, nil, ""},
{String("living\xde\xad\xbe\xef"), E(jsonwire.ErrInvalidUTF8).withPos(`{"name":[`, "/name/0"), ""},
{Value("\"living\xde\xad\xbe\xef\""), E(jsonwire.ErrInvalidUTF8).withPos("{\"name\":[\"living\xde\xad", "/name/0"), ""},
},
wantOut: `{"name":[`,
}, {
name: jsontest.Name("TruncatedNumber"),
calls: []encoderMethodCall{
{Value(`0.`), E(io.ErrUnexpectedEOF).withPos("0", ""), ""},
},
}, {
name: jsontest.Name("InvalidNumber"),
calls: []encoderMethodCall{
{Value(`0.e`), newInvalidCharacterError("e", "in number (expecting digit)").withPos(`0.`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterStart"),
calls: []encoderMethodCall{
{Value(`{`), E(io.ErrUnexpectedEOF).withPos("{", ""), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterName"),
calls: []encoderMethodCall{
{Value(`{"X"`), E(io.ErrUnexpectedEOF).withPos(`{"X"`, "/X"), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterColon"),
calls: []encoderMethodCall{
{Value(`{"X":`), E(io.ErrUnexpectedEOF).withPos(`{"X":`, "/X"), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterValue"),
calls: []encoderMethodCall{
{Value(`{"0":0`), E(io.ErrUnexpectedEOF).withPos(`{"0":0`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterComma"),
calls: []encoderMethodCall{
{Value(`{"0":0,`), E(io.ErrUnexpectedEOF).withPos(`{"0":0,`, ""), ""},
},
}, {
name: jsontest.Name("InvalidObject/MissingColon"),
calls: []encoderMethodCall{
{Value(` { "fizz" "buzz" } `), newInvalidCharacterError("\"", "after object name (expecting ':')").withPos(` { "fizz" `, "/fizz"), ""},
{Value(` { "fizz" , "buzz" } `), newInvalidCharacterError(",", "after object name (expecting ':')").withPos(` { "fizz" `, "/fizz"), ""},
},
}, {
name: jsontest.Name("InvalidObject/MissingComma"),
calls: []encoderMethodCall{
{Value(` { "fizz" : "buzz" "gazz" } `), newInvalidCharacterError("\"", "after object value (expecting ',' or '}')").withPos(` { "fizz" : "buzz" `, ""), ""},
{Value(` { "fizz" : "buzz" : "gazz" } `), newInvalidCharacterError(":", "after object value (expecting ',' or '}')").withPos(` { "fizz" : "buzz" `, ""), ""},
},
}, {
name: jsontest.Name("InvalidObject/ExtraComma"),
calls: []encoderMethodCall{
{Value(` { , } `), newInvalidCharacterError(",", `at start of string (expecting '"')`).withPos(` { `, ""), ""},
{Value(` { "fizz" : "buzz" , } `), newInvalidCharacterError("}", `at start of string (expecting '"')`).withPos(` { "fizz" : "buzz" , `, ""), ""},
},
}, {
name: jsontest.Name("InvalidObject/InvalidName"),
calls: []encoderMethodCall{
{Value(`{ null }`), newInvalidCharacterError("n", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ false }`), newInvalidCharacterError("f", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ true }`), newInvalidCharacterError("t", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ 0 }`), newInvalidCharacterError("0", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ {} }`), newInvalidCharacterError("{", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ [] }`), newInvalidCharacterError("[", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{BeginObject, nil, ""},
{Null, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`null`), E(ErrNonStringName).withPos(`{`, ""), ""},
{False, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`false`), E(ErrNonStringName).withPos(`{`, ""), ""},
{True, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`true`), E(ErrNonStringName).withPos(`{`, ""), ""},
{Uint(0), E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`0`), E(ErrNonStringName).withPos(`{`, ""), ""},
{BeginObject, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`{}`), E(ErrNonStringName).withPos(`{`, ""), ""},
{BeginArray, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`[]`), E(ErrNonStringName).withPos(`{`, ""), ""},
{EndObject, nil, ""},
},
wantOut: "{}\n",
}, {
name: jsontest.Name("InvalidObject/InvalidValue"),
calls: []encoderMethodCall{
{Value(`{ "0": x }`), newInvalidCharacterError("x", `at start of value`).withPos(`{ "0": `, "/0"), ""},
},
}, {
name: jsontest.Name("InvalidObject/MismatchingDelim"),
calls: []encoderMethodCall{
{Value(` { ] `), newInvalidCharacterError("]", `at start of string (expecting '"')`).withPos(` { `, ""), ""},
{Value(` { "0":0 ] `), newInvalidCharacterError("]", `after object value (expecting ',' or '}')`).withPos(` { "0":0 `, ""), ""},
{BeginObject, nil, ""},
{EndArray, E(errMismatchDelim).withPos(`{`, ""), ""},
{Value(`]`), newInvalidCharacterError("]", "at start of value").withPos(`{`, ""), ""},
{EndObject, nil, ""},
},
wantOut: "{}\n",
}, {
name: jsontest.Name("ValidObject/UniqueNames"),
calls: []encoderMethodCall{
{BeginObject, nil, ""},
{String("0"), nil, ""},
{Uint(0), nil, ""},
{String("1"), nil, ""},
{Uint(1), nil, ""},
{EndObject, nil, ""},
{Value(` { "0" : 0 , "1" : 1 } `), nil, ""},
},
wantOut: `{"0":0,"1":1}` + "\n" + `{"0":0,"1":1}` + "\n",
}, {
name: jsontest.Name("ValidObject/DuplicateNames"),
opts: []Options{AllowDuplicateNames(true)},
calls: []encoderMethodCall{
{BeginObject, nil, ""},
{String("0"), nil, ""},
{Uint(0), nil, ""},
{String("0"), nil, ""},
{Uint(0), nil, ""},
{EndObject, nil, ""},
{Value(` { "0" : 0 , "0" : 0 } `), nil, ""},
},
wantOut: `{"0":0,"0":0}` + "\n" + `{"0":0,"0":0}` + "\n",
}, {
name: jsontest.Name("InvalidObject/DuplicateNames"),
calls: []encoderMethodCall{
{BeginObject, nil, ""},
{String("X"), nil, ""},
{BeginObject, nil, ""},
{EndObject, nil, ""},
{String("X"), E(ErrDuplicateName).withPos(`{"X":{},`, "/X"), "/X"},
{Value(`"X"`), E(ErrDuplicateName).withPos(`{"X":{},`, "/X"), "/X"},
{String("Y"), nil, ""},
{BeginObject, nil, ""},
{EndObject, nil, ""},
{String("X"), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/X"), "/Y"},
{Value(`"X"`), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/X"), "/Y"},
{String("Y"), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/Y"), "/Y"},
{Value(`"Y"`), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/Y"), "/Y"},
{EndObject, nil, ""},
{Value(` { "X" : 0 , "Y" : 1 , "X" : 0 } `), E(ErrDuplicateName).withPos(`{"X":{},"Y":{}}`+"\n"+` { "X" : 0 , "Y" : 1 , `, "/X"), ""},
},
wantOut: `{"X":{},"Y":{}}` + "\n",
}, {
name: jsontest.Name("TruncatedArray/AfterStart"),
calls: []encoderMethodCall{
{Value(`[`), E(io.ErrUnexpectedEOF).withPos(`[`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedArray/AfterValue"),
calls: []encoderMethodCall{
{Value(`[0`), E(io.ErrUnexpectedEOF).withPos(`[0`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedArray/AfterComma"),
calls: []encoderMethodCall{
{Value(`[0,`), E(io.ErrUnexpectedEOF).withPos(`[0,`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedArray/MissingComma"),
calls: []encoderMethodCall{
{Value(` [ "fizz" "buzz" ] `), newInvalidCharacterError("\"", "after array value (expecting ',' or ']')").withPos(` [ "fizz" `, ""), ""},
},
}, {
name: jsontest.Name("InvalidArray/MismatchingDelim"),
calls: []encoderMethodCall{
{Value(` [ } `), newInvalidCharacterError("}", `at start of value`).withPos(` [ `, "/0"), ""},
{BeginArray, nil, ""},
{EndObject, E(errMismatchDelim).withPos(`[`, "/0"), ""},
{Value(`}`), newInvalidCharacterError("}", "at start of value").withPos(`[`, "/0"), ""},
{EndArray, nil, ""},
},
wantOut: "[]\n",
}, {
name: jsontest.Name("Format/Object/SpaceAfterColon"),
opts: []Options{SpaceAfterColon(true)},
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
wantOut: "{\"fizz\": \"buzz\",\"wizz\": \"wuzz\"}\n",
}, {
name: jsontest.Name("Format/Object/SpaceAfterComma"),
opts: []Options{SpaceAfterComma(true)},
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
wantOut: "{\"fizz\":\"buzz\", \"wizz\":\"wuzz\"}\n",
}, {
name: jsontest.Name("Format/Object/SpaceAfterColonAndComma"),
opts: []Options{SpaceAfterColon(true), SpaceAfterComma(true)},
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
wantOut: "{\"fizz\": \"buzz\", \"wizz\": \"wuzz\"}\n",
}, {
name: jsontest.Name("Format/Object/NoSpaceAfterColon+SpaceAfterComma+Multiline"),
opts: []Options{SpaceAfterColon(false), SpaceAfterComma(true), Multiline(true)},
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
wantOut: "{\n\t\"fizz\":\"buzz\", \n\t\"wizz\":\"wuzz\"\n}\n",
}, {
name: jsontest.Name("Format/Array/SpaceAfterComma"),
opts: []Options{SpaceAfterComma(true)},
calls: []encoderMethodCall{{Value(`["fizz","buzz"]`), nil, ""}},
wantOut: "[\"fizz\", \"buzz\"]\n",
}, {
name: jsontest.Name("Format/Array/NoSpaceAfterComma+Multiline"),
opts: []Options{SpaceAfterComma(false), Multiline(true)},
calls: []encoderMethodCall{{Value(`["fizz","buzz"]`), nil, ""}},
wantOut: "[\n\t\"fizz\",\n\t\"buzz\"\n]\n",
}, {
name: jsontest.Name("Format/ReorderWithWhitespace"),
opts: []Options{
AllowDuplicateNames(true),
AllowInvalidUTF8(true),
ReorderRawObjects(true),
SpaceAfterComma(true),
SpaceAfterColon(false),
Multiline(true),
WithIndentPrefix(" "),
WithIndent("\t"),
PreserveRawStrings(true),
},
calls: []encoderMethodCall{
{BeginArray, nil, ""},
{BeginArray, nil, ""},
{Value(` { "fizz" : "buzz" ,
"zip" : {
"x` + "\xfd" + `x" : 123 , "x` + "\xff" + `x" : 123, "x` + "\xfe" + `x" : 123
},
"zap" : {
"xxx" : 333, "xxx": 1, "xxx": 22
},
"alpha" : "bravo" } `), nil, ""},
{EndArray, nil, ""},
{EndArray, nil, ""},
},
wantOut: "[\n \t[\n \t\t{\n \t\t\t\"alpha\":\"bravo\", \n \t\t\t\"fizz\":\"buzz\", \n \t\t\t\"zap\":{\n \t\t\t\t\"xxx\":1, \n \t\t\t\t\"xxx\":22, \n \t\t\t\t\"xxx\":333\n \t\t\t}, \n \t\t\t\"zip\":{\n \t\t\t\t\"x\xfdx\":123, \n \t\t\t\t\"x\xfex\":123, \n \t\t\t\t\"x\xffx\":123\n \t\t\t}\n \t\t}\n \t]\n ]\n",
}, {
name: jsontest.Name("Format/CanonicalizeRawInts"),
opts: []Options{CanonicalizeRawInts(true), SpaceAfterComma(true)},
calls: []encoderMethodCall{
{Value(`[0.100,5.0,1E6,-9223372036854775808,-10,-1,-0,0,1,10,9223372036854775807]`), nil, ""},
},
wantOut: "[0.100, 5.0, 1E6, -9223372036854776000, -10, -1, 0, 0, 1, 10, 9223372036854776000]\n",
}, {
name: jsontest.Name("Format/CanonicalizeRawFloats"),
opts: []Options{CanonicalizeRawFloats(true), SpaceAfterComma(true)},
calls: []encoderMethodCall{
{Value(`[0.100,5.0,1E6,-9223372036854775808,-10,-1,-0,0,1,10,9223372036854775807]`), nil, ""},
},
wantOut: "[0.1, 5, 1000000, -9223372036854775808, -10, -1, 0, 0, 1, 10, 9223372036854775807]\n",
}, {
name: jsontest.Name("ErrorPosition"),
calls: []encoderMethodCall{
{Value(` "a` + "\xff" + `0" `), E(jsonwire.ErrInvalidUTF8).withPos(` "a`, ""), ""},
{String(`a` + "\xff" + `0`), E(jsonwire.ErrInvalidUTF8).withPos(``, ""), ""},
},
}, {
name: jsontest.Name("ErrorPosition/0"),
calls: []encoderMethodCall{
{Value(` [ "a` + "\xff" + `1" ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a`, "/0"), ""},
{BeginArray, nil, ""},
{Value(` "a` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`[ "a`, "/0"), ""},
{String(`a` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`[`, "/0"), ""},
},
wantOut: `[`,
}, {
name: jsontest.Name("ErrorPosition/1"),
calls: []encoderMethodCall{
{Value(` [ "a1" , "b` + "\xff" + `1" ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , "b`, "/1"), ""},
{BeginArray, nil, ""},
{String("a1"), nil, ""},
{Value(` "b` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", "b`, "/1"), ""},
{String(`b` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",`, "/1"), ""},
},
wantOut: `["a1"`,
}, {
name: jsontest.Name("ErrorPosition/0/0"),
calls: []encoderMethodCall{
{Value(` [ [ "a` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ [ "a`, "/0/0"), ""},
{BeginArray, nil, ""},
{Value(` [ "a` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[ [ "a`, "/0/0"), ""},
{BeginArray, nil, "/0"},
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`[[ "a`, "/0/0"), "/0"},
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`[[`, "/0/0"), "/0"},
},
wantOut: `[[`,
}, {
name: jsontest.Name("ErrorPosition/1/0"),
calls: []encoderMethodCall{
{Value(` [ "a1" , [ "a` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , [ "a`, "/1/0"), ""},
{BeginArray, nil, ""},
{String("a1"), nil, "/0"},
{Value(` [ "a` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", [ "a`, "/1/0"), ""},
{BeginArray, nil, "/1"},
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",[ "a`, "/1/0"), "/1"},
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",[`, "/1/0"), "/1"},
},
wantOut: `["a1",[`,
}, {
name: jsontest.Name("ErrorPosition/0/1"),
calls: []encoderMethodCall{
{Value(` [ [ "a2" , "b` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ [ "a2" , "b`, "/0/1"), ""},
{BeginArray, nil, ""},
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[ [ "a2" , "b`, "/0/1"), ""},
{BeginArray, nil, "/0"},
{String("a2"), nil, "/0/0"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`[["a2", "b`, "/0/1"), "/0/0"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`[["a2",`, "/0/1"), "/0/0"},
},
wantOut: `[["a2"`,
}, {
name: jsontest.Name("ErrorPosition/1/1"),
calls: []encoderMethodCall{
{Value(` [ "a1" , [ "a2" , "b` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , [ "a2" , "b`, "/1/1"), ""},
{BeginArray, nil, ""},
{String("a1"), nil, "/0"},
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", [ "a2" , "b`, "/1/1"), ""},
{BeginArray, nil, "/1"},
{String("a2"), nil, "/1/0"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",["a2", "b`, "/1/1"), "/1/0"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",["a2",`, "/1/1"), "/1/0"},
},
wantOut: `["a1",["a2"`,
}, {
name: jsontest.Name("ErrorPosition/a1-"),
calls: []encoderMethodCall{
{Value(` { "a` + "\xff" + `1" : "b1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a`, ""), ""},
{BeginObject, nil, ""},
{Value(` "a` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{ "a`, ""), ""},
{String(`a` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{`, ""), ""},
},
wantOut: `{`,
}, {
name: jsontest.Name("ErrorPosition/a1"),
calls: []encoderMethodCall{
{Value(` { "a1" : "b` + "\xff" + `1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b`, "/a1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{Value(` "b` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": "b`, "/a1"), ""},
{String(`b` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":`, "/a1"), ""},
},
wantOut: `{"a1"`,
}, {
name: jsontest.Name("ErrorPosition/c1-"),
calls: []encoderMethodCall{
{Value(` { "a1" : "b1" , "c` + "\xff" + `1" : "d1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c`, ""), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{String("b1"), nil, "/a1"},
{Value(` "c` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1": "c`, ""), "/a1"},
{String(`c` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":`, ""), "/a1"},
},
wantOut: `{"a1":"b1"`,
}, {
name: jsontest.Name("ErrorPosition/c1"),
calls: []encoderMethodCall{
{Value(` { "a1" : "b1" , "c1" : "d` + "\xff" + `1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c1" : "d`, "/c1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{String("b1"), nil, "/a1"},
{String("c1"), nil, "/c1"},
{Value(` "d` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":"c1": "d`, "/c1"), "/c1"},
{String(`d` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":"c1":`, "/c1"), "/c1"},
},
wantOut: `{"a1":"b1","c1"`,
}, {
name: jsontest.Name("ErrorPosition/a1/a2-"),
calls: []encoderMethodCall{
{Value(` { "a1" : { "a` + "\xff" + `2" : "b2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a`, "/a1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{Value(` { "a` + "\xff" + `2" : "b2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a`, "/a1"), ""},
{BeginObject, nil, "/a1"},
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{ "a`, "/a1"), "/a1"},
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{`, "/a1"), "/a1"},
},
wantOut: `{"a1":{`,
}, {
name: jsontest.Name("ErrorPosition/a1/a2"),
calls: []encoderMethodCall{
{Value(` { "a1" : { "a2" : "b` + "\xff" + `2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b`, "/a1/a2"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{Value(` { "a2" : "b` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a2" : "b`, "/a1/a2"), ""},
{BeginObject, nil, "/a1"},
{String("a2"), nil, "/a1/a2"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2": "b`, "/a1/a2"), "/a1/a2"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":`, "/a1/a2"), "/a1/a2"},
},
wantOut: `{"a1":{"a2"`,
}, {
name: jsontest.Name("ErrorPosition/a1/c2-"),
calls: []encoderMethodCall{
{Value(` { "a1" : { "a2" : "b2" , "c` + "\xff" + `2" : "d2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b2" , "c`, "/a1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{BeginObject, nil, "/a1"},
{String("a2"), nil, "/a1/a2"},
{String("b2"), nil, "/a1/a2"},
{Value(` "c` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2", "c`, "/a1"), "/a1/a2"},
{String(`c` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2",`, "/a1"), "/a1/a2"},
},
wantOut: `{"a1":{"a2":"b2"`,
}, {
name: jsontest.Name("ErrorPosition/a1/c2"),
calls: []encoderMethodCall{
{Value(` { "a1" : { "a2" : "b2" , "c2" : "d` + "\xff" + `2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b2" , "c2" : "d`, "/a1/c2"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{Value(` { "a2" : "b2" , "c2" : "d` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a2" : "b2" , "c2" : "d`, "/a1/c2"), ""},
{BeginObject, nil, ""},
{String("a2"), nil, "/a1/a2"},
{String("b2"), nil, "/a1/a2"},
{String("c2"), nil, "/a1/c2"},
{Value(` "d` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2","c2": "d`, "/a1/c2"), "/a1/c2"},
{String(`d` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2","c2":`, "/a1/c2"), "/a1/c2"},
},
wantOut: `{"a1":{"a2":"b2","c2"`,
}, {
name: jsontest.Name("ErrorPosition/1/a2"),
calls: []encoderMethodCall{
{Value(` [ "a1" , { "a2" : "b` + "\xff" + `2" } ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , { "a2" : "b`, "/1/a2"), ""},
{BeginArray, nil, ""},
{String("a1"), nil, "/0"},
{Value(` { "a2" : "b` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", { "a2" : "b`, "/1/a2"), ""},
{BeginObject, nil, "/1"},
{String("a2"), nil, "/1/a2"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",{"a2": "b`, "/1/a2"), "/1/a2"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",{"a2":`, "/1/a2"), "/1/a2"},
},
wantOut: `["a1",{"a2"`,
}, {
name: jsontest.Name("ErrorPosition/c1/1"),
calls: []encoderMethodCall{
{Value(` { "a1" : "b1" , "c1" : [ "a2" , "b` + "\xff" + `2" ] } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c1" : [ "a2" , "b`, "/c1/1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{String("b1"), nil, "/a1"},
{String("c1"), nil, "/c1"},
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1": [ "a2" , "b`, "/c1/1"), ""},
{BeginArray, nil, "/c1"},
{String("a2"), nil, "/c1/0"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1":["a2", "b`, "/c1/1"), "/c1/0"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1":["a2",`, "/c1/1"), "/c1/0"},
},
wantOut: `{"a1":"b1","c1":["a2"`,
}, {
name: jsontest.Name("ErrorPosition/0/a1/1/c3/1"),
calls: []encoderMethodCall{
{Value(` [ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] } ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginArray, nil, ""},
{Value(` { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] } `), E(jsonwire.ErrInvalidUTF8).withPos(`[ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginObject, nil, "/0"},
{String("a1"), nil, "/0/a1"},
{Value(` [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1": [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginArray, nil, ""},
{String("a2"), nil, "/0/a1/0"},
{Value(` { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2", { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginObject, nil, "/0/a1/1"},
{String("a3"), nil, "/0/a1/1/a3"},
{String("b3"), nil, "/0/a1/1/a3"},
{String("c3"), nil, "/0/a1/1/c3"},
{Value(` [ "a4" , "b` + "\xff" + `4" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3": [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginArray, nil, "/0/a1/1/c3"},
{String("a4"), nil, "/0/a1/1/c3/0"},
{Value(` "b` + "\xff" + `4" `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3":["a4", "b`, "/0/a1/1/c3/1"), "/0/a1/1/c3/0"},
{String(`b` + "\xff" + `4`), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3":["a4",`, "/0/a1/1/c3/1"), "/0/a1/1/c3/0"},
},
wantOut: `[{"a1":["a2",{"a3":"b3","c3":["a4"`,
}}
// TestEncoderErrors tests that Encoder errors occur when we expect them to and
// leave the Encoder in a consistent state.
func TestEncoderErrors(t *testing.T) {
for _, td := range encoderErrorTestdata {
t.Run(path.Join(td.name.Name), func(t *testing.T) {
testEncoderErrors(t, td.name.Where, td.opts, td.calls, td.wantOut)
})
}
}
func testEncoderErrors(t *testing.T, where jsontest.CasePos, opts []Options, calls []encoderMethodCall, wantOut string) {
dst := new(bytes.Buffer)
enc := NewEncoder(dst, opts...)
for i, call := range calls {
var gotErr error
switch tokVal := call.in.(type) {
case Token:
gotErr = enc.WriteToken(tokVal)
case Value:
gotErr = enc.WriteValue(tokVal)
}
if !equalError(gotErr, call.wantErr) {
t.Fatalf("%s: %d: error mismatch:\ngot %v\nwant %v", where, i, gotErr, call.wantErr)
}
if call.wantPointer != "" {
gotPointer := enc.StackPointer()
if gotPointer != call.wantPointer {
t.Fatalf("%s: %d: Encoder.StackPointer = %s, want %s", where, i, gotPointer, call.wantPointer)
}
}
}
gotOut := dst.String() + string(enc.s.unflushedBuffer())
if gotOut != wantOut {
t.Fatalf("%s: output mismatch:\ngot %q\nwant %q", where, gotOut, wantOut)
}
gotOffset := int(enc.OutputOffset())
wantOffset := len(wantOut)
if gotOffset != wantOffset {
t.Fatalf("%s: Encoder.OutputOffset = %v, want %v", where, gotOffset, wantOffset)
}
}


@@ -0,0 +1,182 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"io"
"strconv"
"encoding/json/internal/jsonwire"
)
const errorPrefix = "jsontext: "
type ioError struct {
action string // either "read" or "write"
err error
}
func (e *ioError) Error() string {
return errorPrefix + e.action + " error: " + e.err.Error()
}
func (e *ioError) Unwrap() error {
return e.err
}
// SyntacticError is a description of a syntactic error that occurred when
// encoding or decoding JSON according to the grammar.
//
// The contents of this error as produced by this package may change over time.
type SyntacticError struct {
requireKeyedLiterals
nonComparable
// ByteOffset indicates that an error occurred after this byte offset.
ByteOffset int64
// JSONPointer indicates that an error occurred within this JSON value
// as indicated using the JSON Pointer notation (see RFC 6901).
JSONPointer Pointer
// Err is the underlying error.
Err error
}
// wrapSyntacticError wraps an error and annotates it with a precise location
// using the provided [encoderState] or [decoderState].
// If err is an [ioError] or [io.EOF], then it is not wrapped.
//
// It takes a relative offset pos that can be resolved into
// an absolute offset using state.offsetAt.
//
// It takes a where value that specifies how the JSON pointer is derived.
// If the underlying error is a [pointerSuffixError],
// then the suffix is appended to the derived pointer.
func wrapSyntacticError(state interface {
offsetAt(pos int) int64
AppendStackPointer(b []byte, where int) []byte
}, err error, pos, where int) error {
if _, ok := err.(*ioError); err == io.EOF || ok {
return err
}
offset := state.offsetAt(pos)
ptr := state.AppendStackPointer(nil, where)
if serr, ok := err.(*pointerSuffixError); ok {
ptr = serr.appendPointer(ptr)
err = serr.error
}
if d, ok := state.(*decoderState); ok && err == errMismatchDelim {
where := "at start of value"
if len(d.Tokens.Stack) > 0 && d.Tokens.Last.Length() > 0 {
switch {
case d.Tokens.Last.isArray():
where = "after array element (expecting ',' or ']')"
ptr = []byte(Pointer(ptr).Parent()) // problem is with parent array
case d.Tokens.Last.isObject():
where = "after object value (expecting ',' or '}')"
ptr = []byte(Pointer(ptr).Parent()) // problem is with parent object
}
}
err = jsonwire.NewInvalidCharacterError(d.buf[pos:], where)
}
return &SyntacticError{ByteOffset: offset, JSONPointer: Pointer(ptr), Err: err}
}
func (e *SyntacticError) Error() string {
pointer := e.JSONPointer
offset := e.ByteOffset
b := []byte(errorPrefix)
if e.Err != nil {
b = append(b, e.Err.Error()...)
if e.Err == ErrDuplicateName {
b = strconv.AppendQuote(append(b, ' '), pointer.LastToken())
pointer = pointer.Parent()
offset = 0 // not useful to print offset for duplicate names
}
} else {
b = append(b, "syntactic error"...)
}
if pointer != "" {
b = strconv.AppendQuote(append(b, " within "...), jsonwire.TruncatePointer(string(pointer), 100))
}
if offset > 0 {
b = strconv.AppendInt(append(b, " after offset "...), offset, 10)
}
return string(b)
}
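// For illustration (derived from the formatting logic above, not an
// authoritative rendering): a SyntacticError with Err set to
// io.ErrUnexpectedEOF, JSONPointer "/fizz", and ByteOffset 10 formats as:
//
//	jsontext: unexpected EOF within "/fizz" after offset 10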
func (e *SyntacticError) Unwrap() error {
return e.Err
}
// pointerSuffixError represents a JSON pointer suffix to be appended
// to [SyntacticError.JSONPointer]. It is an internal error type
// used within this package and does not appear in the public API.
//
// This type is primarily used to annotate errors in Encoder.WriteValue
// and Decoder.ReadValue with precise positions.
// At the time WriteValue or ReadValue is called, a JSON pointer to the
// upcoming value can be constructed using the Encoder/Decoder state.
// However, tracking pointers within values during normal operation
// would incur a performance penalty in the error-free case.
//
// To provide precise error locations without this overhead,
// the error is wrapped with object names or array indices
// as the call stack is popped when an error occurs.
// Since this happens in reverse order, pointerSuffixError holds
// the pointer in reverse and is only later reversed when appending to
// the pointer prefix.
//
// For example, if the encoder is at "/alpha/bravo/charlie"
// and an error occurs in WriteValue at "/xray/yankee/zulu", then
// the final pointer should be "/alpha/bravo/charlie/xray/yankee/zulu".
//
// As pointerSuffixError is populated during the error return path,
// it first contains "/zulu", then "/zulu/yankee",
// and finally "/zulu/yankee/xray".
// These tokens are reversed and concatenated to "/alpha/bravo/charlie"
// to form the full pointer.
type pointerSuffixError struct {
error
// reversePointer is a JSON pointer, but with each token in reverse order.
reversePointer []byte
}
// wrapWithObjectName wraps err with a JSON object name access,
// which must be a valid quoted JSON string.
func wrapWithObjectName(err error, quotedName []byte) error {
serr, _ := err.(*pointerSuffixError)
if serr == nil {
serr = &pointerSuffixError{error: err}
}
name := jsonwire.UnquoteMayCopy(quotedName, false)
serr.reversePointer = appendEscapePointerName(append(serr.reversePointer, '/'), name)
return serr
}
// wrapWithArrayIndex wraps err with a JSON array index access.
func wrapWithArrayIndex(err error, index int64) error {
serr, _ := err.(*pointerSuffixError)
if serr == nil {
serr = &pointerSuffixError{error: err}
}
serr.reversePointer = strconv.AppendUint(append(serr.reversePointer, '/'), uint64(index), 10)
return serr
}
// appendPointer appends the path encoded in e to the end of pointer.
func (e *pointerSuffixError) appendPointer(pointer []byte) []byte {
// Copy each token in reversePointer to the end of pointer in reverse order.
// Double reversal means that the appended suffix is now in forward order.
bi, bo := e.reversePointer, pointer
for len(bi) > 0 {
i := bytes.LastIndexByte(bi, '/')
bi, bo = bi[:i], append(bo, bi[i:]...)
}
return bo
}


@@ -0,0 +1,130 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext_test
import (
"bytes"
"fmt"
"io"
"log"
"strings"
"encoding/json/jsontext"
"encoding/json/v2"
)
// This example demonstrates the use of the [Encoder] and [Decoder] to
// parse and modify JSON without unmarshaling it into a concrete Go type.
func Example_stringReplace() {
// Example input with non-idiomatic use of "Golang" instead of "Go".
const input = `{
"title": "Golang version 1 is released",
"author": "Andrew Gerrand",
"date": "2012-03-28",
"text": "Today marks a major milestone in the development of the Golang programming language.",
"otherArticles": [
"Twelve Years of Golang",
"The Laws of Reflection",
"Learn Golang from your browser"
]
}`
// Using a Decoder and Encoder, we can parse through every token,
// check and modify the token if necessary, and
// write the token to the output.
var replacements []jsontext.Pointer
in := strings.NewReader(input)
dec := jsontext.NewDecoder(in)
out := new(bytes.Buffer)
enc := jsontext.NewEncoder(out, jsontext.Multiline(true)) // expand for readability
for {
// Read a token from the input.
tok, err := dec.ReadToken()
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
// Check whether the token contains the string "Golang" and
// replace each occurrence with "Go" instead.
if tok.Kind() == '"' && strings.Contains(tok.String(), "Golang") {
replacements = append(replacements, dec.StackPointer())
tok = jsontext.String(strings.ReplaceAll(tok.String(), "Golang", "Go"))
}
// Write the (possibly modified) token to the output.
if err := enc.WriteToken(tok); err != nil {
log.Fatal(err)
}
}
// Print the list of replacements and the adjusted JSON output.
if len(replacements) > 0 {
fmt.Println(`Replaced "Golang" with "Go" in:`)
for _, where := range replacements {
fmt.Println("\t" + where)
}
fmt.Println()
}
fmt.Println("Result:", out.String())
// Output:
// Replaced "Golang" with "Go" in:
// /title
// /text
// /otherArticles/0
// /otherArticles/2
//
// Result: {
// "title": "Go version 1 is released",
// "author": "Andrew Gerrand",
// "date": "2012-03-28",
// "text": "Today marks a major milestone in the development of the Go programming language.",
// "otherArticles": [
// "Twelve Years of Go",
// "The Laws of Reflection",
// "Learn Go from your browser"
// ]
// }
}
// Directly embedding JSON within HTML requires special handling for safety.
// Certain runes must be escaped to prevent JSON that is directly treated as HTML
// from being able to perform <script> injection.
//
// This example shows how to obtain behavior equivalent to that provided by the
// v1 [encoding/json] package, which is no longer directly supported by this package.
// Newly written code that intermixes JSON and HTML should instead use the
// [github.com/google/safehtml] module for safety purposes.
func ExampleEscapeForHTML() {
page := struct {
Title string
Body string
}{
Title: "Example Embedded Javascript",
Body: `<script> console.log("Hello, world!"); </script>`,
}
b, err := json.Marshal(&page,
// Escape certain runes within a JSON string so that
// JSON will be safe to directly embed inside HTML.
jsontext.EscapeForHTML(true),
jsontext.EscapeForJS(true),
jsontext.Multiline(true)) // expand for readability
if err != nil {
log.Fatal(err)
}
fmt.Println(string(b))
// Output:
// {
// "Title": "Example Embedded Javascript",
// "Body": "\u003cscript\u003e console.log(\"Hello, world!\"); \u003c/script\u003e"
// }
}


@@ -0,0 +1,77 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"io"
"encoding/json/internal"
)
// Internal is for internal use only.
// This is exempt from the Go compatibility agreement.
var Internal exporter
type exporter struct{}
// Export exposes internal functionality from "jsontext" to "json".
// This cannot be dynamically called by other packages since
// they cannot obtain a reference to the internal.AllowInternalUse value.
func (exporter) Export(p *internal.NotForPublicUse) export {
if p != &internal.AllowInternalUse {
panic("unauthorized call to Export")
}
return export{}
}
// The export type exposes functionality to packages with visibility to
// the internal.AllowInternalUse variable. The "json" package uses this
// to modify low-level state in the Encoder and Decoder types.
// It mutates the state directly instead of calling ReadToken or WriteToken
// since this is more performant. The public APIs need to track state to ensure
// that users are constructing a valid JSON value, but the "json" implementation
// guarantees that it emits valid JSON by the structure of the code itself.
type export struct{}
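// For illustration (a hedged sketch; the actual call site lives in the "json"
// package, outside this file): a package with access to
// internal.AllowInternalUse is expected to obtain the export value once:
//
//	var export = jsontext.Internal.Export(&internal.AllowInternalUse)
//	state := export.Encoder(enc) // direct access to the encoderState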
// Encoder returns a pointer to the underlying encoderState.
func (export) Encoder(e *Encoder) *encoderState { return &e.s }
// Decoder returns a pointer to the underlying decoderState.
func (export) Decoder(d *Decoder) *decoderState { return &d.s }
func (export) GetBufferedEncoder(o ...Options) *Encoder {
return getBufferedEncoder(o...)
}
func (export) PutBufferedEncoder(e *Encoder) {
putBufferedEncoder(e)
}
func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder {
return getStreamingEncoder(w, o...)
}
func (export) PutStreamingEncoder(e *Encoder) {
putStreamingEncoder(e)
}
func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder {
return getBufferedDecoder(b, o...)
}
func (export) PutBufferedDecoder(d *Decoder) {
putBufferedDecoder(d)
}
func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder {
return getStreamingDecoder(r, o...)
}
func (export) PutStreamingDecoder(d *Decoder) {
putStreamingDecoder(d)
}
func (export) IsIOError(err error) bool {
_, ok := err.(*ioError)
return ok
}


@@ -0,0 +1,236 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"io"
"math/rand"
"slices"
"testing"
"encoding/json/internal/jsontest"
)
func FuzzCoder(f *testing.F) {
// Add a number of inputs to the corpus including valid and invalid data.
for _, td := range coderTestdata {
f.Add(int64(0), []byte(td.in))
}
for _, td := range decoderErrorTestdata {
f.Add(int64(0), []byte(td.in))
}
for _, td := range encoderErrorTestdata {
f.Add(int64(0), []byte(td.wantOut))
}
for _, td := range jsontest.Data {
f.Add(int64(0), td.Data())
}
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
var tokVals []tokOrVal
rn := rand.NewSource(seed)
// Read a sequence of tokens or values. Skip the test for any errors
// since we expect them with randomly generated fuzz inputs.
src := bytes.NewReader(b)
dec := NewDecoder(src)
for {
if rn.Int63()%8 > 0 {
tok, err := dec.ReadToken()
if err != nil {
if err == io.EOF {
break
}
t.Skipf("Decoder.ReadToken error: %v", err)
}
tokVals = append(tokVals, tok.Clone())
} else {
val, err := dec.ReadValue()
if err != nil {
expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']'
if expectError && errors.As(err, new(*SyntacticError)) {
continue
}
if err == io.EOF {
break
}
t.Skipf("Decoder.ReadValue error: %v", err)
}
tokVals = append(tokVals, append(zeroValue, val...))
}
}
// Write a sequence of tokens or values. Fail the test for any errors
// since the previous stage guarantees that the input is valid.
dst := new(bytes.Buffer)
enc := NewEncoder(dst)
for _, tokVal := range tokVals {
switch tokVal := tokVal.(type) {
case Token:
if err := enc.WriteToken(tokVal); err != nil {
t.Fatalf("Encoder.WriteToken error: %v", err)
}
case Value:
if err := enc.WriteValue(tokVal); err != nil {
t.Fatalf("Encoder.WriteValue error: %v", err)
}
}
}
// Encoded output and original input must decode to the same thing.
var got, want []Token
for dec := NewDecoder(bytes.NewReader(b)); dec.PeekKind() > 0; {
tok, err := dec.ReadToken()
if err != nil {
t.Fatalf("Decoder.ReadToken error: %v", err)
}
got = append(got, tok.Clone())
}
for dec := NewDecoder(dst); dec.PeekKind() > 0; {
tok, err := dec.ReadToken()
if err != nil {
t.Fatalf("Decoder.ReadToken error: %v", err)
}
want = append(want, tok.Clone())
}
if !equalTokens(got, want) {
t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want)
}
})
}
func FuzzResumableDecoder(f *testing.F) {
for _, td := range resumableDecoderTestdata {
f.Add(int64(0), []byte(td))
}
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
rn := rand.NewSource(seed)
// Regardless of how many bytes the underlying io.Reader produces,
// the provided tokens, values, and errors should always be identical.
t.Run("ReadToken", func(t *testing.T) {
decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn})
decWant := NewDecoder(bytes.NewReader(b))
gotTok, gotErr := decGot.ReadToken()
wantTok, wantErr := decWant.ReadToken()
if gotTok.String() != wantTok.String() || !equalError(gotErr, wantErr) {
t.Errorf("Decoder.ReadToken = (%v, %v), want (%v, %v)", gotTok, gotErr, wantTok, wantErr)
}
})
t.Run("ReadValue", func(t *testing.T) {
decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn})
decWant := NewDecoder(bytes.NewReader(b))
gotVal, gotErr := decGot.ReadValue()
wantVal, wantErr := decWant.ReadValue()
if !slices.Equal(gotVal, wantVal) || !equalError(gotErr, wantErr) {
t.Errorf("Decoder.ReadValue = (%s, %v), want (%s, %v)", gotVal, gotErr, wantVal, wantErr)
}
})
})
}
func FuzzValueFormat(f *testing.F) {
for _, td := range valueTestdata {
f.Add(int64(0), []byte(td.in))
}
// isValid reports whether b is valid according to the specified options.
isValid := func(b []byte, opts ...Options) bool {
d := NewDecoder(bytes.NewReader(b), opts...)
_, errVal := d.ReadValue()
_, errEOF := d.ReadToken()
return errVal == nil && errEOF == io.EOF
}
// stripWhitespace removes all JSON whitespace characters from the input.
stripWhitespace := func(in []byte) (out []byte) {
out = make([]byte, 0, len(in))
for _, c := range in {
switch c {
case ' ', '\n', '\r', '\t':
default:
out = append(out, c)
}
}
return out
}
allOptions := []Options{
AllowDuplicateNames(true),
AllowInvalidUTF8(true),
EscapeForHTML(true),
EscapeForJS(true),
PreserveRawStrings(true),
CanonicalizeRawInts(true),
CanonicalizeRawFloats(true),
ReorderRawObjects(true),
SpaceAfterColon(true),
SpaceAfterComma(true),
Multiline(true),
WithIndent("\t"),
WithIndentPrefix(" "),
}
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
validRFC7159 := isValid(b, AllowInvalidUTF8(true), AllowDuplicateNames(true))
validRFC8259 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(true))
validRFC7493 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(false))
switch {
case !validRFC7159 && validRFC8259:
t.Errorf("invalid input per RFC 7159 implies invalid per RFC 8259")
case !validRFC8259 && validRFC7493:
t.Errorf("invalid input per RFC 8259 implies invalid per RFC 7493")
}
gotValid := Value(b).IsValid()
wantValid := validRFC7493
if gotValid != wantValid {
t.Errorf("Value.IsValid = %v, want %v", gotValid, wantValid)
}
gotCompacted := Value(string(b))
gotCompactOk := gotCompacted.Compact() == nil
wantCompactOk := validRFC7159
if !bytes.Equal(stripWhitespace(gotCompacted), stripWhitespace(b)) {
t.Errorf("stripWhitespace(Value.Compact) = %s, want %s", stripWhitespace(gotCompacted), stripWhitespace(b))
}
if gotCompactOk != wantCompactOk {
t.Errorf("Value.Compact success mismatch: got %v, want %v", gotCompactOk, wantCompactOk)
}
gotIndented := Value(string(b))
gotIndentOk := gotIndented.Indent() == nil
wantIndentOk := validRFC7159
if !bytes.Equal(stripWhitespace(gotIndented), stripWhitespace(b)) {
t.Errorf("stripWhitespace(Value.Indent) = %s, want %s", stripWhitespace(gotIndented), stripWhitespace(b))
}
if gotIndentOk != wantIndentOk {
t.Errorf("Value.Indent success mismatch: got %v, want %v", gotIndentOk, wantIndentOk)
}
gotCanonicalized := Value(string(b))
gotCanonicalizeOk := gotCanonicalized.Canonicalize() == nil
wantCanonicalizeOk := validRFC7493
if gotCanonicalizeOk != wantCanonicalizeOk {
t.Errorf("Value.Canonicalize success mismatch: got %v, want %v", gotCanonicalizeOk, wantCanonicalizeOk)
}
// Random options should not result in a panic.
var opts []Options
rn := rand.New(rand.NewSource(seed))
for _, opt := range allOptions {
if rn.Intn(len(allOptions)/4) == 0 {
opts = append(opts, opt)
}
}
v := Value(b)
v.Format(opts...) // should not panic
})
}


@@ -0,0 +1,304 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"strings"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
)
// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder],
// and [Decoder.Reset] with specific features.
// Each function takes in a variadic list of options, where properties
// set in latter options override the value of previously set properties.
//
// There is a single Options type, which is used with both encoding and decoding.
// Some options affect both operations, while others only affect one operation:
//
// - [AllowDuplicateNames] affects encoding and decoding
// - [AllowInvalidUTF8] affects encoding and decoding
// - [EscapeForHTML] affects encoding only
// - [EscapeForJS] affects encoding only
// - [PreserveRawStrings] affects encoding only
// - [CanonicalizeRawInts] affects encoding only
// - [CanonicalizeRawFloats] affects encoding only
// - [ReorderRawObjects] affects encoding only
// - [SpaceAfterColon] affects encoding only
// - [SpaceAfterComma] affects encoding only
// - [Multiline] affects encoding only
// - [WithIndent] affects encoding only
// - [WithIndentPrefix] affects encoding only
//
// Options that do not affect a particular operation are ignored.
//
// The Options type is identical to [encoding/json.Options] and
// [encoding/json/v2.Options]. Options from the other packages may
// be passed to functionality in this package, but are ignored.
// Options from this package may be used with the other packages.
type Options = jsonopts.Options
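// For illustration (a hedged sketch; the option set shown is arbitrary):
// options are passed variadically when constructing an Encoder or Decoder,
// with later options overriding earlier ones:
//
//	enc := NewEncoder(os.Stdout,
//		AllowDuplicateNames(true),
//		Multiline(true),
//		WithIndent("  "))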
// AllowDuplicateNames specifies that JSON objects may contain
// duplicate member names. Disabling the duplicate name check may provide
// performance benefits, but breaks compliance with RFC 7493, section 2.3.
// The input or output will still be compliant with RFC 8259,
// which leaves the handling of duplicate names as unspecified behavior.
//
// This affects either encoding or decoding.
func AllowDuplicateNames(v bool) Options {
if v {
return jsonflags.AllowDuplicateNames | 1
} else {
return jsonflags.AllowDuplicateNames | 0
}
}
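// Example (an illustrative sketch mirroring the ValidObject/DuplicateNames
// and InvalidObject/DuplicateNames encoder test cases):
//
//	enc := NewEncoder(os.Stdout, AllowDuplicateNames(true))
//	_ = enc.WriteValue(Value(`{"0":0,"0":0}`)) // succeeds; reports ErrDuplicateName if the option is disabled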
// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8,
// which will be mangled as the Unicode replacement character, U+FFFD.
// This causes the encoder or decoder to break compliance with
// RFC 7493, section 2.1, and RFC 8259, section 8.1.
//
// This affects either encoding or decoding.
func AllowInvalidUTF8(v bool) Options {
if v {
return jsonflags.AllowInvalidUTF8 | 1
} else {
return jsonflags.AllowInvalidUTF8 | 0
}
}
// EscapeForHTML specifies that '<', '>', and '&' characters within JSON strings
// should be escaped as a hexadecimal Unicode codepoint (e.g., \u003c) so that
// the output is safe to embed within HTML.
//
// This only affects encoding and is ignored when decoding.
func EscapeForHTML(v bool) Options {
if v {
return jsonflags.EscapeForHTML | 1
} else {
return jsonflags.EscapeForHTML | 0
}
}
// EscapeForJS specifies that U+2028 and U+2029 characters within JSON strings
// should be escaped as a hexadecimal Unicode codepoint (e.g., \u2028) so that
// the output is valid to embed within JavaScript. See RFC 8259, section 12.
//
// This only affects encoding and is ignored when decoding.
func EscapeForJS(v bool) Options {
if v {
return jsonflags.EscapeForJS | 1
} else {
return jsonflags.EscapeForJS | 0
}
}
// PreserveRawStrings specifies that when encoding a raw JSON string in a
// [Token] or [Value], pre-escaped sequences
// in a JSON string are preserved to the output.
// However, raw strings still respect [EscapeForHTML] and [EscapeForJS]
// such that the relevant characters are escaped.
// If [AllowInvalidUTF8] is enabled, bytes of invalid UTF-8
// are preserved to the output.
//
// This only affects encoding and is ignored when decoding.
func PreserveRawStrings(v bool) Options {
if v {
return jsonflags.PreserveRawStrings | 1
} else {
return jsonflags.PreserveRawStrings | 0
}
}
// CanonicalizeRawInts specifies that when encoding a raw JSON
// integer number (i.e., a number without a fraction and exponent) in a
// [Token] or [Value], the number is canonicalized
// according to RFC 8785, section 3.2.2.3. As a special case,
// the number -0 is canonicalized as 0.
//
// JSON numbers are treated as IEEE 754 double precision numbers.
// Any numbers with precision beyond what is representable by that form
// will lose their precision when canonicalized. For example,
// integer values beyond ±2⁵³ will lose their precision.
// For example, 1234567890123456789 is formatted as 1234567890123456800.
//
// This only affects encoding and is ignored when decoding.
func CanonicalizeRawInts(v bool) Options {
if v {
return jsonflags.CanonicalizeRawInts | 1
} else {
return jsonflags.CanonicalizeRawInts | 0
}
}
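// Example (an illustrative sketch mirroring the Format/CanonicalizeRawInts
// encoder test case; note that 1E6 is left alone since it is not a raw integer):
//
//	enc := NewEncoder(os.Stdout, CanonicalizeRawInts(true), SpaceAfterComma(true))
//	_ = enc.WriteValue(Value(`[1E6,-9223372036854775808,-0,9223372036854775807]`))
//	// Output: [1E6, -9223372036854776000, 0, 9223372036854776000]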
// CanonicalizeRawFloats specifies that when encoding a raw JSON
// floating-point number (i.e., a number with a fraction or exponent) in a
// [Token] or [Value], the number is canonicalized
// according to RFC 8785, section 3.2.2.3. As a special case,
// the number -0 is canonicalized as 0.
//
// JSON numbers are treated as IEEE 754 double precision numbers.
// It is safe to canonicalize a serialized single precision number and
// parse it back as a single precision number and expect the same value.
// If a number exceeds ±1.7976931348623157e+308, which is the maximum
// finite number, then it is saturated at that value and formatted as such.
//
// This only affects encoding and is ignored when decoding.
func CanonicalizeRawFloats(v bool) Options {
if v {
return jsonflags.CanonicalizeRawFloats | 1
} else {
return jsonflags.CanonicalizeRawFloats | 0
}
}
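// Example (an illustrative sketch mirroring the Format/CanonicalizeRawFloats
// encoder test case; raw integers are left alone by this option):
//
//	enc := NewEncoder(os.Stdout, CanonicalizeRawFloats(true), SpaceAfterComma(true))
//	_ = enc.WriteValue(Value(`[0.100,5.0,1E6,-9223372036854775808]`))
//	// Output: [0.1, 5, 1000000, -9223372036854775808]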
// ReorderRawObjects specifies that when encoding a raw JSON object in a
// [Value], the object members are reordered according to
// RFC 8785, section 3.2.3.
//
// This only affects encoding and is ignored when decoding.
func ReorderRawObjects(v bool) Options {
if v {
return jsonflags.ReorderRawObjects | 1
} else {
return jsonflags.ReorderRawObjects | 0
}
}
// SpaceAfterColon specifies that the JSON output should emit a space character
// after each colon separator following a JSON object name.
// If false, then no space character appears after the colon separator.
//
// This only affects encoding and is ignored when decoding.
func SpaceAfterColon(v bool) Options {
if v {
return jsonflags.SpaceAfterColon | 1
} else {
return jsonflags.SpaceAfterColon | 0
}
}
// SpaceAfterComma specifies that the JSON output should emit a space character
// after each comma separator following a JSON object value or array element.
// If false, then no space character appears after the comma separator.
//
// This only affects encoding and is ignored when decoding.
func SpaceAfterComma(v bool) Options {
if v {
return jsonflags.SpaceAfterComma | 1
} else {
return jsonflags.SpaceAfterComma | 0
}
}
// Multiline specifies that the JSON output should expand to multiple lines,
// where every JSON object member or JSON array element appears on
// a new, indented line according to the nesting depth.
//
// If [SpaceAfterColon] is not specified, then the default is true.
// If [SpaceAfterComma] is not specified, then the default is false.
// If [WithIndent] is not specified, then the default is "\t".
//
// If set to false, then the output is on a single line,
// where the only whitespace emitted is determined by the current
// values of [SpaceAfterColon] and [SpaceAfterComma].
//
// This only affects encoding and is ignored when decoding.
func Multiline(v bool) Options {
if v {
return jsonflags.Multiline | 1
} else {
return jsonflags.Multiline | 0
}
}
// WithIndent specifies that the encoder should emit multiline output
// where each element in a JSON object or array begins on a new, indented line
// beginning with the indent prefix (see [WithIndentPrefix])
// followed by one or more copies of indent according to the nesting depth.
// The indent must only be composed of space or tab characters.
//
// If the intent is to emit indented output without a preference for
// the particular indent string, then use [Multiline] instead.
//
// This only affects encoding and is ignored when decoding.
// Use of this option implies [Multiline] being set to true.
func WithIndent(indent string) Options {
// Fast-path: Return a constant for common indents, which avoids allocating.
// These are derived from analyzing the Go module proxy on 2023-07-01.
switch indent {
case "\t":
return jsonopts.Indent("\t") // ~14k usages
case " ":
return jsonopts.Indent(" ") // ~18k usages
case " ":
return jsonopts.Indent(" ") // ~1.7k usages
case " ":
return jsonopts.Indent(" ") // ~52k usages
case " ":
return jsonopts.Indent(" ") // ~12k usages
case "":
return jsonopts.Indent("") // ~1.5k usages
}
// Otherwise, allocate for this unique value.
if s := strings.Trim(indent, " \t"); len(s) > 0 {
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent")
}
return jsonopts.Indent(indent)
}
// WithIndentPrefix specifies that the encoder should emit multiline output
// where each element in a JSON object or array begins on a new, indented line
// beginning with the indent prefix followed by one or more copies of indent
// (see [WithIndent]) according to the nesting depth.
// The prefix must only be composed of space or tab characters.
//
// This only affects encoding and is ignored when decoding.
// Use of this option implies [Multiline] being set to true.
func WithIndentPrefix(prefix string) Options {
if s := strings.Trim(prefix, " \t"); len(s) > 0 {
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix")
}
return jsonopts.IndentPrefix(prefix)
}
/*
// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit.
// Remember to also update the "Security Considerations" section.
// WithByteLimit sets a limit on the number of bytes of input or output bytes
// that may be consumed or produced for each top-level JSON value.
// If a [Decoder] or [Encoder] method call would need to consume/produce
// more than a total of n bytes to make progress on the top-level JSON value,
// then the call will report an error.
// Whitespace before and within the top-level value is counted against the limit.
// Whitespace after a top-level value is counted against the limit
// for the next top-level value.
//
// A non-positive limit is equivalent to no limit at all.
// If unspecified, the default limit is no limit at all.
// This affects either encoding or decoding.
func WithByteLimit(n int64) Options {
return jsonopts.ByteLimit(max(n, 0))
}
// WithDepthLimit sets a limit on the maximum depth of JSON nesting
// that may be consumed or produced for each top-level JSON value.
// If a [Decoder] or [Encoder] method call would need to consume or produce
// a depth greater than n to make progress on the top-level JSON value,
// then the call will report an error.
//
// A non-positive limit is equivalent to no limit at all.
// If unspecified, the default limit is 10000.
// This affects either encoding or decoding.
func WithDepthLimit(n int) Options {
return jsonopts.DepthLimit(max(n, 0))
}
*/

View File

@@ -0,0 +1,152 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"io"
"math/bits"
"sync"
)
// TODO(https://go.dev/issue/47657): Use sync.PoolOf.
var (
// This owns the internal buffer since there is no io.Writer to output to.
// Since the buffer can get arbitrarily large in normal usage,
// there is statistical tracking logic to determine whether to recycle
// the internal buffer or not based on a history of utilization.
bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
// This owns the internal buffer, but it is only used to temporarily store
// buffered JSON before flushing it to the underlying io.Writer.
// In a sufficiently efficient streaming mode, we do not expect the buffer
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
// This does not own the internal buffer since
// it is taken directly from the provided bytes.Buffer.
bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
)
// bufferStatistics is statistics to track buffer utilization.
// It is used to determine whether to recycle a buffer or not
// to avoid https://go.dev/issue/23199.
type bufferStatistics struct {
strikes int // number of times the buffer was under-utilized
prevLen int // length of previous buffer
}
func getBufferedEncoder(opts ...Options) *Encoder {
e := bufferedEncoderPool.Get().(*Encoder)
if e.s.Buf == nil {
// Round up to nearest 2ⁿ to make best use of malloc size classes.
// See runtime/sizeclasses.go on Go1.15.
// Logical OR with 63 to ensure 64 as the minimum buffer size.
n := 1 << bits.Len(uint(e.s.bufStats.prevLen|63))
e.s.Buf = make([]byte, 0, n)
}
e.s.reset(e.s.Buf[:0], nil, opts...)
return e
}
func putBufferedEncoder(e *Encoder) {
// Recycle large buffers only if sufficiently utilized.
// If a buffer is under-utilized enough times sequentially,
// then it is discarded, ensuring that a single large buffer
// won't be kept alive by a continuous stream of small usages.
//
// The worst case utilization is computed as:
// MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES)
//
// For the constants chosen below, this is (25%)/(1+4) ⇒ 5%.
// This may seem low, but it ensures a lower bound on
// the absolute worst-case utilization. Without this check,
// this would be theoretically 0%, which is infinitely worse.
//
// See https://go.dev/issue/27735.
switch {
case cap(e.s.Buf) <= 4<<10: // always recycle buffers smaller than 4KiB
e.s.bufStats.strikes = 0
case cap(e.s.Buf)/4 <= len(e.s.Buf): // at least 25% utilization
e.s.bufStats.strikes = 0
case e.s.bufStats.strikes < 4: // at most 4 strikes
e.s.bufStats.strikes++
default: // discard the buffer; too large and too often under-utilized
e.s.bufStats.strikes = 0
e.s.bufStats.prevLen = len(e.s.Buf) // heuristic for size to allocate next time
e.s.Buf = nil
}
bufferedEncoderPool.Put(e)
}
func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder {
if _, ok := w.(*bytes.Buffer); ok {
e := bytesBufferEncoderPool.Get().(*Encoder)
e.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer
return e
} else {
e := streamingEncoderPool.Get().(*Encoder)
e.s.reset(e.s.Buf[:0], w, opts...) // preserve existing buffer
return e
}
}
func putStreamingEncoder(e *Encoder) {
if _, ok := e.s.wr.(*bytes.Buffer); ok {
bytesBufferEncoderPool.Put(e)
} else {
if cap(e.s.Buf) > 64<<10 {
e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory
}
streamingEncoderPool.Put(e)
}
}
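// examplePooledEncoding is an illustrative sketch, not part of the original
// source. The pool helpers above are intended to be used in matched get/put
// pairs; WriteValue is assumed to be the Encoder method defined elsewhere
// in this package.
func examplePooledEncoding(w io.Writer) error {
	e := getStreamingEncoder(w)
	defer putStreamingEncoder(e)
	return e.WriteValue(Value(`null`))
}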
var (
// This does not own the internal buffer since it is externally provided.
bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
// This owns the internal buffer, but it is only used to temporarily store
// buffered JSON fetched from the underlying io.Reader.
// In a sufficiently efficient streaming mode, we do not expect the buffer
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
// This does not own the internal buffer since
// it is taken directly from the provided bytes.Buffer.
bytesBufferDecoderPool = bufferedDecoderPool
)
func getBufferedDecoder(b []byte, opts ...Options) *Decoder {
d := bufferedDecoderPool.Get().(*Decoder)
d.s.reset(b, nil, opts...)
return d
}
func putBufferedDecoder(d *Decoder) {
bufferedDecoderPool.Put(d)
}
func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder {
if _, ok := r.(*bytes.Buffer); ok {
d := bytesBufferDecoderPool.Get().(*Decoder)
d.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer
return d
} else {
d := streamingDecoderPool.Get().(*Decoder)
d.s.reset(d.s.buf[:0], r, opts...) // preserve existing buffer
return d
}
}
func putStreamingDecoder(d *Decoder) {
if _, ok := d.s.rd.(*bytes.Buffer); ok {
bytesBufferDecoderPool.Put(d)
} else {
if cap(d.s.buf) > 64<<10 {
d.s.buf = nil // avoid pinning arbitrarily large amounts of memory
}
streamingDecoderPool.Put(d)
}
}

View File

@@ -0,0 +1,41 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonwire"
)
// AppendQuote appends a double-quoted JSON string literal representing src
// to dst and returns the extended buffer.
// It uses the minimal string representation per RFC 8785, section 3.2.2.2.
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
// and an error is returned at the end indicating the presence of invalid UTF-8.
// The dst must not overlap with the src.
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
dst, err := jsonwire.AppendQuote(dst, src, &jsonflags.Flags{})
if err != nil {
err = &SyntacticError{Err: err}
}
return dst, err
}
// AppendUnquote appends the decoded interpretation of src as a
// double-quoted JSON string literal to dst and returns the extended buffer.
// The input src must be a JSON string without any surrounding whitespace.
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
// and an error is returned at the end indicating the presence of invalid UTF-8.
// Any trailing bytes after the JSON string literal result in an error.
// The dst must not overlap with the src.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
dst, err := jsonwire.AppendUnquote(dst, src)
if err != nil {
err = &SyntacticError{Err: err}
}
return dst, err
}
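// exampleQuoteRoundTrip is an illustrative sketch, not part of the original
// source. It shows AppendQuote and AppendUnquote round-tripping a string
// through its JSON literal form using the functions defined above.
func exampleQuoteRoundTrip() {
	quoted, _ := AppendQuote(nil, "hello, 世界") // `"hello, 世界"`
	unquoted, _ := AppendUnquote(nil, quoted)  // `hello, 世界`
	_ = unquoted
}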

View File

@@ -0,0 +1,828 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"errors"
"iter"
"math"
"strconv"
"strings"
"unicode/utf8"
"encoding/json/internal/jsonwire"
)
// ErrDuplicateName indicates that a JSON token could not be
// encoded or decoded because it results in a duplicate JSON object name.
// This error is directly wrapped within a [SyntacticError] when produced.
//
// The name of a duplicate JSON object member can be extracted as:
//
// err := ...
// var serr jsontext.SyntacticError
// if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName {
// ptr := serr.JSONPointer // JSON pointer to duplicate name
// name := ptr.LastToken() // duplicate name itself
// ...
// }
//
// This error is only returned if [AllowDuplicateNames] is false.
var ErrDuplicateName = errors.New("duplicate object member name")
// ErrNonStringName indicates that a JSON token could not be
// encoded or decoded because it is not a string,
// as required for JSON object names according to RFC 8259, section 4.
// This error is directly wrapped within a [SyntacticError] when produced.
var ErrNonStringName = errors.New("object member name must be a string")
var (
errMissingValue = errors.New("missing value after object name")
errMismatchDelim = errors.New("mismatching structural token for object or array")
errMaxDepth = errors.New("exceeded max depth")
errInvalidNamespace = errors.New("object namespace is in an invalid state")
)
// Per RFC 8259, section 9, implementations may enforce a maximum depth.
// Such a limit is necessary to prevent stack overflows.
const maxNestingDepth = 10000
type state struct {
// Tokens validates whether the next token kind is valid.
Tokens stateMachine
// Names is a stack of object names.
Names objectNameStack
// Namespaces is a stack of object namespaces.
// For performance reasons, Encoder or Decoder may not update this
// if Marshal or Unmarshal is able to track names in a more efficient way.
// See makeMapArshaler and makeStructArshaler.
// Not used if AllowDuplicateNames is true.
Namespaces objectNamespaceStack
}
// needObjectValue reports whether the next token should be an object value.
// This method is used by [wrapSyntacticError].
func (s *state) needObjectValue() bool {
return s.Tokens.Last.needObjectValue()
}
func (s *state) reset() {
s.Tokens.reset()
s.Names.reset()
s.Namespaces.reset()
}
// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value
// relative to the root of the top-level JSON value.
//
// A Pointer is a slash-separated list of tokens, where each token is
// either a JSON object name or an index to a JSON array element
// encoded as a base-10 integer value.
// It is impossible to distinguish between an array index and an object name
// (that happens to be a base-10 encoded integer) without also knowing
// the structure of the top-level JSON value that the pointer refers to.
//
// There is exactly one representation of a pointer to a particular value,
// so comparability of Pointer values is equivalent to checking whether
// they both point to the exact same value.
type Pointer string
// IsValid reports whether p is a valid JSON Pointer according to RFC 6901.
// Note that the concatenation of two valid pointers produces a valid pointer.
func (p Pointer) IsValid() bool {
for i, r := range p {
switch {
case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')):
return false // invalid escape
case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"):
return false // invalid UTF-8
}
}
return len(p) == 0 || p[0] == '/'
}
// Contains reports whether the JSON value that p points to
// is equal to or contains the JSON value that pc points to.
func (p Pointer) Contains(pc Pointer) bool {
// Invariant: len(p) <= len(pc) if p.Contains(pc)
suffix, ok := strings.CutPrefix(string(pc), string(p))
return ok && (suffix == "" || suffix[0] == '/')
}
// Parent strips off the last token and returns the remaining pointer.
// The parent of an empty p is an empty string.
func (p Pointer) Parent() Pointer {
return p[:max(strings.LastIndexByte(string(p), '/'), 0)]
}
// LastToken returns the last token in the pointer.
// The last token of an empty p is an empty string.
func (p Pointer) LastToken() string {
last := p[max(strings.LastIndexByte(string(p), '/'), 0):]
return unescapePointerToken(strings.TrimPrefix(string(last), "/"))
}
// AppendToken appends a token to the end of p and returns the full pointer.
func (p Pointer) AppendToken(tok string) Pointer {
return Pointer(appendEscapePointerName([]byte(p+"/"), tok))
}
// TODO: Add Pointer.AppendTokens,
// but should this take in a ...string or an iter.Seq[string]?
// Tokens returns an iterator over the reference tokens in the JSON pointer,
// starting from the first token until the last token (unless stopped early).
func (p Pointer) Tokens() iter.Seq[string] {
return func(yield func(string) bool) {
for len(p) > 0 {
p = Pointer(strings.TrimPrefix(string(p), "/"))
i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p)))
if !yield(unescapePointerToken(string(p)[:i])) {
return
}
p = p[i:]
}
}
}
func unescapePointerToken(token string) string {
if strings.Contains(token, "~") {
// Per RFC 6901, section 3, unescape '~' and '/' characters.
token = strings.ReplaceAll(token, "~1", "/")
token = strings.ReplaceAll(token, "~0", "~")
}
return token
}
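// examplePointerTokens is an illustrative sketch, not part of the original
// source. It shows how reference tokens are unescaped per RFC 6901:
// "~1" decodes to '/' and "~0" decodes to '~'.
func examplePointerTokens() {
	p := Pointer("/foo/~1bar/~0baz/0")
	_ = p.IsValid()   // true: begins with '/' and contains only valid escapes
	_ = p.LastToken() // "0"
	for tok := range p.Tokens() {
		_ = tok // yields "foo", "/bar", "~baz", "0" in order
	}
}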
// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
//
// - If where is -1, then it points to the previously processed token.
//
// - If where is 0, then it points to the parent JSON object or array,
// or an object member if in-between an object member key and value.
// This is useful when the position is ambiguous whether
// we are interested in the previous or next token, or
// when we are uncertain whether the next token
// continues or terminates the current object or array.
//
// - If where is +1, then it points to the next expected value,
// assuming that it continues the current JSON object or array.
// As a special case, if the next token is a JSON object name,
// then it points to the parent JSON object.
//
// Invariant: Must call s.names.copyQuotedBuffer beforehand.
func (s state) appendStackPointer(b []byte, where int) []byte {
var objectDepth int
for i := 1; i < s.Tokens.Depth(); i++ {
e := s.Tokens.index(i)
arrayDelta := -1 // by default point to previous array element
if isLast := i == s.Tokens.Depth()-1; isLast {
switch {
case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
return b
case where > 0 && e.isArray():
arrayDelta = 0 // point to next array element
}
}
switch {
case e.isObject():
b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
objectDepth++
case e.isArray():
b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
}
}
return b
}
func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
for _, r := range string(name) {
// Per RFC 6901, section 3, escape '~' and '/' characters.
switch r {
case '~':
b = append(b, "~0"...)
case '/':
b = append(b, "~1"...)
default:
b = utf8.AppendRune(b, r)
}
}
return b
}
// stateMachine is a push-down automaton that validates whether
// a sequence of tokens is valid or not according to the JSON grammar.
// It is useful for both encoding and decoding.
//
// It is a stack where each entry represents a nested JSON object or array.
// The stack has a minimum depth of 1 where the first level is a
// virtual JSON array to handle a stream of top-level JSON values.
// The top-level virtual JSON array is special in that it doesn't require commas
// between each JSON value.
//
// For performance, most methods are carefully written to be inlinable.
// The zero value is a valid state machine ready for use.
type stateMachine struct {
Stack []stateEntry
Last stateEntry
}
// reset resets the state machine.
// The machine always starts with a minimum depth of 1.
func (m *stateMachine) reset() {
m.Stack = m.Stack[:0]
if cap(m.Stack) > 1<<10 {
m.Stack = nil
}
m.Last = stateTypeArray
}
// Depth is the current nested depth of JSON objects and arrays.
// It is one-indexed (i.e., top-level values have a depth of 1).
func (m stateMachine) Depth() int {
return len(m.Stack) + 1
}
// index returns a reference to the ith entry.
// It is only valid until the next push method call.
func (m *stateMachine) index(i int) *stateEntry {
if i == len(m.Stack) {
return &m.Last
}
return &m.Stack[i]
}
// DepthLength reports the current nested depth and
// the length of the last JSON object or array.
func (m stateMachine) DepthLength() (int, int64) {
return m.Depth(), m.Last.Length()
}
// appendLiteral appends a JSON literal as the next token in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendLiteral() error {
switch {
case m.Last.NeedObjectName():
return ErrNonStringName
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last.Increment()
return nil
}
}
// appendString appends a JSON string as the next token in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendString() error {
switch {
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last.Increment()
return nil
}
}
// appendNumber appends a JSON number as the next token in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendNumber() error {
return m.appendLiteral()
}
// pushObject appends a JSON begin object token as next in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) pushObject() error {
switch {
case m.Last.NeedObjectName():
return ErrNonStringName
case !m.Last.isValidNamespace():
return errInvalidNamespace
case len(m.Stack) == maxNestingDepth:
return errMaxDepth
default:
m.Last.Increment()
m.Stack = append(m.Stack, m.Last)
m.Last = stateTypeObject
return nil
}
}
// popObject appends a JSON end object token as next in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) popObject() error {
switch {
case !m.Last.isObject():
return errMismatchDelim
case m.Last.needObjectValue():
return errMissingValue
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last = m.Stack[len(m.Stack)-1]
m.Stack = m.Stack[:len(m.Stack)-1]
return nil
}
}
// pushArray appends a JSON begin array token as next in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) pushArray() error {
switch {
case m.Last.NeedObjectName():
return ErrNonStringName
case !m.Last.isValidNamespace():
return errInvalidNamespace
case len(m.Stack) == maxNestingDepth:
return errMaxDepth
default:
m.Last.Increment()
m.Stack = append(m.Stack, m.Last)
m.Last = stateTypeArray
return nil
}
}
// popArray appends a JSON end array token as next in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) popArray() error {
switch {
case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array
return errMismatchDelim
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last = m.Stack[len(m.Stack)-1]
m.Stack = m.Stack[:len(m.Stack)-1]
return nil
}
}
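// exampleStateMachineSequence is an illustrative sketch, not part of the
// original source. It walks the state machine through the token sequence
// for `{"k":[0]}`; every call below is expected to return a nil error.
func exampleStateMachineSequence() {
	var m stateMachine
	m.reset()
	_ = m.pushObject()   // {
	_ = m.appendString() // "k" (object name)
	_ = m.pushArray()    // [
	_ = m.appendNumber() // 0
	_ = m.popArray()     // ]
	_ = m.popObject()    // }
}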
// NeedIndent reports whether indent whitespace should be injected.
// A zero value means that no whitespace should be injected.
// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody
// should be appended to the output immediately before the next token.
func (m stateMachine) NeedIndent(next Kind) (n int) {
willEnd := next == '}' || next == ']'
switch {
case m.Depth() == 1:
return 0 // top-level values are never indented
case m.Last.Length() == 0 && willEnd:
return 0 // an empty object or array is never indented
case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
return m.Depth()
case willEnd:
return m.Depth() - 1
default:
return 0
}
}
// MayAppendDelim appends a colon or comma that may precede the next token.
func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
switch {
case m.Last.needImplicitColon():
return append(b, ':')
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
return append(b, ',')
default:
return b
}
}
// needDelim reports whether a colon or comma token should be implicitly emitted
// before the next token of the specified kind.
// A zero value means no delimiter should be emitted.
func (m stateMachine) needDelim(next Kind) (delim byte) {
switch {
case m.Last.needImplicitColon():
return ':'
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
return ','
default:
return 0
}
}
// InvalidateDisabledNamespaces marks all disabled namespaces as invalid.
//
// For efficiency, Marshal and Unmarshal may disable namespaces since there are
// more efficient ways to track duplicate names. However, if an error occurs,
// the namespaces in Encoder or Decoder will be left in an inconsistent state.
// Mark the namespaces as invalid so that future method calls on
// Encoder or Decoder will return an error.
func (m *stateMachine) InvalidateDisabledNamespaces() {
for i := range m.Depth() {
e := m.index(i)
if !e.isActiveNamespace() {
e.invalidateNamespace()
}
}
}
// stateEntry encodes several artifacts within a single unsigned integer:
// - whether this represents a JSON object or array,
// - whether this object should check for duplicate names, and
// - how many elements are in this JSON object or array.
type stateEntry uint64
const (
// The type mask (1 bit) records whether this is a JSON object or array.
stateTypeMask stateEntry = 0x8000_0000_0000_0000
stateTypeObject stateEntry = 0x8000_0000_0000_0000
stateTypeArray stateEntry = 0x0000_0000_0000_0000
// The name check mask (2 bits) records whether to update
// the namespaces for the current JSON object and
// whether the namespace is valid.
stateNamespaceMask stateEntry = 0x6000_0000_0000_0000
stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
// The count mask (61 bits) records the number of elements.
stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff
stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
stateCountOdd stateEntry = 0x0000_0000_0000_0001
stateCountEven stateEntry = 0x0000_0000_0000_0000
)
// Length reports the number of elements in the JSON object or array.
// Each name and value in an object entry is treated as a separate element.
func (e stateEntry) Length() int64 {
return int64(e & stateCountMask)
}
// isObject reports whether this is a JSON object.
func (e stateEntry) isObject() bool {
return e&stateTypeMask == stateTypeObject
}
// isArray reports whether this is a JSON array.
func (e stateEntry) isArray() bool {
return e&stateTypeMask == stateTypeArray
}
// NeedObjectName reports whether the next token must be a JSON string,
// which is necessary for JSON object names.
func (e stateEntry) NeedObjectName() bool {
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
}
// needImplicitColon reports whether a colon should occur next,
// which always occurs after JSON object names.
func (e stateEntry) needImplicitColon() bool {
return e.needObjectValue()
}
// needObjectValue reports whether the next token must be a JSON value,
// which is necessary after every JSON object name.
func (e stateEntry) needObjectValue() bool {
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
}
// needImplicitComma reports whether a comma should occur next,
// which always occurs after a value in a JSON object or array
// before the next value (or name).
func (e stateEntry) needImplicitComma(next Kind) bool {
return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
}
// Increment increments the number of elements for the current object or array.
// This assumes that overflow won't practically be an issue since
// 1<<bits.OnesCount(stateCountMask) is sufficiently large.
func (e *stateEntry) Increment() {
(*e)++
}
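// exampleStateEntryParity is an illustrative sketch, not part of the original
// source. Since names and values are counted as separate elements, the low
// bit of the count alternates between "need name" and "need value" while
// appending tokens within a JSON object.
func exampleStateEntryParity() {
	e := stateTypeObject
	_ = e.NeedObjectName()  // true: count is even, so a name is expected next
	e.Increment()           // a name was appended
	_ = e.needObjectValue() // true: count is odd, so a value is expected next
	e.Increment()           // a value was appended
	_ = e.NeedObjectName()  // true again
}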
// decrement decrements the number of elements for the current object or array.
// It is the caller's responsibility to ensure that e.Length() > 0.
func (e *stateEntry) decrement() {
(*e)--
}
// DisableNamespace disables the JSON object namespace such that the
// Encoder or Decoder no longer updates the namespace.
func (e *stateEntry) DisableNamespace() {
*e |= stateDisableNamespace
}
// isActiveNamespace reports whether the JSON object namespace is actively
// being updated and used for duplicate name checks.
func (e stateEntry) isActiveNamespace() bool {
return e&(stateDisableNamespace) == 0
}
// invalidateNamespace marks the JSON object namespace as being invalid.
func (e *stateEntry) invalidateNamespace() {
*e |= stateInvalidNamespace
}
// isValidNamespace reports whether the JSON object namespace is valid.
func (e stateEntry) isValidNamespace() bool {
return e&(stateInvalidNamespace) == 0
}
// objectNameStack is a stack of names when descending into a JSON object.
// In contrast to objectNamespaceStack, this only has to remember a single name
// per JSON object.
//
// This data structure may contain offsets to encodeBuffer or decodeBuffer.
// It violates clean abstraction of layers, but is significantly more efficient.
// This ensures that popping and pushing in the common case is a trivial
// push/pop of an offset integer.
//
// The zero value is an empty names stack ready for use.
type objectNameStack struct {
// offsets is a stack of offsets for each name.
// A non-negative offset is the ending offset into the local names buffer.
// A negative offset is the bit-wise inverse of a starting offset into
// a remote buffer (e.g., encodeBuffer or decodeBuffer).
// A math.MinInt offset at the end implies that the last object is empty.
// Invariant: Positive offsets always occur before negative offsets.
offsets []int
// unquotedNames is a back-to-back concatenation of names.
unquotedNames []byte
}
func (ns *objectNameStack) reset() {
ns.offsets = ns.offsets[:0]
ns.unquotedNames = ns.unquotedNames[:0]
if cap(ns.offsets) > 1<<6 {
ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
}
if cap(ns.unquotedNames) > 1<<10 {
ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
}
}
func (ns *objectNameStack) length() int {
return len(ns.offsets)
}
// getUnquoted retrieves the ith unquoted name in the stack.
// It returns an empty string if the last object is empty.
//
// Invariant: Must call copyQuotedBuffer beforehand.
func (ns *objectNameStack) getUnquoted(i int) []byte {
ns.ensureCopiedBuffer()
if i == 0 {
return ns.unquotedNames[:ns.offsets[0]]
} else {
return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
}
}
// invalidOffset indicates that the last JSON object currently has no name.
const invalidOffset = math.MinInt
// push descends into a nested JSON object.
func (ns *objectNameStack) push() {
ns.offsets = append(ns.offsets, invalidOffset)
}
// ReplaceLastQuotedOffset replaces the last name with the starting offset
// to the quoted name in some remote buffer. All offsets provided must be
// relative to the same buffer until copyQuotedBuffer is called.
func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
// Use bit-wise inversion instead of naive multiplication by -1 to avoid
// ambiguity regarding zero (which is a valid offset into the names field).
// Bit-wise inversion is mathematically equivalent to -i-1,
// such that 0 becomes -1, 1 becomes -2, and so forth.
// This ensures that remote offsets are always negative.
ns.offsets[len(ns.offsets)-1] = ^i
}
// replaceLastUnquotedName replaces the last name with the provided name.
//
// Invariant: Must call copyQuotedBuffer beforehand.
func (ns *objectNameStack) replaceLastUnquotedName(s string) {
ns.ensureCopiedBuffer()
var startOffset int
if len(ns.offsets) > 1 {
startOffset = ns.offsets[len(ns.offsets)-2]
}
ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
}
// clearLast removes any name in the last JSON object.
// It is semantically equivalent to ns.push followed by ns.pop.
func (ns *objectNameStack) clearLast() {
ns.offsets[len(ns.offsets)-1] = invalidOffset
}
// pop ascends out of a nested JSON object.
func (ns *objectNameStack) pop() {
ns.offsets = ns.offsets[:len(ns.offsets)-1]
}
// copyQuotedBuffer copies names from the remote buffer into the local names
// buffer so that there are no more offset references into the remote buffer.
// This allows the remote buffer to change contents without affecting
// the names that this data structure is trying to remember.
func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
// Find the first negative offset.
var i int
for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
continue
}
// Copy each name from the remote buffer into the local buffer.
for i = i + 1; i < len(ns.offsets); i++ {
if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
if i == 0 {
ns.offsets[i] = 0
} else {
ns.offsets[i] = ns.offsets[i-1]
}
break // last JSON object had a push without any names
}
// As a form of Hyrum proofing, we write an invalid character into the
// buffer to make misuse of Decoder.ReadToken more obvious.
// We need to undo that mutation here.
quotedName := b[^ns.offsets[i]:]
if quotedName[0] == invalidateBufferByte {
quotedName[0] = '"'
}
// Append the unquoted name to the local buffer.
var startOffset int
if i > 0 {
startOffset = ns.offsets[i-1]
}
if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
} else {
ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
}
ns.offsets[i] = len(ns.unquotedNames)
}
}
func (ns *objectNameStack) ensureCopiedBuffer() {
if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
panic("BUG: copyQuotedBuffer not called beforehand")
}
}
// objectNamespaceStack is a stack of object namespaces.
// This data structure assists in detecting duplicate names.
type objectNamespaceStack []objectNamespace
// reset resets the object namespace stack.
func (nss *objectNamespaceStack) reset() {
if cap(*nss) > 1<<10 {
*nss = nil
}
*nss = (*nss)[:0]
}
// push starts a new namespace for a nested JSON object.
func (nss *objectNamespaceStack) push() {
if cap(*nss) > len(*nss) {
*nss = (*nss)[:len(*nss)+1]
nss.Last().reset()
} else {
*nss = append(*nss, objectNamespace{})
}
}
// Last returns a pointer to the last JSON object namespace.
func (nss objectNamespaceStack) Last() *objectNamespace {
return &nss[len(nss)-1]
}
// pop terminates the namespace for a nested JSON object.
func (nss *objectNamespaceStack) pop() {
*nss = (*nss)[:len(*nss)-1]
}
// objectNamespace is the namespace for a JSON object.
// In contrast to objectNameStack, this needs to remember all names
// per JSON object.
//
// The zero value is an empty namespace ready for use.
type objectNamespace struct {
// It relies on a linear search over all the names before switching
// to use a Go map for direct lookup.
// endOffsets is a list of offsets to the end of each name in buffers.
// The length of offsets is the number of names in the namespace.
endOffsets []uint
// allUnquotedNames is a back-to-back concatenation of every name in the namespace.
allUnquotedNames []byte
// mapNames is a Go map containing every name in the namespace.
// Only valid if non-nil.
mapNames map[string]struct{}
}
// reset resets the namespace to be empty.
func (ns *objectNamespace) reset() {
ns.endOffsets = ns.endOffsets[:0]
ns.allUnquotedNames = ns.allUnquotedNames[:0]
ns.mapNames = nil
if cap(ns.endOffsets) > 1<<6 {
ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
}
if cap(ns.allUnquotedNames) > 1<<10 {
ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
}
}
// length reports the number of names in the namespace.
func (ns *objectNamespace) length() int {
return len(ns.endOffsets)
}
// getUnquoted retrieves the ith unquoted name in the namespace.
func (ns *objectNamespace) getUnquoted(i int) []byte {
if i == 0 {
return ns.allUnquotedNames[:ns.endOffsets[0]]
} else {
return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]]
}
}
// lastUnquoted retrieves the last name in the namespace.
func (ns *objectNamespace) lastUnquoted() []byte {
return ns.getUnquoted(ns.length() - 1)
}
// insertQuoted inserts a name and reports whether it was inserted,
// which only occurs if name is not already in the namespace.
// The provided name must be a valid JSON string.
func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
if isVerbatim {
name = name[len(`"`) : len(name)-len(`"`)]
}
return ns.insert(name, !isVerbatim)
}
func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
return ns.insert(name, false)
}
func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
var allNames []byte
if quoted {
allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
} else {
allNames = append(ns.allUnquotedNames, name...)
}
name = allNames[len(ns.allUnquotedNames):]
// Switch to a map if the buffer is too large for linear search.
// This does not add the current name to the map.
if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
ns.mapNames = make(map[string]struct{})
var startOffset uint
for _, endOffset := range ns.endOffsets {
name := ns.allUnquotedNames[startOffset:endOffset]
ns.mapNames[string(name)] = struct{}{} // allocates a new string
startOffset = endOffset
}
}
if ns.mapNames == nil {
// Perform linear search over the buffer to find matching names.
// It provides O(n) lookup, but does not require any allocations.
var startOffset uint
for _, endOffset := range ns.endOffsets {
if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
return false
}
startOffset = endOffset
}
} else {
// Use the map if it is populated.
// It provides O(1) lookup, but requires a string allocation per name.
if _, ok := ns.mapNames[string(name)]; ok {
return false
}
ns.mapNames[string(name)] = struct{}{} // allocates a new string
}
ns.allUnquotedNames = allNames
ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
return true
}
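// exampleNamespaceDuplicates is an illustrative sketch, not part of the
// original source. Duplicate detection compares the unquoted form of names,
// so differently escaped spellings of the same name are reported as duplicates.
func exampleNamespaceDuplicates() {
	var ns objectNamespace
	_ = ns.insertQuoted([]byte(`"alpha"`), false)      // true: first occurrence
	_ = ns.insertQuoted([]byte(`"\u0061lpha"`), false) // false: unescapes to "alpha"
}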
// removeLast removes the last name in the namespace.
func (ns *objectNamespace) removeLast() {
if ns.mapNames != nil {
delete(ns.mapNames, string(ns.lastUnquoted()))
}
if ns.length()-1 == 0 {
ns.endOffsets = ns.endOffsets[:0]
ns.allUnquotedNames = ns.allUnquotedNames[:0]
} else {
ns.endOffsets = ns.endOffsets[:ns.length()-1]
ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
}
}

View File

@@ -0,0 +1,396 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"fmt"
"slices"
"strings"
"testing"
"unicode/utf8"
)
func TestPointer(t *testing.T) {
tests := []struct {
in Pointer
wantParent Pointer
wantLast string
wantTokens []string
wantValid bool
}{
{"", "", "", nil, true},
{"a", "", "a", []string{"a"}, false},
{"~", "", "~", []string{"~"}, false},
{"/a", "", "a", []string{"a"}, true},
{"/foo/bar", "/foo", "bar", []string{"foo", "bar"}, true},
{"///", "//", "", []string{"", "", ""}, true},
{"/~0~1", "", "~/", []string{"~/"}, true},
{"/\xde\xad\xbe\xef", "", "\xde\xad\xbe\xef", []string{"\xde\xad\xbe\xef"}, false},
}
for _, tt := range tests {
if got := tt.in.Parent(); got != tt.wantParent {
t.Errorf("Pointer(%q).Parent = %q, want %q", tt.in, got, tt.wantParent)
}
if got := tt.in.LastToken(); got != tt.wantLast {
t.Errorf("Pointer(%q).Last = %q, want %q", tt.in, got, tt.wantLast)
}
if strings.HasPrefix(string(tt.in), "/") {
wantRoundtrip := tt.in
if !utf8.ValidString(string(wantRoundtrip)) {
// Replace bytes of invalid UTF-8 with Unicode replacement character.
wantRoundtrip = Pointer([]rune(wantRoundtrip))
}
if got := tt.in.Parent().AppendToken(tt.in.LastToken()); got != wantRoundtrip {
t.Errorf("Pointer(%q).Parent().AppendToken(LastToken()) = %q, want %q", tt.in, got, tt.in)
}
in := tt.in
for {
if (in + "x").Contains(tt.in) {
t.Errorf("Pointer(%q).Contains(%q) = true, want false", in+"x", tt.in)
}
if !in.Contains(tt.in) {
t.Errorf("Pointer(%q).Contains(%q) = false, want true", in, tt.in)
}
if in == in.Parent() {
break
}
in = in.Parent()
}
}
if got := slices.Collect(tt.in.Tokens()); !slices.Equal(got, tt.wantTokens) {
t.Errorf("Pointer(%q).Tokens = %q, want %q", tt.in, got, tt.wantTokens)
}
if got := tt.in.IsValid(); got != tt.wantValid {
t.Errorf("Pointer(%q).IsValid = %v, want %v", tt.in, got, tt.wantValid)
}
}
}
func TestStateMachine(t *testing.T) {
// To test a state machine, we pass an ordered sequence of operations and
// check whether the current state is as expected.
// The operation type is a union type of various possible operations,
// which either call mutating methods on the state machine or
// call accessor methods on state machine and verify the results.
type operation any
type (
// stackLengths checks the results of stateEntry.length accessors.
stackLengths []int64
// appendTokens is a sequence of token kinds to append where
// none of them are expected to fail.
//
// For example: `[n"0]` is equivalent to the following sequence:
//
// pushArray()
// appendLiteral()
// appendString()
// appendNumber()
// popArray()
//
appendTokens string
// appendToken is a single token kind to append with the expected error.
appendToken struct {
kind Kind
want error
}
// needDelim checks the result of the needDelim accessor.
needDelim struct {
next Kind
want byte
}
)
// Each entry is a sequence of tokens to pass to the state machine.
tests := []struct {
label string
ops []operation
}{{
"TopLevelValues",
[]operation{
stackLengths{0},
needDelim{'n', 0},
appendTokens(`nft`),
stackLengths{3},
needDelim{'"', 0},
appendTokens(`"0[]{}`),
stackLengths{7},
},
}, {
"ArrayValues",
[]operation{
stackLengths{0},
needDelim{'[', 0},
appendTokens(`[`),
stackLengths{1, 0},
needDelim{'n', 0},
appendTokens(`nft`),
stackLengths{1, 3},
needDelim{'"', ','},
appendTokens(`"0[]{}`),
stackLengths{1, 7},
needDelim{']', 0},
appendTokens(`]`),
stackLengths{1},
},
}, {
"ObjectValues",
[]operation{
stackLengths{0},
needDelim{'{', 0},
appendTokens(`{`),
stackLengths{1, 0},
needDelim{'"', 0},
appendTokens(`"`),
stackLengths{1, 1},
needDelim{'n', ':'},
appendTokens(`n`),
stackLengths{1, 2},
needDelim{'"', ','},
appendTokens(`"f"t`),
stackLengths{1, 6},
appendTokens(`"""0"[]"{}`),
stackLengths{1, 14},
needDelim{'}', 0},
appendTokens(`}`),
stackLengths{1},
},
}, {
"ObjectCardinality",
[]operation{
appendTokens(`{`),
// Appending any kind other than string for object name is an error.
appendToken{'n', ErrNonStringName},
appendToken{'f', ErrNonStringName},
appendToken{'t', ErrNonStringName},
appendToken{'0', ErrNonStringName},
appendToken{'{', ErrNonStringName},
appendToken{'[', ErrNonStringName},
appendTokens(`"`),
// Appending '}' without first appending any value is an error.
appendToken{'}', errMissingValue},
appendTokens(`"`),
appendTokens(`}`),
},
}, {
"MismatchingDelims",
[]operation{
appendToken{'}', errMismatchDelim}, // appending '}' without preceding '{'
appendTokens(`[[{`),
appendToken{']', errMismatchDelim}, // appending ']' that mismatches preceding '{'
appendTokens(`}]`),
appendToken{'}', errMismatchDelim}, // appending '}' that mismatches preceding '['
appendTokens(`]`),
appendToken{']', errMismatchDelim}, // appending ']' without preceding '['
},
}}
for _, tt := range tests {
t.Run(tt.label, func(t *testing.T) {
// Flatten appendTokens to sequence of appendToken entries.
var ops []operation
for _, op := range tt.ops {
if toks, ok := op.(appendTokens); ok {
for _, k := range []byte(toks) {
ops = append(ops, appendToken{Kind(k), nil})
}
continue
}
ops = append(ops, op)
}
// Append each token to the state machine and check the output.
var state stateMachine
state.reset()
var sequence []Kind
for _, op := range ops {
switch op := op.(type) {
case stackLengths:
var got []int64
for i := range state.Depth() {
e := state.index(i)
got = append(got, e.Length())
}
want := []int64(op)
if !slices.Equal(got, want) {
t.Fatalf("%s: stack lengths mismatch:\ngot %v\nwant %v", sequence, got, want)
}
case appendToken:
got := state.append(op.kind)
if !equalError(got, op.want) {
t.Fatalf("%s: append('%c') = %v, want %v", sequence, op.kind, got, op.want)
}
if got == nil {
sequence = append(sequence, op.kind)
}
case needDelim:
if got := state.needDelim(op.next); got != op.want {
t.Fatalf("%s: needDelim('%c') = '%c', want '%c'", sequence, op.next, got, op.want)
}
default:
panic(fmt.Sprintf("unknown operation: %T", op))
}
}
})
}
}
// append is a thin wrapper over the other append, pop, or push methods
// based on the token kind.
func (s *stateMachine) append(k Kind) error {
switch k {
case 'n', 'f', 't':
return s.appendLiteral()
case '"':
return s.appendString()
case '0':
return s.appendNumber()
case '{':
return s.pushObject()
case '}':
return s.popObject()
case '[':
return s.pushArray()
case ']':
return s.popArray()
default:
panic(fmt.Sprintf("invalid token kind: '%c'", k))
}
}
func TestObjectNamespace(t *testing.T) {
type operation any
type (
insert struct {
name string
wantInserted bool
}
removeLast struct{}
)
// Sequence of insert operations to perform (order matters).
ops := []operation{
insert{`""`, true},
removeLast{},
insert{`""`, true},
insert{`""`, false},
// Test insertion of the same name with different formatting.
insert{`"alpha"`, true},
insert{`"ALPHA"`, true}, // case-sensitive matching
insert{`"alpha"`, false},
insert{`"\u0061\u006c\u0070\u0068\u0061"`, false}, // unescapes to "alpha"
removeLast{}, // removes "ALPHA"
insert{`"alpha"`, false},
removeLast{}, // removes "alpha"
insert{`"alpha"`, true},
removeLast{},
// Bulk insert simple names.
insert{`"alpha"`, true},
insert{`"bravo"`, true},
insert{`"charlie"`, true},
insert{`"delta"`, true},
insert{`"echo"`, true},
insert{`"foxtrot"`, true},
insert{`"golf"`, true},
insert{`"hotel"`, true},
insert{`"india"`, true},
insert{`"juliet"`, true},
insert{`"kilo"`, true},
insert{`"lima"`, true},
insert{`"mike"`, true},
insert{`"november"`, true},
insert{`"oscar"`, true},
insert{`"papa"`, true},
insert{`"quebec"`, true},
insert{`"romeo"`, true},
insert{`"sierra"`, true},
insert{`"tango"`, true},
insert{`"uniform"`, true},
insert{`"victor"`, true},
insert{`"whiskey"`, true},
insert{`"xray"`, true},
insert{`"yankee"`, true},
insert{`"zulu"`, true},
// Test insertion of invalid UTF-8.
insert{`"` + "\ufffd" + `"`, true},
insert{`"` + "\ufffd" + `"`, false},
insert{`"\ufffd"`, false}, // unescapes to Unicode replacement character
insert{`"\uFFFD"`, false}, // unescapes to Unicode replacement character
insert{`"` + "\xff" + `"`, false}, // mangles as Unicode replacement character
removeLast{},
insert{`"` + "\ufffd" + `"`, true},
// Test insertion of unicode characters.
insert{`"☺☻☹"`, true},
insert{`"☺☻☹"`, false},
removeLast{},
insert{`"☺☻☹"`, true},
}
// Execute the sequence of operations twice:
// 1) on a fresh namespace and 2) on a namespace that has been reset.
var ns objectNamespace
wantNames := []string{}
for _, reset := range []bool{false, true} {
if reset {
ns.reset()
wantNames = nil
}
// Execute the operations and ensure the state is consistent.
for i, op := range ops {
switch op := op.(type) {
case insert:
gotInserted := ns.insertQuoted([]byte(op.name), false)
if gotInserted != op.wantInserted {
t.Fatalf("%d: objectNamespace{%v}.insert(%v) = %v, want %v", i, strings.Join(wantNames, " "), op.name, gotInserted, op.wantInserted)
}
if gotInserted {
b, _ := AppendUnquote(nil, []byte(op.name))
wantNames = append(wantNames, string(b))
}
case removeLast:
ns.removeLast()
wantNames = wantNames[:len(wantNames)-1]
default:
panic(fmt.Sprintf("unknown operation: %T", op))
}
// Check that the namespace is consistent.
gotNames := []string{}
for i := range ns.length() {
gotNames = append(gotNames, string(ns.getUnquoted(i)))
}
if !slices.Equal(gotNames, wantNames) {
t.Fatalf("%d: objectNamespace = {%v}, want {%v}", i, strings.Join(gotNames, " "), strings.Join(wantNames, " "))
}
}
// Verify that we have not switched to using a Go map.
if ns.mapNames != nil {
t.Errorf("objectNamespace.mapNames = non-nil, want nil")
}
// Insert a large number of names.
for i := range 64 {
ns.InsertUnquoted([]byte(fmt.Sprintf(`name%d`, i)))
}
// Verify that we did switch to using a Go map.
if ns.mapNames == nil {
t.Errorf("objectNamespace.mapNames = nil, want non-nil")
}
}
}

View File

@@ -0,0 +1,527 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"math"
"strconv"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonwire"
)
// NOTE: Token is analogous to v1 json.Token.
const (
maxInt64 = math.MaxInt64
minInt64 = math.MinInt64
maxUint64 = math.MaxUint64
minUint64 = 0 // for consistency and readability purposes
invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call"
)
var errInvalidToken = errors.New("invalid jsontext.Token")
// Token represents a lexical JSON token, which may be one of the following:
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - a begin or end delimiter for a JSON object (i.e., { or } )
// - a begin or end delimiter for a JSON array (i.e., [ or ] )
//
// A Token cannot represent entire array or object values, while a [Value] can.
// There is no Token to represent commas and colons since
// these structural tokens can be inferred from the surrounding context.
type Token struct {
nonComparable
// Tokens can exist in either a "raw" or an "exact" form.
// Tokens produced by the Decoder are in the "raw" form.
// Tokens returned by constructors are usually in the "exact" form.
// The Encoder accepts Tokens in either the "raw" or "exact" form.
//
// The following chart shows the possible values for each Token type:
// ╔═════════════════╦════════════╤════════════╤════════════╗
// ║ Token type ║ raw field │ str field │ num field ║
// ╠═════════════════╬════════════╪════════════╪════════════╣
// ║ null (raw) ║ "null" │ "" │ 0 ║
// ║ false (raw) ║ "false" │ "" │ 0 ║
// ║ true (raw) ║ "true" │ "" │ 0 ║
// ║ string (raw) ║ non-empty │ "" │ offset ║
// ║ string (string) ║ nil │ non-empty │ 0 ║
// ║ number (raw) ║ non-empty │ "" │ offset ║
// ║ number (float) ║ nil │ "f" │ non-zero ║
// ║ number (int64) ║ nil │ "i" │ non-zero ║
// ║ number (uint64) ║ nil │ "u" │ non-zero ║
// ║ object (delim) ║ "{" or "}" │ "" │ 0 ║
// ║ array (delim) ║ "[" or "]" │ "" │ 0 ║
// ╚═════════════════╩════════════╧════════════╧════════════╝
//
// Notes:
// - For tokens stored in "raw" form, the num field contains the
// absolute offset determined by raw.previousOffsetStart().
// The buffer itself is stored in raw.previousBuffer().
// - JSON literals and structural characters are always in the "raw" form.
// - JSON strings and numbers can be in either "raw" or "exact" forms.
// - The exact zero values of JSON strings and numbers in the "exact" form
// have an ambiguous representation. Thus, they are always represented
// in the "raw" form.
// raw contains a reference to the raw decode buffer.
// If non-nil, then its value takes precedence over str and num.
// It is only valid if num == raw.previousOffsetStart().
raw *decodeBuffer
// str is the unescaped JSON string if num is zero.
// Otherwise, it is "f", "i", or "u" if num should be interpreted
// as a float64, int64, or uint64, respectively.
str string
// num is a float64, int64, or uint64 stored as a uint64 value.
// It is non-zero for any JSON number in the "exact" form.
num uint64
}
// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues?
var (
Null Token = rawToken("null")
False Token = rawToken("false")
True Token = rawToken("true")
BeginObject Token = rawToken("{")
EndObject Token = rawToken("}")
BeginArray Token = rawToken("[")
EndArray Token = rawToken("]")
zeroString Token = rawToken(`""`)
zeroNumber Token = rawToken(`0`)
nanString Token = String("NaN")
pinfString Token = String("Infinity")
ninfString Token = String("-Infinity")
)
func rawToken(s string) Token {
return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}}
}
// Bool constructs a Token representing a JSON boolean.
func Bool(b bool) Token {
if b {
return True
}
return False
}
// String constructs a Token representing a JSON string.
// The provided string should contain valid UTF-8, otherwise invalid characters
// may be mangled as the Unicode replacement character.
func String(s string) Token {
if len(s) == 0 {
return zeroString
}
return Token{str: s}
}
// Float constructs a Token representing a JSON number.
// The values NaN, +Inf, and -Inf will be represented
// as a JSON string with the values "NaN", "Infinity", and "-Infinity".
func Float(n float64) Token {
switch {
case math.Float64bits(n) == 0:
return zeroNumber
case math.IsNaN(n):
return nanString
case math.IsInf(n, +1):
return pinfString
case math.IsInf(n, -1):
return ninfString
}
return Token{str: "f", num: math.Float64bits(n)}
}
// Int constructs a Token representing a JSON number from an int64.
func Int(n int64) Token {
if n == 0 {
return zeroNumber
}
return Token{str: "i", num: uint64(n)}
}
// Uint constructs a Token representing a JSON number from a uint64.
func Uint(n uint64) Token {
if n == 0 {
return zeroNumber
}
return Token{str: "u", num: uint64(n)}
}
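// exampleNonFiniteFloat is an illustrative sketch, not part of the original
// source. Non-finite values have no JSON number representation, so Float
// falls back to the string spellings noted above, which the Float accessor
// defined below recognizes when converting back.
func exampleNonFiniteFloat() {
	t := Float(math.Inf(+1))
	_ = t.Kind()  // '"' since +Inf is represented as the JSON string "Infinity"
	_ = t.Float() // +Inf
}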
// Clone makes a copy of the Token such that its value remains valid
// even after a subsequent [Decoder.Read] call.
func (t Token) Clone() Token {
// TODO: Allow caller to avoid any allocations?
if raw := t.raw; raw != nil {
// Avoid copying globals.
if t.raw.prevStart == 0 {
switch t.raw {
case Null.raw:
return Null
case False.raw:
return False
case True.raw:
return True
case BeginObject.raw:
return BeginObject
case EndObject.raw:
return EndObject
case BeginArray.raw:
return BeginArray
case EndArray.raw:
return EndArray
}
}
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
buf := bytes.Clone(raw.previousBuffer())
return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}}
}
return t
}
// Bool returns the value for a JSON boolean.
// It panics if the token kind is not a JSON boolean.
func (t Token) Bool() bool {
switch t.raw {
case True.raw:
return true
case False.raw:
return false
default:
panic("invalid JSON token kind: " + t.Kind().String())
}
}
// appendString appends a JSON string to dst and returns it.
// It panics if t is not a JSON string.
func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
if raw := t.raw; raw != nil {
// Handle raw string value.
buf := raw.previousBuffer()
if Kind(buf[0]) == '"' {
if jsonwire.ConsumeSimpleString(buf) == len(buf) {
return append(dst, buf...), nil
}
dst, _, err := jsonwire.ReformatString(dst, buf, flags)
return dst, err
}
} else if len(t.str) != 0 && t.num == 0 {
// Handle exact string value.
return jsonwire.AppendQuote(dst, t.str, flags)
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// String returns the unescaped string value for a JSON string.
// For other JSON kinds, this returns the raw JSON representation.
func (t Token) String() string {
// This is inlinable to take advantage of "function outlining".
// This avoids an allocation for the string(b) conversion
// if the caller does not use the string in an escaping manner.
// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
s, b := t.string()
if len(b) > 0 {
return string(b)
}
return s
}
func (t Token) string() (string, []byte) {
if raw := t.raw; raw != nil {
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
buf := raw.previousBuffer()
if buf[0] == '"' {
// TODO: Preserve ValueFlags in Token?
isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf)
return "", jsonwire.UnquoteMayCopy(buf, isVerbatim)
}
// Handle tokens that are not JSON strings for fmt.Stringer.
return "", buf
}
if len(t.str) != 0 && t.num == 0 {
return t.str, nil
}
// Handle tokens that are not JSON strings for fmt.Stringer.
if t.num > 0 {
switch t.str[0] {
case 'f':
return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil
case 'i':
return strconv.FormatInt(int64(t.num), 10), nil
case 'u':
return strconv.FormatUint(uint64(t.num), 10), nil
}
}
return "<invalid jsontext.Token>", nil
}
// appendNumber appends a JSON number to dst and returns it.
// It panics if t is not a JSON number.
func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
if raw := t.raw; raw != nil {
// Handle raw number value.
buf := raw.previousBuffer()
if Kind(buf[0]).normalize() == '0' {
dst, _, err := jsonwire.ReformatNumber(dst, buf, flags)
return dst, err
}
} else if t.num != 0 {
// Handle exact number value.
switch t.str[0] {
case 'f':
return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil
case 'i':
return strconv.AppendInt(dst, int64(t.num), 10), nil
case 'u':
return strconv.AppendUint(dst, uint64(t.num), 10), nil
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Float returns the floating-point value for a JSON number.
// It returns a NaN, +Inf, or -Inf value for any JSON string
// with the values "NaN", "Infinity", or "-Infinity".
// It panics for all other cases.
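//
// Illustrative values (a sketch; behavior follows the accessor tests):
//
//	Float(1.5).Float()      // 1.5
//	String("NaN").Float()   // math.NaN()
//	String("hello").Float() // panics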
func (t Token) Float() float64 {
if raw := t.raw; raw != nil {
// Handle raw number value.
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
buf := raw.previousBuffer()
if Kind(buf[0]).normalize() == '0' {
fv, _ := jsonwire.ParseFloat(buf, 64)
return fv
}
} else if t.num != 0 {
// Handle exact number value.
switch t.str[0] {
case 'f':
return math.Float64frombits(t.num)
case 'i':
return float64(int64(t.num))
case 'u':
return float64(uint64(t.num))
}
}
// Handle string values with "NaN", "Infinity", or "-Infinity".
if t.Kind() == '"' {
switch t.String() {
case "NaN":
return math.NaN()
case "Infinity":
return math.Inf(+1)
case "-Infinity":
return math.Inf(-1)
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Int returns the signed integer value for a JSON number.
// The fractional component of any number is ignored (truncation toward zero).
// Any number beyond the representation of an int64 will be saturated
// to the closest representable value.
// It panics if the token kind is not a JSON number.
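//
// Illustrative values (a sketch; behavior follows the accessor tests):
//
//	Int(-5).Int()              // -5
//	Float(1.9).Int()           // 1 (fraction truncated)
//	Uint(math.MaxUint64).Int() // math.MaxInt64 (saturated)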
func (t Token) Int() int64 {
if raw := t.raw; raw != nil {
// Handle raw integer value.
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
neg := false
buf := raw.previousBuffer()
if len(buf) > 0 && buf[0] == '-' {
neg, buf = true, buf[1:]
}
if numAbs, ok := jsonwire.ParseUint(buf); ok {
if neg {
if numAbs > -minInt64 {
return minInt64
}
return -1 * int64(numAbs)
} else {
if numAbs > +maxInt64 {
return maxInt64
}
return +1 * int64(numAbs)
}
}
} else if t.num != 0 {
// Handle exact integer value.
switch t.str[0] {
case 'i':
return int64(t.num)
case 'u':
if t.num > maxInt64 {
return maxInt64
}
return int64(t.num)
}
}
// Handle JSON number that is a floating-point value.
if t.Kind() == '0' {
switch fv := t.Float(); {
case fv >= maxInt64:
return maxInt64
case fv <= minInt64:
return minInt64
default:
return int64(fv) // truncation toward zero
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Uint returns the unsigned integer value for a JSON number.
// The fractional component of any number is ignored (truncation toward zero).
// Any number beyond the representation of a uint64 will be saturated
// to the closest representable value.
// It panics if the token kind is not a JSON number.
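//
// Illustrative values (a sketch; behavior follows the accessor tests):
//
//	Uint(8).Uint()    // 8
//	Int(-1).Uint()    // 0 (negative numbers saturate to zero)
//	Float(2.7).Uint() // 2 (fraction truncated)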
func (t Token) Uint() uint64 {
// NOTE: This accessor returns 0 for any negative JSON number,
// which might be surprising, but is at least consistent with the behavior
// of saturating out-of-bounds numbers to the closest representable number.
if raw := t.raw; raw != nil {
// Handle raw integer value.
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
neg := false
buf := raw.previousBuffer()
if len(buf) > 0 && buf[0] == '-' {
neg, buf = true, buf[1:]
}
if num, ok := jsonwire.ParseUint(buf); ok {
if neg {
return minUint64
}
return num
}
} else if t.num != 0 {
// Handle exact integer value.
switch t.str[0] {
case 'u':
return t.num
case 'i':
if int64(t.num) < minUint64 {
return minUint64
}
return uint64(int64(t.num))
}
}
// Handle JSON number that is a floating-point value.
if t.Kind() == '0' {
switch fv := t.Float(); {
case fv >= maxUint64:
return maxUint64
case fv <= minUint64:
return minUint64
default:
return uint64(fv) // truncation toward zero
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Kind returns the token kind.
func (t Token) Kind() Kind {
switch {
case t.raw != nil:
raw := t.raw
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
return Kind(t.raw.buf[raw.prevStart]).normalize()
case t.num != 0:
return '0'
case len(t.str) != 0:
return '"'
default:
return invalidKind
}
}
// Kind represents each possible JSON token kind with a single byte,
// which is conveniently the first byte of that kind's grammar
// with the restriction that numbers always be represented with '0':
//
// - 'n': null
// - 'f': false
// - 't': true
// - '"': string
// - '0': number
// - '{': object begin
// - '}': object end
// - '[': array begin
// - ']': array end
//
// An invalid kind is usually represented using 0,
// but may be non-zero due to invalid JSON data.
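//
// Illustrative values (a sketch; behavior follows the accessor tests):
//
//	Null.Kind()        // 'n'
//	String("s").Kind() // '"'
//	Int(-5).Kind()     // '0' (numbers always report as '0')
//	BeginObject.Kind() // '{'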
type Kind byte
const invalidKind Kind = 0
// String prints the kind in a human-readable fashion.
func (k Kind) String() string {
switch k {
case 'n':
return "null"
case 'f':
return "false"
case 't':
return "true"
case '"':
return "string"
case '0':
return "number"
case '{':
return "{"
case '}':
return "}"
case '[':
return "["
case ']':
return "]"
default:
return "<invalid jsontext.Kind: " + jsonwire.QuoteRune(string(k)) + ">"
}
}
// normalize coalesces all possible starting characters of a number as just '0'.
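//
// For example (illustrative):
//
//	Kind('-').normalize() // '0'
//	Kind('7').normalize() // '0'
//	Kind('{').normalize() // '{'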
func (k Kind) normalize() Kind {
if k == '-' || ('0' <= k && k <= '9') {
return '0'
}
return k
}


@@ -0,0 +1,168 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"math"
"reflect"
"testing"
)
func TestTokenStringAllocations(t *testing.T) {
if testing.CoverMode() != "" {
t.Skip("coverage mode breaks the compiler optimization this depends on")
}
tok := rawToken(`"hello"`)
var m map[string]bool
got := int(testing.AllocsPerRun(10, func() {
// This function uses tok.String() in a non-escaping manner
// (i.e., looking it up in a Go map). It should not allocate.
if m[tok.String()] {
panic("never executed")
}
}))
if got > 0 {
t.Errorf("Token.String allocated %d times, want 0", got)
}
}
func TestTokenAccessors(t *testing.T) {
type token struct {
Bool bool
String string
Float float64
Int int64
Uint uint64
Kind Kind
}
tests := []struct {
in Token
want token
}{
{Token{}, token{String: "<invalid jsontext.Token>"}},
{Null, token{String: "null", Kind: 'n'}},
{False, token{Bool: false, String: "false", Kind: 'f'}},
{True, token{Bool: true, String: "true", Kind: 't'}},
{Bool(false), token{Bool: false, String: "false", Kind: 'f'}},
{Bool(true), token{Bool: true, String: "true", Kind: 't'}},
{BeginObject, token{String: "{", Kind: '{'}},
{EndObject, token{String: "}", Kind: '}'}},
{BeginArray, token{String: "[", Kind: '['}},
{EndArray, token{String: "]", Kind: ']'}},
{String(""), token{String: "", Kind: '"'}},
{String("hello, world!"), token{String: "hello, world!", Kind: '"'}},
{rawToken(`"hello, world!"`), token{String: "hello, world!", Kind: '"'}},
{Float(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}},
{Float(math.Copysign(0, -1)), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}},
{Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '"'}},
{Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '"'}},
{Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '"'}},
{Int(minInt64), token{String: "-9223372036854775808", Float: minInt64, Int: minInt64, Uint: minUint64, Kind: '0'}},
{Int(minInt64 + 1), token{String: "-9223372036854775807", Float: minInt64 + 1, Int: minInt64 + 1, Uint: minUint64, Kind: '0'}},
{Int(-1), token{String: "-1", Float: -1, Int: -1, Uint: minUint64, Kind: '0'}},
{Int(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}},
{Int(+1), token{String: "1", Float: +1, Int: +1, Uint: +1, Kind: '0'}},
{Int(maxInt64 - 1), token{String: "9223372036854775806", Float: maxInt64 - 1, Int: maxInt64 - 1, Uint: maxInt64 - 1, Kind: '0'}},
{Int(maxInt64), token{String: "9223372036854775807", Float: maxInt64, Int: maxInt64, Uint: maxInt64, Kind: '0'}},
{Uint(minUint64), token{String: "0", Kind: '0'}},
{Uint(minUint64 + 1), token{String: "1", Float: minUint64 + 1, Int: minUint64 + 1, Uint: minUint64 + 1, Kind: '0'}},
{Uint(maxUint64 - 1), token{String: "18446744073709551614", Float: maxUint64 - 1, Int: maxInt64, Uint: maxUint64 - 1, Kind: '0'}},
{Uint(maxUint64), token{String: "18446744073709551615", Float: maxUint64, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
{rawToken(`-0`), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`1e1000`), token{String: "1e1000", Float: math.MaxFloat64, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
{rawToken(`-1e1000`), token{String: "-1e1000", Float: -math.MaxFloat64, Int: minInt64, Uint: minUint64, Kind: '0'}},
{rawToken(`0.1`), token{String: "0.1", Float: 0.1, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`0.5`), token{String: "0.5", Float: 0.5, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`0.9`), token{String: "0.9", Float: 0.9, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`1.1`), token{String: "1.1", Float: 1.1, Int: 1, Uint: 1, Kind: '0'}},
{rawToken(`-0.1`), token{String: "-0.1", Float: -0.1, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`-0.5`), token{String: "-0.5", Float: -0.5, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`-0.9`), token{String: "-0.9", Float: -0.9, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`-1.1`), token{String: "-1.1", Float: -1.1, Int: -1, Uint: 0, Kind: '0'}},
{rawToken(`99999999999999999999`), token{String: "99999999999999999999", Float: 1e20 - 1, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
{rawToken(`-99999999999999999999`), token{String: "-99999999999999999999", Float: -1e20 - 1, Int: minInt64, Uint: minUint64, Kind: '0'}},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
got := token{
Bool: func() bool {
defer func() { recover() }()
return tt.in.Bool()
}(),
String: tt.in.String(),
Float: func() float64 {
defer func() { recover() }()
return tt.in.Float()
}(),
Int: func() int64 {
defer func() { recover() }()
return tt.in.Int()
}(),
Uint: func() uint64 {
defer func() { recover() }()
return tt.in.Uint()
}(),
Kind: tt.in.Kind(),
}
if got.Bool != tt.want.Bool {
t.Errorf("Token(%s).Bool() = %v, want %v", tt.in, got.Bool, tt.want.Bool)
}
if got.String != tt.want.String {
t.Errorf("Token(%s).String() = %v, want %v", tt.in, got.String, tt.want.String)
}
if math.Float64bits(got.Float) != math.Float64bits(tt.want.Float) {
t.Errorf("Token(%s).Float() = %v, want %v", tt.in, got.Float, tt.want.Float)
}
if got.Int != tt.want.Int {
t.Errorf("Token(%s).Int() = %v, want %v", tt.in, got.Int, tt.want.Int)
}
if got.Uint != tt.want.Uint {
t.Errorf("Token(%s).Uint() = %v, want %v", tt.in, got.Uint, tt.want.Uint)
}
if got.Kind != tt.want.Kind {
t.Errorf("Token(%s).Kind() = %v, want %v", tt.in, got.Kind, tt.want.Kind)
}
})
}
}
func TestTokenClone(t *testing.T) {
tests := []struct {
in Token
wantExactRaw bool
}{
{Token{}, true},
{Null, true},
{False, true},
{True, true},
{BeginObject, true},
{EndObject, true},
{BeginArray, true},
{EndArray, true},
{String("hello, world!"), true},
{rawToken(`"hello, world!"`), false},
{Float(3.14159), true},
{rawToken(`3.14159`), false},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
got := tt.in.Clone()
if !reflect.DeepEqual(got, tt.in) {
t.Errorf("Token(%s) == Token(%s).Clone() = false, want true", tt.in, tt.in)
}
gotExactRaw := got.raw == tt.in.raw
if gotExactRaw != tt.wantExactRaw {
t.Errorf("Token(%s).raw == Token(%s).Clone().raw = %v, want %v", tt.in, tt.in, gotExactRaw, tt.wantExactRaw)
}
})
}
}


@@ -0,0 +1,395 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"io"
"slices"
"sync"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonwire"
)
// NOTE: Value is analogous to v1 json.RawMessage.
// AppendFormat formats the JSON value in src and appends it to dst
// according to the specified options.
// See [Value.Format] for more details about the formatting behavior.
//
// The dst and src may overlap.
// If an error is reported, then the entirety of src is appended to dst.
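//
// Illustrative usage (a minimal sketch; default options produce minimal JSON):
//
//	dst, err := AppendFormat(nil, []byte(` { "fizz" : "buzz" } `))
//	// on success, dst is `{"fizz":"buzz"}`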
func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) {
e := getBufferedEncoder(opts...)
defer putBufferedEncoder(e)
e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
if err := e.s.WriteValue(src); err != nil {
return append(dst, src...), err
}
return append(dst, e.s.Buf...), nil
}
// Value represents a single raw JSON value, which may be one of the following:
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - an entire JSON object (e.g., {"fizz":"buzz"} )
// - an entire JSON array (e.g., [1,2,3] )
//
// Value can represent entire array or object values, while [Token] cannot.
// Value may contain leading and/or trailing whitespace.
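//
// Illustrative usage (a minimal sketch):
//
//	v := Value(`{"fizz":"buzz"}`)
//	_ = v.Kind() // '{'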
type Value []byte
// Clone returns a copy of v.
func (v Value) Clone() Value {
return bytes.Clone(v)
}
// String returns the string formatting of v.
func (v Value) String() string {
if v == nil {
return "null"
}
return string(v)
}
// IsValid reports whether the raw JSON value is syntactically valid
// according to the specified options.
//
// By default (if no options are specified), it validates according to RFC 7493.
// It verifies whether the input is properly encoded as UTF-8,
// that escape sequences within strings decode to valid Unicode codepoints, and
// that all names in each object are unique.
// It does not verify whether numbers are representable within the limits
// of any common numeric type (e.g., float64, int64, or uint64).
//
// Relevant options include:
// - [AllowDuplicateNames]
// - [AllowInvalidUTF8]
//
// All other options are ignored.
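//
// Illustrative usage (a sketch; behavior follows the package tests):
//
//	Value(`{"0":0,"1":1,"0":0}`).IsValid()                          // false: duplicate name "0"
//	Value(`{"0":0,"1":1,"0":0}`).IsValid(AllowDuplicateNames(true)) // true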
func (v Value) IsValid(opts ...Options) bool {
// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
d := getBufferedDecoder(v, opts...)
defer putBufferedDecoder(d)
_, errVal := d.ReadValue()
_, errEOF := d.ReadToken()
return errVal == nil && errEOF == io.EOF
}
// Format formats the raw JSON value in place.
//
// By default (if no options are specified), it validates according to RFC 7493
// and produces the minimal JSON representation, where
// all whitespace is elided and JSON strings use the shortest encoding.
//
// Relevant options include:
// - [AllowDuplicateNames]
// - [AllowInvalidUTF8]
// - [EscapeForHTML]
// - [EscapeForJS]
// - [PreserveRawStrings]
// - [CanonicalizeRawInts]
// - [CanonicalizeRawFloats]
// - [ReorderRawObjects]
// - [SpaceAfterColon]
// - [SpaceAfterComma]
// - [Multiline]
// - [WithIndent]
// - [WithIndentPrefix]
//
// All other options are ignored.
//
// It is guaranteed to succeed if the value is valid according to the same options.
// If the value is already formatted, then the buffer is not mutated.
func (v *Value) Format(opts ...Options) error {
// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
return v.format(opts, nil)
}
// format accepts two []Options to avoid the allocation appending them together.
// It is equivalent to v.Format(append(opts1, opts2...)...).
func (v *Value) format(opts1, opts2 []Options) error {
e := getBufferedEncoder(opts1...)
defer putBufferedEncoder(e)
e.s.Join(opts2...)
e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
if err := e.s.WriteValue(*v); err != nil {
return err
}
if !bytes.Equal(*v, e.s.Buf) {
*v = append((*v)[:0], e.s.Buf...)
}
return nil
}
// Compact removes all whitespace from the raw JSON value.
//
// It does not reformat JSON strings or numbers to use any other representation.
// To maximize the set of JSON values that can be formatted,
// this permits values with duplicate names and invalid UTF-8.
//
// Compact is equivalent to calling [Value.Format] with the following options:
// - [AllowDuplicateNames](true)
// - [AllowInvalidUTF8](true)
// - [PreserveRawStrings](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
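//
// Illustrative usage (input taken from the package tests):
//
//	v := Value(` [ -9223372036854775808 , 9223372036854775807 ] `)
//	v.Compact() // v is now `[-9223372036854775808,9223372036854775807]`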
func (v *Value) Compact(opts ...Options) error {
return v.format([]Options{
AllowDuplicateNames(true),
AllowInvalidUTF8(true),
PreserveRawStrings(true),
}, opts)
}
// Indent reformats the whitespace in the raw JSON value so that each element
// in a JSON object or array begins on an indented line according to the nesting.
//
// It does not reformat JSON strings or numbers to use any other representation.
// To maximize the set of JSON values that can be formatted,
// this permits values with duplicate names and invalid UTF-8.
//
// Indent is equivalent to calling [Value.Format] with the following options:
// - [AllowDuplicateNames](true)
// - [AllowInvalidUTF8](true)
// - [PreserveRawStrings](true)
// - [Multiline](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
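//
// Illustrative usage (a minimal sketch; exact whitespace depends on the options):
//
//	v := Value(`[1,2]`)
//	v.Indent(WithIndent("  ")) // v now spans multiple lines,
//	                           // each element indented by two spaces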
func (v *Value) Indent(opts ...Options) error {
return v.format([]Options{
AllowDuplicateNames(true),
AllowInvalidUTF8(true),
PreserveRawStrings(true),
Multiline(true),
}, opts)
}
// Canonicalize canonicalizes the raw JSON value according to the
// JSON Canonicalization Scheme (JCS) as defined by RFC 8785
// where it produces a stable representation of a JSON value.
//
// JSON strings are formatted to use their minimal representation,
// JSON numbers are formatted as double precision numbers according
// to some stable serialization algorithm.
// JSON object members are sorted in ascending order by name.
// All whitespace is removed.
//
// The output stability is dependent on the stability of the application data
// (see RFC 8785, Appendix E). It cannot produce stable output from
// fundamentally unstable input. For example, if the JSON value
// contains ephemeral data (e.g., a frequently changing timestamp),
// then the value is still unstable regardless of whether this is called.
//
// Canonicalize is equivalent to calling [Value.Format] with the following options:
// - [CanonicalizeRawInts](true)
// - [CanonicalizeRawFloats](true)
// - [ReorderRawObjects](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
//
// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
// Any numbers with precision beyond what is representable by that form
// will lose their precision when canonicalized. For example, integer values
// beyond ±2⁵³ will lose their precision. To preserve the original representation
// of JSON integers, additionally set [CanonicalizeRawInts] to false:
//
// v.Canonicalize(jsontext.CanonicalizeRawInts(false))
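//
// Illustrative effect (input and output taken from the package tests):
//
//	v := Value(`[-9223372036854775808,9223372036854775807]`)
//	v.Canonicalize()
//	// v is now `[-9223372036854776000,9223372036854776000]` (float precision loss)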
func (v *Value) Canonicalize(opts ...Options) error {
return v.format([]Options{
CanonicalizeRawInts(true),
CanonicalizeRawFloats(true),
ReorderRawObjects(true),
}, opts)
}
// MarshalJSON returns v as the JSON encoding of v.
// It returns the stored value as the raw JSON output without any validation.
// If v is nil, then this returns a JSON null.
func (v Value) MarshalJSON() ([]byte, error) {
// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
if v == nil {
return []byte("null"), nil
}
return v, nil
}
// UnmarshalJSON sets v as the JSON encoding of b.
// It stores a copy of the provided raw JSON input without any validation.
func (v *Value) UnmarshalJSON(b []byte) error {
// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
if v == nil {
return errors.New("jsontext.Value: UnmarshalJSON on nil pointer")
}
*v = append((*v)[:0], b...)
return nil
}
// Kind returns the starting token kind.
// For a valid value, this will never include '}' or ']'.
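//
// Illustrative values (a sketch):
//
//	Value(` {"k":"v"} `).Kind() // '{'
//	Value(`-12`).Kind()         // '0'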
func (v Value) Kind() Kind {
if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 {
return Kind(v[0]).normalize()
}
return invalidKind
}
const commaAndWhitespace = ", \n\r\t"
type objectMember struct {
// name is the unquoted name.
name []byte // e.g., "name"
// buffer is the entirety of the raw JSON object member
// starting from right after the previous member (or opening '{')
// until right after the member value.
buffer []byte // e.g., `, \n\r\t"name": "value"`
}
func (x objectMember) Compare(y objectMember) int {
if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 {
return c
}
// With [AllowDuplicateNames] or [AllowInvalidUTF8],
// names could be identical, so also sort using the member value.
return jsonwire.CompareUTF16(
bytes.TrimLeft(x.buffer, commaAndWhitespace),
bytes.TrimLeft(y.buffer, commaAndWhitespace))
}
var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }}
func getObjectMembers() *[]objectMember {
ns := objectMemberPool.Get().(*[]objectMember)
*ns = (*ns)[:0]
return ns
}
func putObjectMembers(ns *[]objectMember) {
if cap(*ns) < 1<<10 {
clear(*ns) // avoid pinning name and buffer
objectMemberPool.Put(ns)
}
}
// mustReorderObjects reorders in-place all object members in a JSON value,
// which must be valid otherwise it panics.
func mustReorderObjects(b []byte) {
// Obtain a buffered encoder just to use its internal buffer as
// a scratch buffer for reordering object members.
e2 := getBufferedEncoder()
defer putBufferedEncoder(e2)
// Disable unnecessary checks to syntactically parse the JSON value.
d := getBufferedDecoder(b)
defer putBufferedDecoder(d)
d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3
}
// mustReorderObjectsFromDecoder recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// Pre-conditions:
// - The value is valid (i.e., no decoder errors should ever occur).
// - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
// - Exactly one JSON value is read from the Decoder.
// - All fully-parsed JSON objects are reordered by directly moving
// the members in the value buffer.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
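//
// Illustrative effect on a whitespace-free value (a sketch):
//
//	in:  {"c":3,"a":1,"b":{"y":2,"x":1}}
//	out: {"a":1,"b":{"x":1,"y":2},"c":3}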
func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
switch tok, err := d.ReadToken(); tok.Kind() {
case '{':
// Iterate and collect the name and offsets for every object member.
members := getObjectMembers()
defer putObjectMembers(members)
var prevMember objectMember
isSorted := true
beforeBody := d.InputOffset() // offset after '{'
for d.PeekKind() != '}' {
beforeName := d.InputOffset()
var flags jsonwire.ValueFlags
name, _ := d.s.ReadValue(&flags)
name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
mustReorderObjectsFromDecoder(d, scratch)
afterValue := d.InputOffset()
currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
if isSorted && len(*members) > 0 {
isSorted = objectMember.Compare(prevMember, currMember) < 0
}
*members = append(*members, currMember)
prevMember = currMember
}
afterBody := d.InputOffset() // offset before '}'
d.ReadToken()
// Sort the members; return early if it's already sorted.
if isSorted {
return
}
firstBufferBeforeSorting := (*members)[0].buffer
slices.SortFunc(*members, objectMember.Compare)
firstBufferAfterSorting := (*members)[0].buffer
// Append the reordered members to a new buffer,
// then copy the reordered members back over the original members.
// Avoid swapping in place since each member may be a different size
// where moving a member over a smaller member may corrupt the data
// for subsequent members before they have been moved.
//
// The following invariant must hold:
// sum([m.after-m.before for m in members]) == afterBody-beforeBody
commaAndWhitespacePrefix := func(b []byte) []byte {
return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
}
sorted := (*scratch)[:0]
for i, member := range *members {
switch {
case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
// First member after sorting is not the first member before sorting,
// so use the prefix of the first member before sorting.
sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
// Later member after sorting is the first member before sorting,
// so use the prefix of the first member after sorting.
sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
default:
sorted = append(sorted, member.buffer...)
}
}
if int(afterBody-beforeBody) != len(sorted) {
panic("BUG: length invariant violated")
}
copy(d.s.buf[beforeBody:afterBody], sorted)
// Update scratch buffer to the largest amount ever used.
if len(sorted) > len(*scratch) {
*scratch = sorted
}
case '[':
for d.PeekKind() != ']' {
mustReorderObjectsFromDecoder(d, scratch)
}
d.ReadToken()
default:
if err != nil {
panic("BUG: " + err.Error())
}
}
}


@@ -0,0 +1,200 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"io"
"strings"
"testing"
"encoding/json/internal/jsontest"
"encoding/json/internal/jsonwire"
)
type valueTestdataEntry struct {
name jsontest.CaseName
in string
wantValid bool
wantCompacted string
wantCompactErr error // implies wantCompacted is in
wantIndented string // wantCompacted if empty; uses "\t" for indent prefix and " " for indent
wantIndentErr error // implies wantCompacted is in
wantCanonicalized string // wantCompacted if empty
wantCanonicalizeErr error // implies wantCompacted is in
}
var valueTestdata = append(func() (out []valueTestdataEntry) {
// Initialize valueTestdata from coderTestdata.
for _, td := range coderTestdata {
// NOTE: The Compact method preserves the raw formatting of strings,
// while the Encoder (by default) does not.
if td.name.Name == "ComplicatedString" {
td.outCompacted = strings.TrimSpace(td.in)
}
out = append(out, valueTestdataEntry{
name: td.name,
in: td.in,
wantValid: true,
wantCompacted: td.outCompacted,
wantIndented: td.outIndented,
wantCanonicalized: td.outCanonicalized,
})
}
return out
}(), []valueTestdataEntry{{
name: jsontest.Name("RFC8785/Primitives"),
in: `{
"numbers": [333333333.33333329, 1E30, 4.50,
2e-3, 0.000000000000000000000000001, -0],
"string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
"literals": [null, true, false]
}`,
wantValid: true,
wantCompacted: `{"numbers":[333333333.33333329,1E30,4.50,2e-3,0.000000000000000000000000001,-0],"string":"\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/","literals":[null,true,false]}`,
wantIndented: `{
"numbers": [
333333333.33333329,
1E30,
4.50,
2e-3,
0.000000000000000000000000001,
-0
],
"string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
"literals": [
null,
true,
false
]
}`,
wantCanonicalized: `{"literals":[null,true,false],"numbers":[333333333.3333333,1e+30,4.5,0.002,1e-27,0],"string":"€$\u000f\nA'B\"\\\\\"/"}`,
}, {
name: jsontest.Name("RFC8785/ObjectOrdering"),
in: `{
"\u20ac": "Euro Sign",
"\r": "Carriage Return",
"\ufb33": "Hebrew Letter Dalet With Dagesh",
"1": "One",
"\ud83d\ude00": "Emoji: Grinning Face",
"\u0080": "Control",
"\u00f6": "Latin Small Letter O With Diaeresis"
}`,
wantValid: true,
wantCompacted: `{"\u20ac":"Euro Sign","\r":"Carriage Return","\ufb33":"Hebrew Letter Dalet With Dagesh","1":"One","\ud83d\ude00":"Emoji: Grinning Face","\u0080":"Control","\u00f6":"Latin Small Letter O With Diaeresis"}`,
wantIndented: `{
"\u20ac": "Euro Sign",
"\r": "Carriage Return",
"\ufb33": "Hebrew Letter Dalet With Dagesh",
"1": "One",
"\ud83d\ude00": "Emoji: Grinning Face",
"\u0080": "Control",
"\u00f6": "Latin Small Letter O With Diaeresis"
}`,
wantCanonicalized: `{"\r":"Carriage Return","1":"One","€":"Control","ö":"Latin Small Letter O With Diaeresis","€":"Euro Sign","😀":"Emoji: Grinning Face","דּ":"Hebrew Letter Dalet With Dagesh"}`,
}, {
name: jsontest.Name("LargeIntegers"),
in: ` [ -9223372036854775808 , 9223372036854775807 ] `,
wantValid: true,
wantCompacted: `[-9223372036854775808,9223372036854775807]`,
wantIndented: `[
-9223372036854775808,
9223372036854775807
]`,
wantCanonicalized: `[-9223372036854776000,9223372036854776000]`, // NOTE: Loss of precision due to numbers being treated as floats.
}, {
name: jsontest.Name("InvalidUTF8"),
in: ` "living` + "\xde\xad\xbe\xef" + `\ufffd<66>" `,
wantValid: false, // uses RFC 7493 as the definition, which validates UTF-8
wantCompacted: `"living` + "\xde\xad\xbe\xef" + `\ufffd<66>"`,
wantCanonicalizeErr: E(jsonwire.ErrInvalidUTF8).withPos(` "living`+"\xde\xad", ""),
}, {
name: jsontest.Name("InvalidUTF8/SurrogateHalf"),
in: `"\ud800"`,
wantValid: false, // uses RFC 7493 as the definition, which validates UTF-8
wantCompacted: `"\ud800"`,
wantCanonicalizeErr: newInvalidEscapeSequenceError(`\ud800"`).withPos(`"`, ""),
}, {
name: jsontest.Name("UppercaseEscaped"),
in: `"\u000B"`,
wantValid: true,
wantCompacted: `"\u000B"`,
wantCanonicalized: `"\u000b"`,
}, {
name: jsontest.Name("DuplicateNames"),
in: ` { "0" : 0 , "1" : 1 , "0" : 0 }`,
wantValid: false, // uses RFC 7493 as the definition, which checks for object name uniqueness
wantCompacted: `{"0":0,"1":1,"0":0}`,
wantIndented: `{
"0": 0,
"1": 1,
"0": 0
}`,
wantCanonicalizeErr: E(ErrDuplicateName).withPos(` { "0" : 0 , "1" : 1 , `, "/0"),
}, {
name: jsontest.Name("Whitespace"),
in: " \n\r\t",
wantValid: false,
wantCompacted: " \n\r\t",
wantCompactErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
wantIndentErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
wantCanonicalizeErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
}}...)
func TestValueMethods(t *testing.T) {
for _, td := range valueTestdata {
t.Run(td.name.Name, func(t *testing.T) {
if td.wantIndented == "" {
td.wantIndented = td.wantCompacted
}
if td.wantCanonicalized == "" {
td.wantCanonicalized = td.wantCompacted
}
if td.wantCompactErr != nil {
td.wantCompacted = td.in
}
if td.wantIndentErr != nil {
td.wantIndented = td.in
}
if td.wantCanonicalizeErr != nil {
td.wantCanonicalized = td.in
}
v := Value(td.in)
gotValid := v.IsValid()
if gotValid != td.wantValid {
t.Errorf("%s: Value.IsValid = %v, want %v", td.name.Where, gotValid, td.wantValid)
}
gotCompacted := Value(td.in)
gotCompactErr := gotCompacted.Compact()
if string(gotCompacted) != td.wantCompacted {
t.Errorf("%s: Value.Compact = %s, want %s", td.name.Where, gotCompacted, td.wantCompacted)
}
if !equalError(gotCompactErr, td.wantCompactErr) {
t.Errorf("%s: Value.Compact error mismatch:\ngot %v\nwant %v", td.name.Where, gotCompactErr, td.wantCompactErr)
}
gotIndented := Value(td.in)
gotIndentErr := gotIndented.Indent(WithIndentPrefix("\t"), WithIndent(" "))
if string(gotIndented) != td.wantIndented {
t.Errorf("%s: Value.Indent = %s, want %s", td.name.Where, gotIndented, td.wantIndented)
}
if !equalError(gotIndentErr, td.wantIndentErr) {
t.Errorf("%s: Value.Indent error mismatch:\ngot %v\nwant %v", td.name.Where, gotIndentErr, td.wantIndentErr)
}
gotCanonicalized := Value(td.in)
gotCanonicalizeErr := gotCanonicalized.Canonicalize()
if string(gotCanonicalized) != td.wantCanonicalized {
t.Errorf("%s: Value.Canonicalize = %s, want %s", td.name.Where, gotCanonicalized, td.wantCanonicalized)
}
if !equalError(gotCanonicalizeErr, td.wantCanonicalizeErr) {
t.Errorf("%s: Value.Canonicalize error mismatch:\ngot %v\nwant %v", td.name.Where, gotCanonicalizeErr, td.wantCanonicalizeErr)
}
})
}
}