add forked version of encoding/json with HTML escaping disabled
- modified all local packages to use this fork
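What "HTML escaping disabled" means in practice, as a minimal sketch using the stock encoding/json for comparison (the forked package under pkg/encoders/json is assumed to make the non-escaping behavior the default, so callers no longer need to opt in per Encoder):

package main

import (
	"bytes"
	"encoding/json" // stock library, shown only to illustrate the difference
	"fmt"
)

func main() {
	// By default the standard library escapes '<', '>', and '&'
	// so that JSON can be embedded inside HTML <script> blocks.
	out, _ := json.Marshal(map[string]string{"q": "a<b & c>d"})
	fmt.Println(string(out)) // {"q":"a\u003cb \u0026 c\u003ed"}

	// The usual opt-out is an Encoder with SetEscapeHTML(false);
	// the fork presumably removes the escaping so plain Marshal already behaves this way.
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	enc.SetEscapeHTML(false)
	_ = enc.Encode(map[string]string{"q": "a<b & c>d"})
	fmt.Print(buf.String()) // {"q":"a<b & c>d"}
}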
pkg/encoders/json/jsontext/coder_test.go (new normal file, 856 lines)
@@ -0,0 +1,856 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.jsonv2

package jsontext

import (
	"bytes"
	"errors"
	"io"
	"math"
	"math/rand"
	"path"
	"reflect"
	"strings"
	"testing"

	"encoding/json/internal/jsontest"
	"encoding/json/internal/jsonwire"
)

func E(err error) *SyntacticError {
	return &SyntacticError{Err: err}
}

func newInvalidCharacterError(prefix, where string) *SyntacticError {
	return E(jsonwire.NewInvalidCharacterError(prefix, where))
}

func newInvalidEscapeSequenceError(what string) *SyntacticError {
	return E(jsonwire.NewInvalidEscapeSequenceError(what))
}

func (e *SyntacticError) withPos(prefix string, pointer Pointer) *SyntacticError {
	e.ByteOffset = int64(len(prefix))
	e.JSONPointer = pointer
	return e
}

func equalError(x, y error) bool {
	return reflect.DeepEqual(x, y)
}

var (
	zeroToken Token
	zeroValue Value
)

// tokOrVal is either a Token or a Value.
type tokOrVal interface{ Kind() Kind }

type coderTestdataEntry struct {
	name             jsontest.CaseName
	in               string
	outCompacted     string
	outEscaped       string // outCompacted if empty; escapes all runes in a string
	outIndented      string // outCompacted if empty; uses " " for indent prefix and "\t" for indent
	outCanonicalized string // outCompacted if empty
	tokens           []Token
	pointers         []Pointer
}

var coderTestdata = []coderTestdataEntry{{
|
||||
name: jsontest.Name("Null"),
|
||||
in: ` null `,
|
||||
outCompacted: `null`,
|
||||
tokens: []Token{Null},
|
||||
pointers: []Pointer{""},
|
||||
}, {
|
||||
name: jsontest.Name("False"),
|
||||
in: ` false `,
|
||||
outCompacted: `false`,
|
||||
tokens: []Token{False},
|
||||
}, {
|
||||
name: jsontest.Name("True"),
|
||||
in: ` true `,
|
||||
outCompacted: `true`,
|
||||
tokens: []Token{True},
|
||||
}, {
|
||||
name: jsontest.Name("EmptyString"),
|
||||
in: ` "" `,
|
||||
outCompacted: `""`,
|
||||
tokens: []Token{String("")},
|
||||
}, {
|
||||
name: jsontest.Name("SimpleString"),
|
||||
in: ` "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" `,
|
||||
outCompacted: `"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"`,
|
||||
outEscaped: `"\u0061\u0062\u0063\u0064\u0065\u0066\u0067\u0068\u0069\u006a\u006b\u006c\u006d\u006e\u006f\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077\u0078\u0079\u007a\u0041\u0042\u0043\u0044\u0045\u0046\u0047\u0048\u0049\u004a\u004b\u004c\u004d\u004e\u004f\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057\u0058\u0059\u005a"`,
|
||||
tokens: []Token{String("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")},
|
||||
}, {
|
||||
name: jsontest.Name("ComplicatedString"),
|
||||
in: " \"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + ` \ud800\udead \"\\\/\b\f\n\r\t \u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009" `,
|
||||
outCompacted: "\"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + " 𐊭 \\\"\\\\/\\b\\f\\n\\r\\t \\\"\\\\/\\b\\f\\n\\r\\t\"",
|
||||
outEscaped: `"\u0048\u0065\u006c\u006c\u006f\u002c\u0020\u4e16\u754c\u0020\ud83c\udf1f\u2605\u2606\u2729\ud83c\udf20\u0020\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02\u0020\ud800\udead\u0020\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009\u0020\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`,
|
||||
outCanonicalized: `"Hello, 世界 🌟★☆✩🌠 ö€힙דּ<EE8080>😂 𐊭 \"\\/\b\f\n\r\t \"\\/\b\f\n\r\t"`,
|
||||
tokens: []Token{rawToken("\"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + " 𐊭 \\\"\\\\/\\b\\f\\n\\r\\t \\\"\\\\/\\b\\f\\n\\r\\t\"")},
|
||||
}, {
|
||||
name: jsontest.Name("ZeroNumber"),
|
||||
in: ` 0 `,
|
||||
outCompacted: `0`,
|
||||
tokens: []Token{Uint(0)},
|
||||
}, {
|
||||
name: jsontest.Name("SimpleNumber"),
|
||||
in: ` 123456789 `,
|
||||
outCompacted: `123456789`,
|
||||
tokens: []Token{Uint(123456789)},
|
||||
}, {
|
||||
name: jsontest.Name("NegativeNumber"),
|
||||
in: ` -123456789 `,
|
||||
outCompacted: `-123456789`,
|
||||
tokens: []Token{Int(-123456789)},
|
||||
}, {
|
||||
name: jsontest.Name("FractionalNumber"),
|
||||
in: " 0.123456789 ",
|
||||
outCompacted: `0.123456789`,
|
||||
tokens: []Token{Float(0.123456789)},
|
||||
}, {
|
||||
name: jsontest.Name("ExponentNumber"),
|
||||
in: " 0e12456789 ",
|
||||
outCompacted: `0e12456789`,
|
||||
outCanonicalized: `0`,
|
||||
tokens: []Token{rawToken(`0e12456789`)},
|
||||
}, {
|
||||
name: jsontest.Name("ExponentNumberP"),
|
||||
in: " 0e+12456789 ",
|
||||
outCompacted: `0e+12456789`,
|
||||
outCanonicalized: `0`,
|
||||
tokens: []Token{rawToken(`0e+12456789`)},
|
||||
}, {
|
||||
name: jsontest.Name("ExponentNumberN"),
|
||||
in: " 0e-12456789 ",
|
||||
outCompacted: `0e-12456789`,
|
||||
outCanonicalized: `0`,
|
||||
tokens: []Token{rawToken(`0e-12456789`)},
|
||||
}, {
|
||||
name: jsontest.Name("ComplicatedNumber"),
|
||||
in: ` -123456789.987654321E+0123456789 `,
|
||||
outCompacted: `-123456789.987654321E+0123456789`,
|
||||
outCanonicalized: `-1.7976931348623157e+308`,
|
||||
tokens: []Token{rawToken(`-123456789.987654321E+0123456789`)},
|
||||
}, {
|
||||
name: jsontest.Name("Numbers"),
|
||||
in: ` [
|
||||
0, -0, 0.0, -0.0, 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001, 1e1000,
|
||||
-5e-324, 1e+100, 1.7976931348623157e+308,
|
||||
9007199254740990, 9007199254740991, 9007199254740992, 9007199254740993, 9007199254740994,
|
||||
-9223372036854775808, 9223372036854775807, 0, 18446744073709551615
|
||||
] `,
|
||||
outCompacted: "[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,-9223372036854775808,9223372036854775807,0,18446744073709551615]",
|
||||
outIndented: `[
|
||||
0,
|
||||
-0,
|
||||
0.0,
|
||||
-0.0,
|
||||
1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,
|
||||
1e1000,
|
||||
-5e-324,
|
||||
1e+100,
|
||||
1.7976931348623157e+308,
|
||||
9007199254740990,
|
||||
9007199254740991,
|
||||
9007199254740992,
|
||||
9007199254740993,
|
||||
9007199254740994,
|
||||
-9223372036854775808,
|
||||
9223372036854775807,
|
||||
0,
|
||||
18446744073709551615
|
||||
]`,
|
||||
outCanonicalized: `[0,0,0,0,1,1.7976931348623157e+308,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740992,9007199254740994,-9223372036854776000,9223372036854776000,0,18446744073709552000]`,
|
||||
tokens: []Token{
|
||||
BeginArray,
|
||||
Float(0), Float(math.Copysign(0, -1)), rawToken(`0.0`), rawToken(`-0.0`), rawToken(`1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001`), rawToken(`1e1000`),
|
||||
Float(-5e-324), Float(1e100), Float(1.7976931348623157e+308),
|
||||
Float(9007199254740990), Float(9007199254740991), Float(9007199254740992), rawToken(`9007199254740993`), rawToken(`9007199254740994`),
|
||||
Int(minInt64), Int(maxInt64), Uint(minUint64), Uint(maxUint64),
|
||||
EndArray,
|
||||
},
|
||||
pointers: []Pointer{
|
||||
"", "/0", "/1", "/2", "/3", "/4", "/5", "/6", "/7", "/8", "/9", "/10", "/11", "/12", "/13", "/14", "/15", "/16", "/17", "",
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("ObjectN0"),
|
||||
in: ` { } `,
|
||||
outCompacted: `{}`,
|
||||
tokens: []Token{BeginObject, EndObject},
|
||||
pointers: []Pointer{"", ""},
|
||||
}, {
|
||||
name: jsontest.Name("ObjectN1"),
|
||||
in: ` { "0" : 0 } `,
|
||||
outCompacted: `{"0":0}`,
|
||||
outEscaped: `{"\u0030":0}`,
|
||||
outIndented: `{
|
||||
"0": 0
|
||||
}`,
|
||||
tokens: []Token{BeginObject, String("0"), Uint(0), EndObject},
|
||||
pointers: []Pointer{"", "/0", "/0", ""},
|
||||
}, {
|
||||
name: jsontest.Name("ObjectN2"),
|
||||
in: ` { "0" : 0 , "1" : 1 } `,
|
||||
outCompacted: `{"0":0,"1":1}`,
|
||||
outEscaped: `{"\u0030":0,"\u0031":1}`,
|
||||
outIndented: `{
|
||||
"0": 0,
|
||||
"1": 1
|
||||
}`,
|
||||
tokens: []Token{BeginObject, String("0"), Uint(0), String("1"), Uint(1), EndObject},
|
||||
pointers: []Pointer{"", "/0", "/0", "/1", "/1", ""},
|
||||
}, {
|
||||
name: jsontest.Name("ObjectNested"),
|
||||
in: ` { "0" : { "1" : { "2" : { "3" : { "4" : { } } } } } } `,
|
||||
outCompacted: `{"0":{"1":{"2":{"3":{"4":{}}}}}}`,
|
||||
outEscaped: `{"\u0030":{"\u0031":{"\u0032":{"\u0033":{"\u0034":{}}}}}}`,
|
||||
outIndented: `{
|
||||
"0": {
|
||||
"1": {
|
||||
"2": {
|
||||
"3": {
|
||||
"4": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`,
|
||||
tokens: []Token{BeginObject, String("0"), BeginObject, String("1"), BeginObject, String("2"), BeginObject, String("3"), BeginObject, String("4"), BeginObject, EndObject, EndObject, EndObject, EndObject, EndObject, EndObject},
|
||||
pointers: []Pointer{
|
||||
"",
|
||||
"/0", "/0",
|
||||
"/0/1", "/0/1",
|
||||
"/0/1/2", "/0/1/2",
|
||||
"/0/1/2/3", "/0/1/2/3",
|
||||
"/0/1/2/3/4", "/0/1/2/3/4",
|
||||
"/0/1/2/3/4",
|
||||
"/0/1/2/3",
|
||||
"/0/1/2",
|
||||
"/0/1",
|
||||
"/0",
|
||||
"",
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("ObjectSuperNested"),
|
||||
in: `{"": {
|
||||
"44444": {
|
||||
"6666666": "ccccccc",
|
||||
"77777777": "bb",
|
||||
"555555": "aaaa"
|
||||
},
|
||||
"0": {
|
||||
"3333": "bbb",
|
||||
"11": "",
|
||||
"222": "aaaaa"
|
||||
}
|
||||
}}`,
|
||||
outCompacted: `{"":{"44444":{"6666666":"ccccccc","77777777":"bb","555555":"aaaa"},"0":{"3333":"bbb","11":"","222":"aaaaa"}}}`,
|
||||
outEscaped: `{"":{"\u0034\u0034\u0034\u0034\u0034":{"\u0036\u0036\u0036\u0036\u0036\u0036\u0036":"\u0063\u0063\u0063\u0063\u0063\u0063\u0063","\u0037\u0037\u0037\u0037\u0037\u0037\u0037\u0037":"\u0062\u0062","\u0035\u0035\u0035\u0035\u0035\u0035":"\u0061\u0061\u0061\u0061"},"\u0030":{"\u0033\u0033\u0033\u0033":"\u0062\u0062\u0062","\u0031\u0031":"","\u0032\u0032\u0032":"\u0061\u0061\u0061\u0061\u0061"}}}`,
|
||||
outIndented: `{
|
||||
"": {
|
||||
"44444": {
|
||||
"6666666": "ccccccc",
|
||||
"77777777": "bb",
|
||||
"555555": "aaaa"
|
||||
},
|
||||
"0": {
|
||||
"3333": "bbb",
|
||||
"11": "",
|
||||
"222": "aaaaa"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
outCanonicalized: `{"":{"0":{"11":"","222":"aaaaa","3333":"bbb"},"44444":{"555555":"aaaa","6666666":"ccccccc","77777777":"bb"}}}`,
|
||||
tokens: []Token{
|
||||
BeginObject,
|
||||
String(""),
|
||||
BeginObject,
|
||||
String("44444"),
|
||||
BeginObject,
|
||||
String("6666666"), String("ccccccc"),
|
||||
String("77777777"), String("bb"),
|
||||
String("555555"), String("aaaa"),
|
||||
EndObject,
|
||||
String("0"),
|
||||
BeginObject,
|
||||
String("3333"), String("bbb"),
|
||||
String("11"), String(""),
|
||||
String("222"), String("aaaaa"),
|
||||
EndObject,
|
||||
EndObject,
|
||||
EndObject,
|
||||
},
|
||||
pointers: []Pointer{
|
||||
"",
|
||||
"/", "/",
|
||||
"//44444", "//44444",
|
||||
"//44444/6666666", "//44444/6666666",
|
||||
"//44444/77777777", "//44444/77777777",
|
||||
"//44444/555555", "//44444/555555",
|
||||
"//44444",
|
||||
"//0", "//0",
|
||||
"//0/3333", "//0/3333",
|
||||
"//0/11", "//0/11",
|
||||
"//0/222", "//0/222",
|
||||
"//0",
|
||||
"/",
|
||||
"",
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("ArrayN0"),
|
||||
in: ` [ ] `,
|
||||
outCompacted: `[]`,
|
||||
tokens: []Token{BeginArray, EndArray},
|
||||
pointers: []Pointer{"", ""},
|
||||
}, {
|
||||
name: jsontest.Name("ArrayN1"),
|
||||
in: ` [ 0 ] `,
|
||||
outCompacted: `[0]`,
|
||||
outIndented: `[
|
||||
0
|
||||
]`,
|
||||
tokens: []Token{BeginArray, Uint(0), EndArray},
|
||||
pointers: []Pointer{"", "/0", ""},
|
||||
}, {
|
||||
name: jsontest.Name("ArrayN2"),
|
||||
in: ` [ 0 , 1 ] `,
|
||||
outCompacted: `[0,1]`,
|
||||
outIndented: `[
|
||||
0,
|
||||
1
|
||||
]`,
|
||||
tokens: []Token{BeginArray, Uint(0), Uint(1), EndArray},
|
||||
}, {
|
||||
name: jsontest.Name("ArrayNested"),
|
||||
in: ` [ [ [ [ [ ] ] ] ] ] `,
|
||||
outCompacted: `[[[[[]]]]]`,
|
||||
outIndented: `[
|
||||
[
|
||||
[
|
||||
[
|
||||
[]
|
||||
]
|
||||
]
|
||||
]
|
||||
]`,
|
||||
tokens: []Token{BeginArray, BeginArray, BeginArray, BeginArray, BeginArray, EndArray, EndArray, EndArray, EndArray, EndArray},
|
||||
pointers: []Pointer{
|
||||
"",
|
||||
"/0",
|
||||
"/0/0",
|
||||
"/0/0/0",
|
||||
"/0/0/0/0",
|
||||
"/0/0/0/0",
|
||||
"/0/0/0",
|
||||
"/0/0",
|
||||
"/0",
|
||||
"",
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("Everything"),
|
||||
in: ` {
|
||||
"literals" : [ null , false , true ],
|
||||
"string" : "Hello, 世界" ,
|
||||
"number" : 3.14159 ,
|
||||
"arrayN0" : [ ] ,
|
||||
"arrayN1" : [ 0 ] ,
|
||||
"arrayN2" : [ 0 , 1 ] ,
|
||||
"objectN0" : { } ,
|
||||
"objectN1" : { "0" : 0 } ,
|
||||
"objectN2" : { "0" : 0 , "1" : 1 }
|
||||
} `,
|
||||
outCompacted: `{"literals":[null,false,true],"string":"Hello, 世界","number":3.14159,"arrayN0":[],"arrayN1":[0],"arrayN2":[0,1],"objectN0":{},"objectN1":{"0":0},"objectN2":{"0":0,"1":1}}`,
|
||||
outEscaped: `{"\u006c\u0069\u0074\u0065\u0072\u0061\u006c\u0073":[null,false,true],"\u0073\u0074\u0072\u0069\u006e\u0067":"\u0048\u0065\u006c\u006c\u006f\u002c\u0020\u4e16\u754c","\u006e\u0075\u006d\u0062\u0065\u0072":3.14159,"\u0061\u0072\u0072\u0061\u0079\u004e\u0030":[],"\u0061\u0072\u0072\u0061\u0079\u004e\u0031":[0],"\u0061\u0072\u0072\u0061\u0079\u004e\u0032":[0,1],"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0030":{},"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0031":{"\u0030":0},"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0032":{"\u0030":0,"\u0031":1}}`,
|
||||
outIndented: `{
|
||||
"literals": [
|
||||
null,
|
||||
false,
|
||||
true
|
||||
],
|
||||
"string": "Hello, 世界",
|
||||
"number": 3.14159,
|
||||
"arrayN0": [],
|
||||
"arrayN1": [
|
||||
0
|
||||
],
|
||||
"arrayN2": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"objectN0": {},
|
||||
"objectN1": {
|
||||
"0": 0
|
||||
},
|
||||
"objectN2": {
|
||||
"0": 0,
|
||||
"1": 1
|
||||
}
|
||||
}`,
|
||||
outCanonicalized: `{"arrayN0":[],"arrayN1":[0],"arrayN2":[0,1],"literals":[null,false,true],"number":3.14159,"objectN0":{},"objectN1":{"0":0},"objectN2":{"0":0,"1":1},"string":"Hello, 世界"}`,
|
||||
tokens: []Token{
|
||||
BeginObject,
|
||||
String("literals"), BeginArray, Null, False, True, EndArray,
|
||||
String("string"), String("Hello, 世界"),
|
||||
String("number"), Float(3.14159),
|
||||
String("arrayN0"), BeginArray, EndArray,
|
||||
String("arrayN1"), BeginArray, Uint(0), EndArray,
|
||||
String("arrayN2"), BeginArray, Uint(0), Uint(1), EndArray,
|
||||
String("objectN0"), BeginObject, EndObject,
|
||||
String("objectN1"), BeginObject, String("0"), Uint(0), EndObject,
|
||||
String("objectN2"), BeginObject, String("0"), Uint(0), String("1"), Uint(1), EndObject,
|
||||
EndObject,
|
||||
},
|
||||
pointers: []Pointer{
|
||||
"",
|
||||
"/literals", "/literals",
|
||||
"/literals/0",
|
||||
"/literals/1",
|
||||
"/literals/2",
|
||||
"/literals",
|
||||
"/string", "/string",
|
||||
"/number", "/number",
|
||||
"/arrayN0", "/arrayN0", "/arrayN0",
|
||||
"/arrayN1", "/arrayN1",
|
||||
"/arrayN1/0",
|
||||
"/arrayN1",
|
||||
"/arrayN2", "/arrayN2",
|
||||
"/arrayN2/0",
|
||||
"/arrayN2/1",
|
||||
"/arrayN2",
|
||||
"/objectN0", "/objectN0", "/objectN0",
|
||||
"/objectN1", "/objectN1",
|
||||
"/objectN1/0", "/objectN1/0",
|
||||
"/objectN1",
|
||||
"/objectN2", "/objectN2",
|
||||
"/objectN2/0", "/objectN2/0",
|
||||
"/objectN2/1", "/objectN2/1",
|
||||
"/objectN2",
|
||||
"",
|
||||
},
|
||||
}}
|
||||
|
||||
// TestCoderInterleaved tests that we can interleave calls that operate on
|
||||
// tokens and raw values. The only error condition is trying to operate on a
|
||||
// raw value when the next token is an end of object or array.
|
||||
func TestCoderInterleaved(t *testing.T) {
|
||||
for _, td := range coderTestdata {
|
||||
// In TokenFirst and ValueFirst, alternate between tokens and values.
|
||||
// In TokenDelims, only use tokens for object and array delimiters.
|
||||
for _, modeName := range []string{"TokenFirst", "ValueFirst", "TokenDelims"} {
|
||||
t.Run(path.Join(td.name.Name, modeName), func(t *testing.T) {
|
||||
testCoderInterleaved(t, td.name.Where, modeName, td)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
func testCoderInterleaved(t *testing.T, where jsontest.CasePos, modeName string, td coderTestdataEntry) {
|
||||
src := strings.NewReader(td.in)
|
||||
dst := new(bytes.Buffer)
|
||||
dec := NewDecoder(src)
|
||||
enc := NewEncoder(dst)
|
||||
tickTock := modeName == "TokenFirst"
|
||||
for {
|
||||
if modeName == "TokenDelims" {
|
||||
switch dec.PeekKind() {
|
||||
case '{', '}', '[', ']':
|
||||
tickTock = true // as token
|
||||
default:
|
||||
tickTock = false // as value
|
||||
}
|
||||
}
|
||||
if tickTock {
|
||||
tok, err := dec.ReadToken()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatalf("%s: Decoder.ReadToken error: %v", where, err)
|
||||
}
|
||||
if err := enc.WriteToken(tok); err != nil {
|
||||
t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
|
||||
}
|
||||
} else {
|
||||
val, err := dec.ReadValue()
|
||||
if err != nil {
|
||||
// It is a syntactic error to call ReadValue
|
||||
// at the end of an object or array.
|
||||
// Retry as a ReadToken call.
|
||||
expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']'
|
||||
if expectError {
|
||||
if !errors.As(err, new(*SyntacticError)) {
|
||||
t.Fatalf("%s: Decoder.ReadToken error is %T, want %T", where, err, new(SyntacticError))
|
||||
}
|
||||
tickTock = !tickTock
|
||||
continue
|
||||
}
|
||||
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatalf("%s: Decoder.ReadValue error: %v", where, err)
|
||||
}
|
||||
if err := enc.WriteValue(val); err != nil {
|
||||
t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
|
||||
}
|
||||
}
|
||||
tickTock = !tickTock
|
||||
}
|
||||
|
||||
got := dst.String()
|
||||
want := td.outCompacted + "\n"
|
||||
if got != want {
|
||||
t.Fatalf("%s: output mismatch:\ngot %q\nwant %q", where, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCoderStackPointer(t *testing.T) {
|
||||
tests := []struct {
|
||||
token Token
|
||||
want Pointer
|
||||
}{
|
||||
{Null, ""},
|
||||
|
||||
{BeginArray, ""},
|
||||
{EndArray, ""},
|
||||
|
||||
{BeginArray, ""},
|
||||
{Bool(true), "/0"},
|
||||
{EndArray, ""},
|
||||
|
||||
{BeginArray, ""},
|
||||
{String("hello"), "/0"},
|
||||
{String("goodbye"), "/1"},
|
||||
{EndArray, ""},
|
||||
|
||||
{BeginObject, ""},
|
||||
{EndObject, ""},
|
||||
|
||||
{BeginObject, ""},
|
||||
{String("hello"), "/hello"},
|
||||
{String("goodbye"), "/hello"},
|
||||
{EndObject, ""},
|
||||
|
||||
{BeginObject, ""},
|
||||
{String(""), "/"},
|
||||
{Null, "/"},
|
||||
{String("0"), "/0"},
|
||||
{Null, "/0"},
|
||||
{String("~"), "/~0"},
|
||||
{Null, "/~0"},
|
||||
{String("/"), "/~1"},
|
||||
{Null, "/~1"},
|
||||
{String("a//b~/c/~d~~e"), "/a~1~1b~0~1c~1~0d~0~0e"},
|
||||
{Null, "/a~1~1b~0~1c~1~0d~0~0e"},
|
||||
{String(" \r\n\t"), "/ \r\n\t"},
|
||||
{Null, "/ \r\n\t"},
|
||||
{EndObject, ""},
|
||||
|
||||
{BeginArray, ""},
|
||||
{BeginObject, "/0"},
|
||||
{String(""), "/0/"},
|
||||
{BeginArray, "/0/"},
|
||||
{BeginObject, "/0//0"},
|
||||
{String("#"), "/0//0/#"},
|
||||
{Null, "/0//0/#"},
|
||||
{EndObject, "/0//0"},
|
||||
{EndArray, "/0/"},
|
||||
{EndObject, "/0"},
|
||||
{EndArray, ""},
|
||||
}
|
||||
|
||||
for _, allowDupes := range []bool{false, true} {
|
||||
var name string
|
||||
switch allowDupes {
|
||||
case false:
|
||||
name = "RejectDuplicateNames"
|
||||
case true:
|
||||
name = "AllowDuplicateNames"
|
||||
}
|
||||
|
||||
t.Run(name, func(t *testing.T) {
|
||||
bb := new(bytes.Buffer)
|
||||
|
||||
enc := NewEncoder(bb, AllowDuplicateNames(allowDupes))
|
||||
for i, tt := range tests {
|
||||
if err := enc.WriteToken(tt.token); err != nil {
|
||||
t.Fatalf("%d: Encoder.WriteToken error: %v", i, err)
|
||||
}
|
||||
if got := enc.StackPointer(); got != tests[i].want {
|
||||
t.Fatalf("%d: Encoder.StackPointer = %v, want %v", i, got, tests[i].want)
|
||||
}
|
||||
}
|
||||
|
||||
dec := NewDecoder(bb, AllowDuplicateNames(allowDupes))
|
||||
for i := range tests {
|
||||
if _, err := dec.ReadToken(); err != nil {
|
||||
t.Fatalf("%d: Decoder.ReadToken error: %v", i, err)
|
||||
}
|
||||
if got := dec.StackPointer(); got != tests[i].want {
|
||||
t.Fatalf("%d: Decoder.StackPointer = %v, want %v", i, got, tests[i].want)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCoderMaxDepth(t *testing.T) {
|
||||
trimArray := func(b []byte) []byte { return b[len(`[`) : len(b)-len(`]`)] }
|
||||
maxArrays := []byte(strings.Repeat(`[`, maxNestingDepth+1) + strings.Repeat(`]`, maxNestingDepth+1))
|
||||
trimObject := func(b []byte) []byte { return b[len(`{"":`) : len(b)-len(`}`)] }
|
||||
maxObjects := []byte(strings.Repeat(`{"":`, maxNestingDepth+1) + `""` + strings.Repeat(`}`, maxNestingDepth+1))
|
||||
|
||||
t.Run("Decoder", func(t *testing.T) {
|
||||
var dec Decoder
|
||||
checkReadToken := func(t *testing.T, wantKind Kind, wantErr error) {
|
||||
t.Helper()
|
||||
if tok, err := dec.ReadToken(); tok.Kind() != wantKind || !equalError(err, wantErr) {
|
||||
t.Fatalf("Decoder.ReadToken = (%q, %v), want (%q, %v)", byte(tok.Kind()), err, byte(wantKind), wantErr)
|
||||
}
|
||||
}
|
||||
checkReadValue := func(t *testing.T, wantLen int, wantErr error) {
|
||||
t.Helper()
|
||||
if val, err := dec.ReadValue(); len(val) != wantLen || !equalError(err, wantErr) {
|
||||
t.Fatalf("Decoder.ReadValue = (%d, %v), want (%d, %v)", len(val), err, wantLen, wantErr)
|
||||
}
|
||||
}
|
||||
|
||||
t.Run("ArraysValid/SingleValue", func(t *testing.T) {
|
||||
dec.s.reset(trimArray(maxArrays), nil)
|
||||
checkReadValue(t, maxNestingDepth*len(`[]`), nil)
|
||||
})
|
||||
t.Run("ArraysValid/TokenThenValue", func(t *testing.T) {
|
||||
dec.s.reset(trimArray(maxArrays), nil)
|
||||
checkReadToken(t, '[', nil)
|
||||
checkReadValue(t, (maxNestingDepth-1)*len(`[]`), nil)
|
||||
checkReadToken(t, ']', nil)
|
||||
})
|
||||
t.Run("ArraysValid/AllTokens", func(t *testing.T) {
|
||||
dec.s.reset(trimArray(maxArrays), nil)
|
||||
for range maxNestingDepth {
|
||||
checkReadToken(t, '[', nil)
|
||||
}
|
||||
for range maxNestingDepth {
|
||||
checkReadToken(t, ']', nil)
|
||||
}
|
||||
})
|
||||
|
||||
wantErr := &SyntacticError{
|
||||
ByteOffset: maxNestingDepth,
|
||||
JSONPointer: Pointer(strings.Repeat("/0", maxNestingDepth)),
|
||||
Err: errMaxDepth,
|
||||
}
|
||||
t.Run("ArraysInvalid/SingleValue", func(t *testing.T) {
|
||||
dec.s.reset(maxArrays, nil)
|
||||
checkReadValue(t, 0, wantErr)
|
||||
})
|
||||
t.Run("ArraysInvalid/TokenThenValue", func(t *testing.T) {
|
||||
dec.s.reset(maxArrays, nil)
|
||||
checkReadToken(t, '[', nil)
|
||||
checkReadValue(t, 0, wantErr)
|
||||
})
|
||||
t.Run("ArraysInvalid/AllTokens", func(t *testing.T) {
|
||||
dec.s.reset(maxArrays, nil)
|
||||
for range maxNestingDepth {
|
||||
checkReadToken(t, '[', nil)
|
||||
}
|
||||
checkReadValue(t, 0, wantErr)
|
||||
})
|
||||
|
||||
t.Run("ObjectsValid/SingleValue", func(t *testing.T) {
|
||||
dec.s.reset(trimObject(maxObjects), nil)
|
||||
checkReadValue(t, maxNestingDepth*len(`{"":}`)+len(`""`), nil)
|
||||
})
|
||||
t.Run("ObjectsValid/TokenThenValue", func(t *testing.T) {
|
||||
dec.s.reset(trimObject(maxObjects), nil)
|
||||
checkReadToken(t, '{', nil)
|
||||
checkReadToken(t, '"', nil)
|
||||
checkReadValue(t, (maxNestingDepth-1)*len(`{"":}`)+len(`""`), nil)
|
||||
checkReadToken(t, '}', nil)
|
||||
})
|
||||
t.Run("ObjectsValid/AllTokens", func(t *testing.T) {
|
||||
dec.s.reset(trimObject(maxObjects), nil)
|
||||
for range maxNestingDepth {
|
||||
checkReadToken(t, '{', nil)
|
||||
checkReadToken(t, '"', nil)
|
||||
}
|
||||
checkReadToken(t, '"', nil)
|
||||
for range maxNestingDepth {
|
||||
checkReadToken(t, '}', nil)
|
||||
}
|
||||
})
|
||||
|
||||
wantErr = &SyntacticError{
|
||||
ByteOffset: maxNestingDepth * int64(len(`{"":`)),
|
||||
JSONPointer: Pointer(strings.Repeat("/", maxNestingDepth)),
|
||||
Err: errMaxDepth,
|
||||
}
|
||||
t.Run("ObjectsInvalid/SingleValue", func(t *testing.T) {
|
||||
dec.s.reset(maxObjects, nil)
|
||||
checkReadValue(t, 0, wantErr)
|
||||
})
|
||||
t.Run("ObjectsInvalid/TokenThenValue", func(t *testing.T) {
|
||||
dec.s.reset(maxObjects, nil)
|
||||
checkReadToken(t, '{', nil)
|
||||
checkReadToken(t, '"', nil)
|
||||
checkReadValue(t, 0, wantErr)
|
||||
})
|
||||
t.Run("ObjectsInvalid/AllTokens", func(t *testing.T) {
|
||||
dec.s.reset(maxObjects, nil)
|
||||
for range maxNestingDepth {
|
||||
checkReadToken(t, '{', nil)
|
||||
checkReadToken(t, '"', nil)
|
||||
}
|
||||
checkReadToken(t, 0, wantErr)
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("Encoder", func(t *testing.T) {
|
||||
var enc Encoder
|
||||
checkWriteToken := func(t *testing.T, tok Token, wantErr error) {
|
||||
t.Helper()
|
||||
if err := enc.WriteToken(tok); !equalError(err, wantErr) {
|
||||
t.Fatalf("Encoder.WriteToken = %v, want %v", err, wantErr)
|
||||
}
|
||||
}
|
||||
checkWriteValue := func(t *testing.T, val Value, wantErr error) {
|
||||
t.Helper()
|
||||
if err := enc.WriteValue(val); !equalError(err, wantErr) {
|
||||
t.Fatalf("Encoder.WriteValue = %v, want %v", err, wantErr)
|
||||
}
|
||||
}
|
||||
|
||||
wantErr := &SyntacticError{
|
||||
ByteOffset: maxNestingDepth,
|
||||
JSONPointer: Pointer(strings.Repeat("/0", maxNestingDepth)),
|
||||
Err: errMaxDepth,
|
||||
}
|
||||
t.Run("Arrays/SingleValue", func(t *testing.T) {
|
||||
enc.s.reset(enc.s.Buf[:0], nil)
|
||||
checkWriteValue(t, maxArrays, wantErr)
|
||||
checkWriteValue(t, trimArray(maxArrays), nil)
|
||||
})
|
||||
t.Run("Arrays/TokenThenValue", func(t *testing.T) {
|
||||
enc.s.reset(enc.s.Buf[:0], nil)
|
||||
checkWriteToken(t, BeginArray, nil)
|
||||
checkWriteValue(t, trimArray(maxArrays), wantErr)
|
||||
checkWriteValue(t, trimArray(trimArray(maxArrays)), nil)
|
||||
checkWriteToken(t, EndArray, nil)
|
||||
})
|
||||
t.Run("Arrays/AllTokens", func(t *testing.T) {
|
||||
enc.s.reset(enc.s.Buf[:0], nil)
|
||||
for range maxNestingDepth {
|
||||
checkWriteToken(t, BeginArray, nil)
|
||||
}
|
||||
checkWriteToken(t, BeginArray, wantErr)
|
||||
for range maxNestingDepth {
|
||||
checkWriteToken(t, EndArray, nil)
|
||||
}
|
||||
})
|
||||
|
||||
wantErr = &SyntacticError{
|
||||
ByteOffset: maxNestingDepth * int64(len(`{"":`)),
|
||||
JSONPointer: Pointer(strings.Repeat("/", maxNestingDepth)),
|
||||
Err: errMaxDepth,
|
||||
}
|
||||
t.Run("Objects/SingleValue", func(t *testing.T) {
|
||||
enc.s.reset(enc.s.Buf[:0], nil)
|
||||
checkWriteValue(t, maxObjects, wantErr)
|
||||
checkWriteValue(t, trimObject(maxObjects), nil)
|
||||
})
|
||||
t.Run("Objects/TokenThenValue", func(t *testing.T) {
|
||||
enc.s.reset(enc.s.Buf[:0], nil)
|
||||
checkWriteToken(t, BeginObject, nil)
|
||||
checkWriteToken(t, String(""), nil)
|
||||
checkWriteValue(t, trimObject(maxObjects), wantErr)
|
||||
checkWriteValue(t, trimObject(trimObject(maxObjects)), nil)
|
||||
checkWriteToken(t, EndObject, nil)
|
||||
})
|
||||
t.Run("Objects/AllTokens", func(t *testing.T) {
|
||||
enc.s.reset(enc.s.Buf[:0], nil)
|
||||
for range maxNestingDepth - 1 {
|
||||
checkWriteToken(t, BeginObject, nil)
|
||||
checkWriteToken(t, String(""), nil)
|
||||
}
|
||||
checkWriteToken(t, BeginObject, nil)
|
||||
checkWriteToken(t, String(""), nil)
|
||||
checkWriteToken(t, BeginObject, wantErr)
|
||||
checkWriteToken(t, String(""), nil)
|
||||
for range maxNestingDepth {
|
||||
checkWriteToken(t, EndObject, nil)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// FaultyBuffer implements io.Reader and io.Writer.
|
||||
// It may process fewer bytes than the provided buffer
|
||||
// and may randomly return an error.
|
||||
type FaultyBuffer struct {
|
||||
B []byte
|
||||
|
||||
// MaxBytes is the maximum number of bytes read/written.
|
||||
// A random number of bytes within [0, MaxBytes] are processed.
|
||||
// A non-positive value is treated as infinity.
|
||||
MaxBytes int
|
||||
|
||||
// MayError specifies whether to randomly provide this error.
|
||||
// Even if an error is returned, no bytes are dropped.
|
||||
MayError error
|
||||
|
||||
// Rand to use for pseudo-random behavior.
|
||||
// If nil, it will be initialized with rand.NewSource(0).
|
||||
Rand rand.Source
|
||||
}
|
||||
|
||||
func (p *FaultyBuffer) Read(b []byte) (int, error) {
|
||||
b = b[:copy(b[:p.mayTruncate(len(b))], p.B)]
|
||||
p.B = p.B[len(b):]
|
||||
if len(p.B) == 0 && (len(b) == 0 || p.randN(2) == 0) {
|
||||
return len(b), io.EOF
|
||||
}
|
||||
return len(b), p.mayError()
|
||||
}
|
||||
|
||||
func (p *FaultyBuffer) Write(b []byte) (int, error) {
|
||||
b2 := b[:p.mayTruncate(len(b))]
|
||||
p.B = append(p.B, b2...)
|
||||
if len(b2) < len(b) {
|
||||
return len(b2), io.ErrShortWrite
|
||||
}
|
||||
return len(b2), p.mayError()
|
||||
}
|
||||
|
||||
// mayTruncate may return a value between [0, n].
|
||||
func (p *FaultyBuffer) mayTruncate(n int) int {
|
||||
if p.MaxBytes > 0 {
|
||||
if n > p.MaxBytes {
|
||||
n = p.MaxBytes
|
||||
}
|
||||
return p.randN(n + 1)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// mayError may return a non-nil error.
|
||||
func (p *FaultyBuffer) mayError() error {
|
||||
if p.MayError != nil && p.randN(2) == 0 {
|
||||
return p.MayError
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *FaultyBuffer) randN(n int) int {
|
||||
if p.Rand == nil {
|
||||
p.Rand = rand.NewSource(0)
|
||||
}
|
||||
return int(p.Rand.Int63() % int64(n))
|
||||
}
|
||||
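The StackPointer expectations in TestCoderStackPointer above follow RFC 6901 JSON Pointer escaping, where '~' in a member name is written as "~0" and '/' as "~1". A small sketch of that mapping (the helper name below is illustrative, not part of the package):

package main

import (
	"fmt"
	"strings"
)

// escapePointerToken applies RFC 6901 escaping to one reference token:
// '~' must become "~0" before '/' becomes "~1".
func escapePointerToken(name string) string {
	name = strings.ReplaceAll(name, "~", "~0")
	return strings.ReplaceAll(name, "/", "~1")
}

func main() {
	for _, name := range []string{"~", "/", "a//b~/c/~d~~e"} {
		fmt.Printf("%q -> %q\n", name, "/"+escapePointerToken(name))
	}
	// Matches the pointers expected by the test table, e.g.
	// "a//b~/c/~d~~e" -> "/a~1~1b~0~1c~1~0d~0~0e".
}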
pkg/encoders/json/jsontext/decode.go (new normal file, 1168 lines; diff suppressed because it is too large)
pkg/encoders/json/jsontext/decode_test.go (new normal file, 1267 lines; diff suppressed because it is too large)
pkg/encoders/json/jsontext/doc.go (new normal file, 116 lines)
@@ -0,0 +1,116 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.jsonv2

// Package jsontext implements syntactic processing of JSON
// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785.
// JSON is a simple data interchange format that can represent
// primitive data types such as booleans, strings, and numbers,
// in addition to structured data types such as objects and arrays.
//
// This package (encoding/json/jsontext) is experimental,
// and not subject to the Go 1 compatibility promise.
// It only exists when building with the GOEXPERIMENT=jsonv2 environment variable set.
// Most users should use [encoding/json].
//
// The [Encoder] and [Decoder] types are used to encode or decode
// a stream of JSON tokens or values.
//
// # Tokens and Values
//
// A JSON token refers to the basic structural elements of JSON:
//
//   - a JSON literal (i.e., null, true, or false)
//   - a JSON string (e.g., "hello, world!")
//   - a JSON number (e.g., 123.456)
//   - a begin or end delimiter for a JSON object (i.e., '{' or '}')
//   - a begin or end delimiter for a JSON array (i.e., '[' or ']')
//
// A JSON token is represented by the [Token] type in Go. Technically,
// there are two additional structural characters (i.e., ':' and ','),
// but there is no [Token] representation for them since their presence
// can be inferred by the structure of the JSON grammar itself.
// For example, there must always be an implicit colon between
// the name and value of a JSON object member.
//
// A JSON value refers to a complete unit of JSON data:
//
//   - a JSON literal, string, or number
//   - a JSON object (e.g., `{"name":"value"}`)
//   - a JSON array (e.g., `[1,2,3]`)
//
// A JSON value is represented by the [Value] type in Go and is a []byte
// containing the raw textual representation of the value. There is some overlap
// between tokens and values as both contain literals, strings, and numbers.
// However, only a value can represent the entirety of a JSON object or array.
//
// The [Encoder] and [Decoder] types contain methods to read or write the next
// [Token] or [Value] in a sequence. They maintain a state machine to validate
// whether the sequence of JSON tokens and/or values produces a valid JSON.
// [Options] may be passed to the [NewEncoder] or [NewDecoder] constructors
// to configure the syntactic behavior of encoding and decoding.
//
// # Terminology
//
// The terms "encode" and "decode" are used for syntactic functionality
// that is concerned with processing JSON based on its grammar, and
// the terms "marshal" and "unmarshal" are used for semantic functionality
// that determines the meaning of JSON values as Go values and vice-versa.
// This package (i.e., [jsontext]) deals with JSON at a syntactic layer,
// while [encoding/json/v2] deals with JSON at a semantic layer.
// The goal is to provide a clear distinction between functionality that
// is purely concerned with encoding versus that of marshaling.
// For example, one can directly encode a stream of JSON tokens without
// needing to marshal a concrete Go value representing them.
// Similarly, one can decode a stream of JSON tokens without
// needing to unmarshal them into a concrete Go value.
//
// This package uses JSON terminology when discussing JSON, which may differ
// from related concepts in Go or elsewhere in computing literature.
//
//   - a JSON "object" refers to an unordered collection of name/value members.
//   - a JSON "array" refers to an ordered sequence of elements.
//   - a JSON "value" refers to either a literal (i.e., null, false, or true),
//     string, number, object, or array.
//
// See RFC 8259 for more information.
//
// # Specifications
//
// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259,
// and RFC 8785. Each RFC is generally a stricter subset of another RFC.
// In increasing order of strictness:
//
//   - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8
//     and also do not require (but recommend) that object names be unique.
//   - RFC 8259 requires the use of UTF-8,
//     but does not require (but recommends) that object names be unique.
//   - RFC 7493 requires the use of UTF-8
//     and also requires that object names be unique.
//   - RFC 8785 defines a canonical representation. It requires the use of UTF-8
//     and also requires that object names be unique and in a specific ordering.
//     It specifies exactly how strings and numbers must be formatted.
//
// The primary difference between RFC 4627 and RFC 7159 is that the former
// restricted top-level values to only JSON objects and arrays, while
// RFC 7159 and subsequent RFCs permit top-level values to additionally be
// JSON nulls, booleans, strings, or numbers.
//
// By default, this package operates on RFC 7493, but can be configured
// to operate according to the other RFC specifications.
// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it.
// In particular, it makes specific choices about behavior that RFC 8259
// leaves as undefined in order to ensure greater interoperability.
//
// # Security Considerations
//
// See the "Security Considerations" section in [encoding/json/v2].
package jsontext

// requireKeyedLiterals can be embedded in a struct to require keyed literals.
type requireKeyedLiterals struct{}

// nonComparable can be embedded in a struct to prevent comparability.
type nonComparable [0]func()
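A minimal sketch of the token-level reading the doc comment describes, using the Decoder API exercised by the tests in this commit (the fork's import path below is an assumption):

package main

import (
	"fmt"
	"io"
	"strings"

	"example.com/project/pkg/encoders/json/jsontext" // hypothetical module path for this fork
)

func main() {
	dec := jsontext.NewDecoder(strings.NewReader(`{"name":"value","array":[null,false,true]}`))
	for {
		tok, err := dec.ReadToken()
		if err == io.EOF {
			break // the top-level value has been fully consumed
		}
		if err != nil {
			panic(err)
		}
		// Kind reports the token class, e.g. '{', '}', '[', ']', '"', '0', 'n', 'f', 't'.
		fmt.Printf("kind %q: %v\n", tok.Kind(), tok)
	}
}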
pkg/encoders/json/jsontext/encode.go (new normal file, 972 lines)
@@ -0,0 +1,972 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.jsonv2

package jsontext

import (
	"bytes"
	"io"
	"math/bits"

	"encoding/json/internal/jsonflags"
	"encoding/json/internal/jsonopts"
	"encoding/json/internal/jsonwire"
)

// Encoder is a streaming encoder from raw JSON tokens and values.
// It is used to write a stream of top-level JSON values,
// each terminated with a newline character.
//
// [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved.
// For example, the following JSON value:
//
//	{"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be composed with the following calls (ignoring errors for brevity):
//
//	e.WriteToken(BeginObject)        // {
//	e.WriteToken(String("name"))     // "name"
//	e.WriteToken(String("value"))    // "value"
//	e.WriteValue(Value(`"array"`))   // "array"
//	e.WriteToken(BeginArray)         // [
//	e.WriteToken(Null)               // null
//	e.WriteToken(False)              // false
//	e.WriteValue(Value("true"))      // true
//	e.WriteToken(Float(3.14159))     // 3.14159
//	e.WriteToken(EndArray)           // ]
//	e.WriteValue(Value(`"object"`))  // "object"
//	e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"}
//	e.WriteToken(EndObject)          // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call [Encoder.WriteToken] with a string
// for object names.
type Encoder struct {
	s encoderState
}

// encoderState is the low-level state of Encoder.
// It has exported fields and methods for use by the "json" package.
type encoderState struct {
	state
	encodeBuffer
	jsonopts.Struct

	SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers
}

// encodeBuffer is a buffer split into 2 segments:
//
//   - buf[0:len(buf)]        // written (but unflushed) portion of the buffer
//   - buf[len(buf):cap(buf)] // unused portion of the buffer
type encodeBuffer struct {
	Buf []byte // may alias wr if it is a bytes.Buffer

	// baseOffset is added to len(buf) to obtain the absolute offset
	// relative to the start of io.Writer stream.
	baseOffset int64

	wr io.Writer

	// maxValue is the approximate maximum Value size passed to WriteValue.
	maxValue int
	// availBuffer is the buffer returned by the AvailableBuffer method.
	availBuffer []byte // always has zero length
	// bufStats is statistics about buffer utilization.
	// It is only used with pooled encoders in pools.go.
	bufStats bufferStatistics
}

// NewEncoder constructs a new streaming encoder writing to w
// configured with the provided options.
// It flushes the internal buffer when the buffer is sufficiently full or
// when a top-level value has been written.
//
// If w is a [bytes.Buffer], then the encoder appends directly into the buffer
// without copying the contents from an intermediate buffer.
func NewEncoder(w io.Writer, opts ...Options) *Encoder {
	e := new(Encoder)
	e.Reset(w, opts...)
	return e
}

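A runnable version of the call sequence shown in the Encoder doc comment above, writing to a bytes.Buffer via NewEncoder (the fork's import path is again assumed; error results are ignored as in the original comment):

package main

import (
	"bytes"
	"fmt"

	"example.com/project/pkg/encoders/json/jsontext" // hypothetical module path for this fork
)

func main() {
	var buf bytes.Buffer
	e := jsontext.NewEncoder(&buf)
	e.WriteToken(jsontext.BeginObject)
	e.WriteToken(jsontext.String("name"))
	e.WriteToken(jsontext.String("value"))
	e.WriteValue(jsontext.Value(`"array"`))
	e.WriteToken(jsontext.BeginArray)
	e.WriteToken(jsontext.Null)
	e.WriteToken(jsontext.False)
	e.WriteValue(jsontext.Value("true"))
	e.WriteToken(jsontext.Float(3.14159))
	e.WriteToken(jsontext.EndArray)
	e.WriteValue(jsontext.Value(`"object"`))
	e.WriteValue(jsontext.Value(`{"k":"v"}`))
	e.WriteToken(jsontext.EndObject)
	// The encoder flushes once the top-level value is complete.
	fmt.Print(buf.String())
	// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
}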
// Reset resets an encoder such that it is writing afresh to w and
// configured with the provided options. Reset must not be called on
// an Encoder passed to the [encoding/json/v2.MarshalerTo.MarshalJSONTo] method
// or the [encoding/json/v2.MarshalToFunc] function.
func (e *Encoder) Reset(w io.Writer, opts ...Options) {
	switch {
	case e == nil:
		panic("jsontext: invalid nil Encoder")
	case w == nil:
		panic("jsontext: invalid nil io.Writer")
	case e.s.Flags.Get(jsonflags.WithinArshalCall):
		panic("jsontext: cannot reset Encoder passed to json.MarshalerTo")
	}
	e.s.reset(nil, w, opts...)
}

func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) {
	e.state.reset()
	e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: e.bufStats}
	if bb, ok := w.(*bytes.Buffer); ok && bb != nil {
		e.Buf = bb.AvailableBuffer() // alias the unused buffer of bb
	}
	opts2 := jsonopts.Struct{} // avoid mutating e.Struct in case it is part of opts
	opts2.Join(opts...)
	e.Struct = opts2
	if e.Flags.Get(jsonflags.Multiline) {
		if !e.Flags.Has(jsonflags.SpaceAfterColon) {
			e.Flags.Set(jsonflags.SpaceAfterColon | 1)
		}
		if !e.Flags.Has(jsonflags.SpaceAfterComma) {
			e.Flags.Set(jsonflags.SpaceAfterComma | 0)
		}
		if !e.Flags.Has(jsonflags.Indent) {
			e.Flags.Set(jsonflags.Indent | 1)
			e.Indent = "\t"
		}
	}
}

// Options returns the options used to construct the encoder and
// may additionally contain semantic options passed to a
// [encoding/json/v2.MarshalEncode] call.
//
// If operating within
// a [encoding/json/v2.MarshalerTo.MarshalJSONTo] method call or
// a [encoding/json/v2.MarshalToFunc] function call,
// then the returned options are only valid within the call.
func (e *Encoder) Options() Options {
	return &e.s.Struct
}

// NeedFlush determines whether to flush at this point.
func (e *encoderState) NeedFlush() bool {
	// NOTE: This function is carefully written to be inlinable.

	// Avoid flushing if e.wr is nil since there is no underlying writer.
	// Flush if less than 25% of the capacity remains.
	// Flushing at some constant fraction ensures that the buffer stops growing
	// so long as the largest Token or Value fits within that unused capacity.
	return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4)
}

// Flush flushes the buffer to the underlying io.Writer.
|
||||
// It may append a trailing newline after the top-level value.
|
||||
func (e *encoderState) Flush() error {
|
||||
if e.wr == nil || e.avoidFlush() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// In streaming mode, always emit a newline after the top-level value.
|
||||
if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) {
|
||||
e.Buf = append(e.Buf, '\n')
|
||||
}
|
||||
|
||||
// Inform objectNameStack that we are about to flush the buffer content.
|
||||
e.Names.copyQuotedBuffer(e.Buf)
|
||||
|
||||
// Specialize bytes.Buffer for better performance.
|
||||
if bb, ok := e.wr.(*bytes.Buffer); ok {
|
||||
// If e.buf already aliases the internal buffer of bb,
|
||||
// then the Write call simply increments the internal offset,
|
||||
// otherwise Write operates as expected.
|
||||
// See https://go.dev/issue/42986.
|
||||
n, _ := bb.Write(e.Buf) // never fails unless bb is nil
|
||||
e.baseOffset += int64(n)
|
||||
|
||||
// If the internal buffer of bytes.Buffer is too small,
|
||||
// append operations elsewhere in the Encoder may grow the buffer.
|
||||
// This would be semantically correct, but hurts performance.
|
||||
// As such, ensure 25% of the current length is always available
|
||||
// to reduce the probability that other appends must allocate.
|
||||
if avail := bb.Available(); avail < bb.Len()/4 {
|
||||
bb.Grow(avail + 1)
|
||||
}
|
||||
|
||||
e.Buf = bb.AvailableBuffer()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flush the internal buffer to the underlying io.Writer.
|
||||
n, err := e.wr.Write(e.Buf)
|
||||
e.baseOffset += int64(n)
|
||||
if err != nil {
|
||||
// In the event of an error, preserve the unflushed portion.
|
||||
// Thus, write errors aren't fatal so long as the io.Writer
|
||||
// maintains consistent state after errors.
|
||||
if n > 0 {
|
||||
e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])]
|
||||
}
|
||||
return &ioError{action: "write", err: err}
|
||||
}
|
||||
e.Buf = e.Buf[:0]
|
||||
|
||||
// Check whether to grow the buffer.
|
||||
// Note that cap(e.buf) may already exceed maxBufferSize since
|
||||
// an append elsewhere already grew it to store a large token.
|
||||
const maxBufferSize = 4 << 10
|
||||
const growthSizeFactor = 2 // higher value is faster
|
||||
const growthRateFactor = 2 // higher value is slower
|
||||
// By default, grow if below the maximum buffer size.
|
||||
grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor
|
||||
// Growing can be expensive, so only grow
|
||||
// if a sufficient number of bytes have been processed.
|
||||
grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor
|
||||
if grow {
|
||||
e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func (d *encodeBuffer) offsetAt(pos int) int64 { return d.baseOffset + int64(pos) }
|
||||
func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) }
|
||||
func (e *encodeBuffer) unflushedBuffer() []byte { return e.Buf }
|
||||
|
||||
// avoidFlush indicates whether to avoid flushing to ensure there is always
|
||||
// enough in the buffer to unwrite the last object member if it were empty.
|
||||
func (e *encoderState) avoidFlush() bool {
|
||||
switch {
|
||||
case e.Tokens.Last.Length() == 0:
|
||||
// Never flush after BeginObject or BeginArray since we don't know yet
|
||||
// if the object or array will end up being empty.
|
||||
return true
|
||||
case e.Tokens.Last.needObjectValue():
|
||||
// Never flush before the object value since we don't know yet
|
||||
// if the object value will end up being empty.
|
||||
return true
|
||||
case e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2:
|
||||
// Never flush after the object value if it does turn out to be empty.
|
||||
switch string(e.Buf[len(e.Buf)-2:]) {
|
||||
case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// UnwriteEmptyObjectMember unwrites the last object member if it is empty
|
||||
// and reports whether it performed an unwrite operation.
|
||||
func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool {
|
||||
if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 {
|
||||
panic("BUG: must be called on an object after writing a value")
|
||||
}
|
||||
|
||||
// The flushing logic is modified to never flush a trailing empty value.
|
||||
// The encoder never writes trailing whitespace eagerly.
|
||||
b := e.unflushedBuffer()
|
||||
|
||||
// Detect whether the last value was empty.
|
||||
var n int
|
||||
if len(b) >= 3 {
|
||||
switch string(b[len(b)-2:]) {
|
||||
case "ll": // last two bytes of `null`
|
||||
n = len(`null`)
|
||||
case `""`:
|
||||
// It is possible for a non-empty string to have `""` as a suffix
|
||||
// if the second to the last quote was escaped.
|
||||
if b[len(b)-3] == '\\' {
|
||||
return false // e.g., `"\""` is not empty
|
||||
}
|
||||
n = len(`""`)
|
||||
case `{}`:
|
||||
n = len(`{}`)
|
||||
case `[]`:
|
||||
n = len(`[]`)
|
||||
}
|
||||
}
|
||||
if n == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Unwrite the value, whitespace, colon, name, whitespace, and comma.
|
||||
b = b[:len(b)-n]
|
||||
b = jsonwire.TrimSuffixWhitespace(b)
|
||||
b = jsonwire.TrimSuffixByte(b, ':')
|
||||
b = jsonwire.TrimSuffixString(b)
|
||||
b = jsonwire.TrimSuffixWhitespace(b)
|
||||
b = jsonwire.TrimSuffixByte(b, ',')
|
||||
e.Buf = b // store back truncated unflushed buffer
|
||||
|
||||
// Undo state changes.
|
||||
e.Tokens.Last.decrement() // for object member value
|
||||
e.Tokens.Last.decrement() // for object member name
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if e.Tokens.Last.isActiveNamespace() {
|
||||
e.Namespaces.Last().removeLast()
|
||||
}
|
||||
}
|
||||
e.Names.clearLast()
|
||||
if prevName != nil {
|
||||
e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName
|
||||
e.Names.replaceLastUnquotedName(*prevName)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// UnwriteOnlyObjectMemberName unwrites the only object member name
|
||||
// and returns the unquoted name.
|
||||
func (e *encoderState) UnwriteOnlyObjectMemberName() string {
|
||||
if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 {
|
||||
panic("BUG: must be called on an object after writing first name")
|
||||
}
|
||||
|
||||
// Unwrite the name and whitespace.
|
||||
b := jsonwire.TrimSuffixString(e.Buf)
|
||||
isVerbatim := bytes.IndexByte(e.Buf[len(b):], '\\') < 0
|
||||
name := string(jsonwire.UnquoteMayCopy(e.Buf[len(b):], isVerbatim))
|
||||
e.Buf = jsonwire.TrimSuffixWhitespace(b)
|
||||
|
||||
// Undo state changes.
|
||||
e.Tokens.Last.decrement()
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if e.Tokens.Last.isActiveNamespace() {
|
||||
e.Namespaces.Last().removeLast()
|
||||
}
|
||||
}
|
||||
e.Names.clearLast()
|
||||
return name
|
||||
}
|
||||
|
||||
// WriteToken writes the next token and advances the internal write offset.
|
||||
//
|
||||
// The provided token kind must be consistent with the JSON grammar.
|
||||
// For example, it is an error to provide a number when the encoder
|
||||
// is expecting an object name (which is always a string), or
|
||||
// to provide an end object delimiter when the encoder is finishing an array.
|
||||
// If the provided token is invalid, then it reports a [SyntacticError] and
|
||||
// the internal state remains unchanged. The offset reported
|
||||
// in [SyntacticError] will be relative to the [Encoder.OutputOffset].
|
||||
func (e *Encoder) WriteToken(t Token) error {
|
||||
return e.s.WriteToken(t)
|
||||
}
|
||||
func (e *encoderState) WriteToken(t Token) error {
|
||||
k := t.Kind()
|
||||
b := e.Buf // use local variable to avoid mutating e in case of error
|
||||
|
||||
// Append any delimiters or optional whitespace.
|
||||
b = e.Tokens.MayAppendDelim(b, k)
|
||||
if e.Flags.Get(jsonflags.AnyWhitespace) {
|
||||
b = e.appendWhitespace(b, k)
|
||||
}
|
||||
pos := len(b) // offset before the token
|
||||
|
||||
// Append the token to the output and to the state machine.
|
||||
var err error
|
||||
switch k {
|
||||
case 'n':
|
||||
b = append(b, "null"...)
|
||||
err = e.Tokens.appendLiteral()
|
||||
case 'f':
|
||||
b = append(b, "false"...)
|
||||
err = e.Tokens.appendLiteral()
|
||||
case 't':
|
||||
b = append(b, "true"...)
|
||||
err = e.Tokens.appendLiteral()
|
||||
case '"':
|
||||
if b, err = t.appendString(b, &e.Flags); err != nil {
|
||||
break
|
||||
}
|
||||
if e.Tokens.Last.NeedObjectName() {
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if !e.Tokens.Last.isValidNamespace() {
|
||||
err = errInvalidNamespace
|
||||
break
|
||||
}
|
||||
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
|
||||
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
|
||||
break
|
||||
}
|
||||
}
|
||||
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
|
||||
}
|
||||
err = e.Tokens.appendString()
|
||||
case '0':
|
||||
if b, err = t.appendNumber(b, &e.Flags); err != nil {
|
||||
break
|
||||
}
|
||||
err = e.Tokens.appendNumber()
|
||||
case '{':
|
||||
b = append(b, '{')
|
||||
if err = e.Tokens.pushObject(); err != nil {
|
||||
break
|
||||
}
|
||||
e.Names.push()
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
e.Namespaces.push()
|
||||
}
|
||||
case '}':
|
||||
b = append(b, '}')
|
||||
if err = e.Tokens.popObject(); err != nil {
|
||||
break
|
||||
}
|
||||
e.Names.pop()
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
e.Namespaces.pop()
|
||||
}
|
||||
case '[':
|
||||
b = append(b, '[')
|
||||
err = e.Tokens.pushArray()
|
||||
case ']':
|
||||
b = append(b, ']')
|
||||
err = e.Tokens.popArray()
|
||||
default:
|
||||
err = errInvalidToken
|
||||
}
|
||||
if err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
|
||||
// Finish off the buffer and store it back into e.
|
||||
e.Buf = b
|
||||
if e.NeedFlush() {
|
||||
return e.Flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AppendRaw appends either a raw string (without double quotes) or number.
|
||||
// Specify safeASCII if the string output is guaranteed to be ASCII
|
||||
// without any characters (including '<', '>', and '&') that need escaping,
|
||||
// otherwise this will validate whether the string needs escaping.
|
||||
// The appended bytes for a JSON number must be valid.
|
||||
//
|
||||
// This is a specialized implementation of Encoder.WriteValue
|
||||
// that allows appending directly into the buffer.
|
||||
// It is only called from marshal logic in the "json" package.
|
||||
func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error {
|
||||
b := e.Buf // use local variable to avoid mutating e in case of error
|
||||
|
||||
// Append any delimiters or optional whitespace.
|
||||
b = e.Tokens.MayAppendDelim(b, k)
|
||||
if e.Flags.Get(jsonflags.AnyWhitespace) {
|
||||
b = e.appendWhitespace(b, k)
|
||||
}
|
||||
pos := len(b) // offset before the token
|
||||
|
||||
var err error
|
||||
switch k {
|
||||
case '"':
|
||||
// Append directly into the encoder buffer by assuming that
|
||||
// most of the time none of the characters need escaping.
|
||||
b = append(b, '"')
|
||||
if b, err = appendFn(b); err != nil {
|
||||
return err
|
||||
}
|
||||
b = append(b, '"')
|
||||
|
||||
// Check whether we need to escape the string and if necessary
|
||||
// copy it to a scratch buffer and then escape it back.
|
||||
isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)])
|
||||
if !isVerbatim {
|
||||
var err error
|
||||
b2 := append(e.availBuffer, b[pos+len(`"`):len(b)-len(`"`)]...)
|
||||
b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags)
|
||||
e.availBuffer = b2[:0]
|
||||
if err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
}
|
||||
|
||||
// Update the state machine.
|
||||
if e.Tokens.Last.NeedObjectName() {
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if !e.Tokens.Last.isValidNamespace() {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) {
|
||||
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
}
|
||||
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
|
||||
}
|
||||
if err := e.Tokens.appendString(); err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
case '0':
|
||||
if b, err = appendFn(b); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := e.Tokens.appendNumber(); err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
default:
|
||||
panic("BUG: invalid kind")
|
||||
}
|
||||
|
||||
// Finish off the buffer and store it back into e.
|
||||
e.Buf = b
|
||||
if e.NeedFlush() {
|
||||
return e.Flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteValue writes the next raw value and advances the internal write offset.
|
||||
// The Encoder does not simply copy the provided value verbatim, but
|
||||
// parses it to ensure that it is syntactically valid and reformats it
|
||||
// according to how the Encoder is configured to format whitespace and strings.
|
||||
// If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled
|
||||
// as the Unicode replacement character, U+FFFD.
|
||||
//
|
||||
// The provided value kind must be consistent with the JSON grammar
|
||||
// (see examples on [Encoder.WriteToken]). If the provided value is invalid,
|
||||
// then it reports a [SyntacticError] and the internal state remains unchanged.
|
||||
// The offset reported in [SyntacticError] will be relative to the
|
||||
// [Encoder.OutputOffset] plus the offset into v of any encountered syntax error.
|
||||
func (e *Encoder) WriteValue(v Value) error {
|
||||
return e.s.WriteValue(v)
|
||||
}
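
// A minimal sketch of the behavior documented above, written as if from an
// importing package (assumes "bytes" and "fmt" are imported and this package
// is imported as jsontext): WriteValue validates and reformats the raw value
// rather than copying it verbatim.
//
//	var buf bytes.Buffer
//	enc := jsontext.NewEncoder(&buf)
//	if err := enc.WriteValue(jsontext.Value(` { "fizz" : "buzz" } `)); err != nil {
//		panic(err) // e.g. a *jsontext.SyntacticError for malformed input
//	}
//	fmt.Print(buf.String()) // prints {"fizz":"buzz"} followed by a newline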
|
||||
func (e *encoderState) WriteValue(v Value) error {
|
||||
e.maxValue |= len(v) // bitwise OR is a fast approximation of max
|
||||
|
||||
k := v.Kind()
|
||||
b := e.Buf // use local variable to avoid mutating e in case of error
|
||||
|
||||
// Append any delimiters or optional whitespace.
|
||||
b = e.Tokens.MayAppendDelim(b, k)
|
||||
if e.Flags.Get(jsonflags.AnyWhitespace) {
|
||||
b = e.appendWhitespace(b, k)
|
||||
}
|
||||
pos := len(b) // offset before the value
|
||||
|
||||
// Append the value to the output.
|
||||
var n int
|
||||
n += jsonwire.ConsumeWhitespace(v[n:])
|
||||
b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth())
|
||||
if err != nil {
|
||||
return wrapSyntacticError(e, err, pos+n+m, +1)
|
||||
}
|
||||
n += m
|
||||
n += jsonwire.ConsumeWhitespace(v[n:])
|
||||
if len(v) > n {
|
||||
err = jsonwire.NewInvalidCharacterError(v[n:], "after top-level value")
|
||||
return wrapSyntacticError(e, err, pos+n, 0)
|
||||
}
|
||||
|
||||
// Append the kind to the state machine.
|
||||
switch k {
|
||||
case 'n', 'f', 't':
|
||||
err = e.Tokens.appendLiteral()
|
||||
case '"':
|
||||
if e.Tokens.Last.NeedObjectName() {
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if !e.Tokens.Last.isValidNamespace() {
|
||||
err = errInvalidNamespace
|
||||
break
|
||||
}
|
||||
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
|
||||
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
|
||||
break
|
||||
}
|
||||
}
|
||||
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
|
||||
}
|
||||
err = e.Tokens.appendString()
|
||||
case '0':
|
||||
err = e.Tokens.appendNumber()
|
||||
case '{':
|
||||
if err = e.Tokens.pushObject(); err != nil {
|
||||
break
|
||||
}
|
||||
if err = e.Tokens.popObject(); err != nil {
|
||||
panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
|
||||
}
|
||||
if e.Flags.Get(jsonflags.ReorderRawObjects) {
|
||||
mustReorderObjects(b[pos:])
|
||||
}
|
||||
case '[':
|
||||
if err = e.Tokens.pushArray(); err != nil {
|
||||
break
|
||||
}
|
||||
if err = e.Tokens.popArray(); err != nil {
|
||||
panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
|
||||
}
|
||||
if e.Flags.Get(jsonflags.ReorderRawObjects) {
|
||||
mustReorderObjects(b[pos:])
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
|
||||
// Finish off the buffer and store it back into e.
|
||||
e.Buf = b
|
||||
if e.NeedFlush() {
|
||||
return e.Flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CountNextDelimWhitespace counts the number of delimiter and whitespace
// bytes, assuming the upcoming token is a JSON value.
|
||||
// This method is used for error reporting at the semantic layer.
|
||||
func (e *encoderState) CountNextDelimWhitespace() (n int) {
|
||||
const next = Kind('"') // arbitrary kind as next JSON value
|
||||
delim := e.Tokens.needDelim(next)
|
||||
if delim > 0 {
|
||||
n += len(",") | len(":")
|
||||
}
|
||||
if delim == ':' {
|
||||
if e.Flags.Get(jsonflags.SpaceAfterColon) {
|
||||
n += len(" ")
|
||||
}
|
||||
} else {
|
||||
if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
|
||||
n += len(" ")
|
||||
}
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
if m := e.Tokens.NeedIndent(next); m > 0 {
|
||||
n += len("\n") + len(e.IndentPrefix) + (m-1)*len(e.Indent)
|
||||
}
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// appendWhitespace appends whitespace that immediately precedes the next token.
|
||||
func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte {
|
||||
if delim := e.Tokens.needDelim(next); delim == ':' {
|
||||
if e.Flags.Get(jsonflags.SpaceAfterColon) {
|
||||
b = append(b, ' ')
|
||||
}
|
||||
} else {
|
||||
if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
|
||||
b = append(b, ' ')
|
||||
}
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
b = e.AppendIndent(b, e.Tokens.NeedIndent(next))
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// AppendIndent appends the appropriate number of indentation characters
|
||||
// for the current nested level, n.
|
||||
func (e *encoderState) AppendIndent(b []byte, n int) []byte {
|
||||
if n == 0 {
|
||||
return b
|
||||
}
|
||||
b = append(b, '\n')
|
||||
b = append(b, e.IndentPrefix...)
|
||||
for ; n > 1; n-- {
|
||||
b = append(b, e.Indent...)
|
||||
}
|
||||
return b
|
||||
}
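
// A minimal sketch of how the whitespace options above interact, mirroring the
// "Format/Array/NoSpaceAfterComma+Multiline" test case in encode_test.go
// (assumes "bytes" and "fmt" are imported and this package is imported as
// jsontext):
//
//	var buf bytes.Buffer
//	enc := jsontext.NewEncoder(&buf, jsontext.SpaceAfterComma(false), jsontext.Multiline(true))
//	enc.WriteValue(jsontext.Value(`["fizz","buzz"]`))
//	fmt.Print(buf.String())
//	// [
//	// 	"fizz",
//	// 	"buzz"
//	// ]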
|
||||
|
||||
// reformatValue parses a JSON value from the start of src and
|
||||
// appends it to the end of dst, reformatting whitespace and strings as needed.
|
||||
// It returns the extended dst buffer and the number of consumed input bytes.
|
||||
func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) {
|
||||
// TODO: Should this update ValueFlags as input?
|
||||
if len(src) == 0 {
|
||||
return dst, 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch k := Kind(src[0]).normalize(); k {
|
||||
case 'n':
|
||||
if jsonwire.ConsumeNull(src) == 0 {
|
||||
n, err := jsonwire.ConsumeLiteral(src, "null")
|
||||
return dst, n, err
|
||||
}
|
||||
return append(dst, "null"...), len("null"), nil
|
||||
case 'f':
|
||||
if jsonwire.ConsumeFalse(src) == 0 {
|
||||
n, err := jsonwire.ConsumeLiteral(src, "false")
|
||||
return dst, n, err
|
||||
}
|
||||
return append(dst, "false"...), len("false"), nil
|
||||
case 't':
|
||||
if jsonwire.ConsumeTrue(src) == 0 {
|
||||
n, err := jsonwire.ConsumeLiteral(src, "true")
|
||||
return dst, n, err
|
||||
}
|
||||
return append(dst, "true"...), len("true"), nil
|
||||
case '"':
|
||||
if n := jsonwire.ConsumeSimpleString(src); n > 0 {
|
||||
dst = append(dst, src[:n]...) // copy simple strings verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
return jsonwire.ReformatString(dst, src, &e.Flags)
|
||||
case '0':
|
||||
if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) {
|
||||
dst = append(dst, src[:n]...) // copy simple numbers verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
return jsonwire.ReformatNumber(dst, src, &e.Flags)
|
||||
case '{':
|
||||
return e.reformatObject(dst, src, depth)
|
||||
case '[':
|
||||
return e.reformatArray(dst, src, depth)
|
||||
default:
|
||||
return dst, 0, jsonwire.NewInvalidCharacterError(src, "at start of value")
|
||||
}
|
||||
}
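
// A minimal sketch of reformatting in action: with [CanonicalizeRawFloats] set,
// simple numbers are no longer copied verbatim but rewritten in canonical form
// (derived from the "Format/CanonicalizeRawFloats" test case; assumes "bytes"
// and "fmt" are imported and this package is imported as jsontext):
//
//	var buf bytes.Buffer
//	enc := jsontext.NewEncoder(&buf, jsontext.CanonicalizeRawFloats(true), jsontext.SpaceAfterComma(true))
//	enc.WriteValue(jsontext.Value(`[0.100,5.0,1E6]`))
//	fmt.Print(buf.String()) // [0.1, 5, 1000000] followed by a newline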
|
||||
|
||||
// reformatObject parses a JSON object from the start of src and
|
||||
// appends it to the end of dst, reformatting whitespace and strings as needed.
|
||||
// It returns the extended dst buffer and the number of consumed input bytes.
|
||||
func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) {
|
||||
// Append object begin.
|
||||
if len(src) == 0 || src[0] != '{' {
|
||||
panic("BUG: reformatObject must be called with a buffer that starts with '{'")
|
||||
} else if depth == maxNestingDepth+1 {
|
||||
return dst, 0, errMaxDepth
|
||||
}
|
||||
dst = append(dst, '{')
|
||||
n := len("{")
|
||||
|
||||
// Append (possible) object end.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
if src[n] == '}' {
|
||||
dst = append(dst, '}')
|
||||
n += len("}")
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
var names *objectNamespace
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
e.Namespaces.push()
|
||||
defer e.Namespaces.pop()
|
||||
names = e.Namespaces.Last()
|
||||
}
|
||||
depth++
|
||||
for {
|
||||
// Append optional newline and indentation.
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
dst = e.AppendIndent(dst, depth)
|
||||
}
|
||||
|
||||
// Append object name.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
m := jsonwire.ConsumeSimpleString(src[n:])
|
||||
isVerbatim := m > 0
|
||||
if isVerbatim {
|
||||
dst = append(dst, src[n:n+m]...)
|
||||
} else {
|
||||
dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags)
|
||||
if err != nil {
|
||||
return dst, n + m, err
|
||||
}
|
||||
}
|
||||
quotedName := src[n : n+m]
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, isVerbatim) {
|
||||
return dst, n, wrapWithObjectName(ErrDuplicateName, quotedName)
|
||||
}
|
||||
n += m
|
||||
|
||||
// Append colon.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
|
||||
}
|
||||
if src[n] != ':' {
|
||||
err = jsonwire.NewInvalidCharacterError(src[n:], "after object name (expecting ':')")
|
||||
return dst, n, wrapWithObjectName(err, quotedName)
|
||||
}
|
||||
dst = append(dst, ':')
|
||||
n += len(":")
|
||||
if e.Flags.Get(jsonflags.SpaceAfterColon) {
|
||||
dst = append(dst, ' ')
|
||||
}
|
||||
|
||||
// Append object value.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
|
||||
}
|
||||
dst, m, err = e.reformatValue(dst, src[n:], depth)
|
||||
if err != nil {
|
||||
return dst, n + m, wrapWithObjectName(err, quotedName)
|
||||
}
|
||||
n += m
|
||||
|
||||
// Append comma or object end.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch src[n] {
|
||||
case ',':
|
||||
dst = append(dst, ',')
|
||||
if e.Flags.Get(jsonflags.SpaceAfterComma) {
|
||||
dst = append(dst, ' ')
|
||||
}
|
||||
n += len(",")
|
||||
continue
|
||||
case '}':
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
dst = e.AppendIndent(dst, depth-1)
|
||||
}
|
||||
dst = append(dst, '}')
|
||||
n += len("}")
|
||||
return dst, n, nil
|
||||
default:
|
||||
return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after object value (expecting ',' or '}')")
|
||||
}
|
||||
}
|
||||
}
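
// A minimal sketch of the duplicate-name checking performed above (assumes
// "bytes", "errors", and "fmt" are imported and this package is imported as
// jsontext):
//
//	var buf bytes.Buffer
//	enc := jsontext.NewEncoder(&buf)
//	err := enc.WriteValue(jsontext.Value(`{"x":1,"x":2}`))
//	fmt.Println(errors.Is(err, jsontext.ErrDuplicateName)) // true
//
//	enc2 := jsontext.NewEncoder(new(bytes.Buffer), jsontext.AllowDuplicateNames(true))
//	fmt.Println(enc2.WriteValue(jsontext.Value(`{"x":1,"x":2}`))) // <nil>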
|
||||
|
||||
// reformatArray parses a JSON array from the start of src and
|
||||
// appends it to the end of dst, reformatting whitespace and strings as needed.
|
||||
// It returns the extended dst buffer and the number of consumed input bytes.
|
||||
func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) {
|
||||
// Append array begin.
|
||||
if len(src) == 0 || src[0] != '[' {
|
||||
panic("BUG: reformatArray must be called with a buffer that starts with '['")
|
||||
} else if depth == maxNestingDepth+1 {
|
||||
return dst, 0, errMaxDepth
|
||||
}
|
||||
dst = append(dst, '[')
|
||||
n := len("[")
|
||||
|
||||
// Append (possible) array end.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
if src[n] == ']' {
|
||||
dst = append(dst, ']')
|
||||
n += len("]")
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
var idx int64
|
||||
var err error
|
||||
depth++
|
||||
for {
|
||||
// Append optional newline and indentation.
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
dst = e.AppendIndent(dst, depth)
|
||||
}
|
||||
|
||||
// Append array value.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
var m int
|
||||
dst, m, err = e.reformatValue(dst, src[n:], depth)
|
||||
if err != nil {
|
||||
return dst, n + m, wrapWithArrayIndex(err, idx)
|
||||
}
|
||||
n += m
|
||||
|
||||
// Append comma or array end.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch src[n] {
|
||||
case ',':
|
||||
dst = append(dst, ',')
|
||||
if e.Flags.Get(jsonflags.SpaceAfterComma) {
|
||||
dst = append(dst, ' ')
|
||||
}
|
||||
n += len(",")
|
||||
idx++
|
||||
continue
|
||||
case ']':
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
dst = e.AppendIndent(dst, depth-1)
|
||||
}
|
||||
dst = append(dst, ']')
|
||||
n += len("]")
|
||||
return dst, n, nil
|
||||
default:
|
||||
return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after array value (expecting ',' or ']')")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// OutputOffset returns the current output byte offset. It gives the location
|
||||
// of the next byte immediately after the most recently written token or value.
|
||||
// The number of bytes actually written to the underlying [io.Writer] may be less
|
||||
// than this offset due to internal buffering effects.
|
||||
func (e *Encoder) OutputOffset() int64 {
|
||||
return e.s.previousOffsetEnd()
|
||||
}
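
// For example (assumes "bytes" and "fmt" are imported and this package is
// imported as jsontext), the offset covers the whole top-level value,
// including the trailing newline emitted by default:
//
//	var buf bytes.Buffer
//	enc := jsontext.NewEncoder(&buf)
//	enc.WriteValue(jsontext.Value(`{"fizz":"buzz"}`))
//	fmt.Println(enc.OutputOffset()) // 16: 15 bytes of JSON plus the top-level newline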
|
||||
|
||||
// AvailableBuffer returns a zero-length buffer with a possible non-zero capacity.
|
||||
// This buffer is intended to be used to populate a [Value]
|
||||
// being passed to an immediately succeeding [Encoder.WriteValue] call.
|
||||
//
|
||||
// Example usage:
|
||||
//
|
||||
// b := d.AvailableBuffer()
|
||||
// b = append(b, '"')
|
||||
// b = appendString(b, v) // append the string formatting of v
|
||||
// b = append(b, '"')
|
||||
// ... := d.WriteValue(b)
|
||||
//
|
||||
// It is the user's responsibility to ensure that the value is valid JSON.
|
||||
func (e *Encoder) AvailableBuffer() []byte {
|
||||
// NOTE: We don't return e.buf[len(e.buf):cap(e.buf)] since WriteValue would
|
||||
// need to take special care to avoid mangling the data while reformatting.
|
||||
// WriteValue can't easily identify whether the input Value aliases e.buf
|
||||
// without using unsafe.Pointer. Thus, we just return a different buffer.
|
||||
// Should this ever alias e.buf, we need to consider how it operates with
|
||||
// the specialized performance optimization for bytes.Buffer.
|
||||
n := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length
|
||||
if cap(e.s.availBuffer) < n {
|
||||
e.s.availBuffer = make([]byte, 0, n)
|
||||
}
|
||||
return e.s.availBuffer
|
||||
}
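
// A concrete variant of the usage sketched above (assumes "bytes" and
// "strconv" are imported and this package is imported as jsontext):
//
//	var buf bytes.Buffer
//	enc := jsontext.NewEncoder(&buf)
//	b := enc.AvailableBuffer()
//	b = strconv.AppendInt(b, 123, 10) // format a JSON number without extra allocation
//	if err := enc.WriteValue(b); err != nil {
//		panic(err)
//	}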
|
||||
|
||||
// StackDepth returns the depth of the state machine for written JSON data.
|
||||
// Each level on the stack represents a nested JSON object or array.
|
||||
// It is incremented whenever a [BeginObject] or [BeginArray] token is encountered
|
||||
// and decremented whenever an [EndObject] or [EndArray] token is encountered.
|
||||
// The depth is zero-indexed, where zero represents the top-level JSON value.
|
||||
func (e *Encoder) StackDepth() int {
|
||||
// NOTE: Keep in sync with Decoder.StackDepth.
|
||||
return e.s.Tokens.Depth() - 1
|
||||
}
|
||||
|
||||
// StackIndex returns information about the specified stack level.
|
||||
// It must be a number between 0 and [Encoder.StackDepth], inclusive.
|
||||
// For each level, it reports the kind:
|
||||
//
|
||||
// - 0 for a level of zero,
|
||||
// - '{' for a level representing a JSON object, and
|
||||
// - '[' for a level representing a JSON array.
|
||||
//
|
||||
// It also reports the length of that JSON object or array.
|
||||
// Each name and value in a JSON object is counted separately,
|
||||
// so the effective number of members would be half the length.
|
||||
// A complete JSON object must have an even length.
|
||||
func (e *Encoder) StackIndex(i int) (Kind, int64) {
|
||||
// NOTE: Keep in sync with Decoder.StackIndex.
|
||||
switch s := e.s.Tokens.index(i); {
|
||||
case i > 0 && s.isObject():
|
||||
return '{', s.Length()
|
||||
case i > 0 && s.isArray():
|
||||
return '[', s.Length()
|
||||
default:
|
||||
return 0, s.Length()
|
||||
}
|
||||
}
|
||||
|
||||
// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
|
||||
func (e *Encoder) StackPointer() Pointer {
|
||||
return Pointer(e.s.AppendStackPointer(nil, -1))
|
||||
}
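
// A minimal sketch of stack introspection while encoding (assumes "bytes" and
// "fmt" are imported and this package is imported as jsontext):
//
//	var buf bytes.Buffer
//	enc := jsontext.NewEncoder(&buf)
//	enc.WriteToken(jsontext.BeginObject)
//	enc.WriteToken(jsontext.String("fizz"))
//	enc.WriteToken(jsontext.BeginArray)
//	enc.WriteToken(jsontext.String("buzz"))
//	fmt.Println(enc.StackDepth())   // 2: inside an object and an array
//	fmt.Println(enc.StackPointer()) // /fizz/0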
|
||||
|
||||
func (e *encoderState) AppendStackPointer(b []byte, where int) []byte {
|
||||
e.Names.copyQuotedBuffer(e.Buf)
|
||||
return e.state.appendStackPointer(b, where)
|
||||
}
|
||||
737
pkg/encoders/json/jsontext/encode_test.go
Normal file
@@ -0,0 +1,737 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
"path"
|
||||
"slices"
|
||||
"testing"
|
||||
|
||||
"encoding/json/internal/jsonflags"
|
||||
"encoding/json/internal/jsontest"
|
||||
"encoding/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// TestEncoder tests whether we can produce JSON with either tokens or raw values.
|
||||
func TestEncoder(t *testing.T) {
|
||||
for _, td := range coderTestdata {
|
||||
for _, formatName := range []string{"Compact", "Indented"} {
|
||||
for _, typeName := range []string{"Token", "Value", "TokenDelims"} {
|
||||
t.Run(path.Join(td.name.Name, typeName, formatName), func(t *testing.T) {
|
||||
testEncoder(t, td.name.Where, formatName, typeName, td)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName string, td coderTestdataEntry) {
|
||||
var want string
|
||||
var opts []Options
|
||||
dst := new(bytes.Buffer)
|
||||
opts = append(opts, jsonflags.OmitTopLevelNewline|1)
|
||||
want = td.outCompacted
|
||||
switch formatName {
|
||||
case "Indented":
|
||||
opts = append(opts, Multiline(true))
|
||||
opts = append(opts, WithIndentPrefix("\t"))
|
||||
opts = append(opts, WithIndent(" "))
|
||||
if td.outIndented != "" {
|
||||
want = td.outIndented
|
||||
}
|
||||
}
|
||||
enc := NewEncoder(dst, opts...)
|
||||
|
||||
switch typeName {
|
||||
case "Token":
|
||||
var pointers []Pointer
|
||||
for _, tok := range td.tokens {
|
||||
if err := enc.WriteToken(tok); err != nil {
|
||||
t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
|
||||
}
|
||||
if td.pointers != nil {
|
||||
pointers = append(pointers, enc.StackPointer())
|
||||
}
|
||||
}
|
||||
if !slices.Equal(pointers, td.pointers) {
|
||||
t.Fatalf("%s: pointers mismatch:\ngot %q\nwant %q", where, pointers, td.pointers)
|
||||
}
|
||||
case "Value":
|
||||
if err := enc.WriteValue(Value(td.in)); err != nil {
|
||||
t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
|
||||
}
|
||||
case "TokenDelims":
|
||||
// Use WriteToken for object/array delimiters, WriteValue otherwise.
|
||||
for _, tok := range td.tokens {
|
||||
switch tok.Kind() {
|
||||
case '{', '}', '[', ']':
|
||||
if err := enc.WriteToken(tok); err != nil {
|
||||
t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
|
||||
}
|
||||
default:
|
||||
val := Value(tok.String())
|
||||
if tok.Kind() == '"' {
|
||||
val, _ = jsonwire.AppendQuote(nil, tok.String(), &jsonflags.Flags{})
|
||||
}
|
||||
if err := enc.WriteValue(val); err != nil {
|
||||
t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
got := dst.String()
|
||||
if got != want {
|
||||
t.Errorf("%s: output mismatch:\ngot %q\nwant %q", where, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFaultyEncoder tests that temporary I/O errors are not fatal.
|
||||
func TestFaultyEncoder(t *testing.T) {
|
||||
for _, td := range coderTestdata {
|
||||
for _, typeName := range []string{"Token", "Value"} {
|
||||
t.Run(path.Join(td.name.Name, typeName), func(t *testing.T) {
|
||||
testFaultyEncoder(t, td.name.Where, typeName, td)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
func testFaultyEncoder(t *testing.T, where jsontest.CasePos, typeName string, td coderTestdataEntry) {
|
||||
b := &FaultyBuffer{
|
||||
MaxBytes: 1,
|
||||
MayError: io.ErrShortWrite,
|
||||
}
|
||||
|
||||
// Write all the tokens.
|
||||
// Even if the underlying io.Writer may be faulty,
|
||||
// writing a valid token or value is guaranteed to at least
|
||||
// be appended to the internal buffer.
|
||||
// In other words, syntactic errors occur before I/O errors.
|
||||
enc := NewEncoder(b)
|
||||
switch typeName {
|
||||
case "Token":
|
||||
for i, tok := range td.tokens {
|
||||
err := enc.WriteToken(tok)
|
||||
if err != nil && !errors.Is(err, io.ErrShortWrite) {
|
||||
t.Fatalf("%s: %d: Encoder.WriteToken error: %v", where, i, err)
|
||||
}
|
||||
}
|
||||
case "Value":
|
||||
err := enc.WriteValue(Value(td.in))
|
||||
if err != nil && !errors.Is(err, io.ErrShortWrite) {
|
||||
t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
|
||||
}
|
||||
}
|
||||
gotOutput := string(append(b.B, enc.s.unflushedBuffer()...))
|
||||
wantOutput := td.outCompacted + "\n"
|
||||
if gotOutput != wantOutput {
|
||||
t.Fatalf("%s: output mismatch:\ngot %s\nwant %s", where, gotOutput, wantOutput)
|
||||
}
|
||||
}
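
// A minimal sketch of the property exercised above: with a writer that always
// fails, a syntactically valid token is still recorded in the internal buffer,
// and any returned error wraps the underlying I/O error rather than a
// syntactic one (the failingWriter type is hypothetical; assumes "errors" and
// "io" are imported):
//
//	type failingWriter struct{}
//
//	func (failingWriter) Write(p []byte) (int, error) { return 0, io.ErrShortWrite }
//
//	enc := NewEncoder(failingWriter{})
//	if err := enc.WriteToken(String("ok")); err != nil && !errors.Is(err, io.ErrShortWrite) {
//		panic("only I/O errors are expected for a syntactically valid token")
//	}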
|
||||
|
||||
type encoderMethodCall struct {
|
||||
in tokOrVal
|
||||
wantErr error
|
||||
wantPointer Pointer
|
||||
}
|
||||
|
||||
var encoderErrorTestdata = []struct {
|
||||
name jsontest.CaseName
|
||||
opts []Options
|
||||
calls []encoderMethodCall
|
||||
wantOut string
|
||||
}{{
|
||||
name: jsontest.Name("InvalidToken"),
|
||||
calls: []encoderMethodCall{
|
||||
{zeroToken, E(errInvalidToken), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidValue"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`#`), newInvalidCharacterError("#", "at start of value"), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidValue/DoubleZero"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`00`), newInvalidCharacterError("0", "after top-level value").withPos(`0`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedValue"),
|
||||
calls: []encoderMethodCall{
|
||||
{zeroValue, E(io.ErrUnexpectedEOF).withPos("", ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedNull"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`nul`), E(io.ErrUnexpectedEOF).withPos("nul", ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidNull"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`nulL`), newInvalidCharacterError("L", "in literal null (expecting 'l')").withPos(`nul`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedFalse"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`fals`), E(io.ErrUnexpectedEOF).withPos("fals", ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidFalse"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`falsE`), newInvalidCharacterError("E", "in literal false (expecting 'e')").withPos(`fals`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedTrue"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`tru`), E(io.ErrUnexpectedEOF).withPos(`tru`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidTrue"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`truE`), newInvalidCharacterError("E", "in literal true (expecting 'e')").withPos(`tru`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedString"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`"star`), E(io.ErrUnexpectedEOF).withPos(`"star`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidString"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`"ok` + "\x00"), newInvalidCharacterError("\x00", `in string (expecting non-control character)`).withPos(`"ok`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("ValidString/AllowInvalidUTF8/Token"),
|
||||
opts: []Options{AllowInvalidUTF8(true)},
|
||||
calls: []encoderMethodCall{
|
||||
{String("living\xde\xad\xbe\xef"), nil, ""},
|
||||
},
|
||||
wantOut: "\"living\xde\xad\ufffd\ufffd\"\n",
|
||||
}, {
|
||||
name: jsontest.Name("ValidString/AllowInvalidUTF8/Value"),
|
||||
opts: []Options{AllowInvalidUTF8(true)},
|
||||
calls: []encoderMethodCall{
|
||||
{Value("\"living\xde\xad\xbe\xef\""), nil, ""},
|
||||
},
|
||||
wantOut: "\"living\xde\xad\ufffd\ufffd\"\n",
|
||||
}, {
|
||||
name: jsontest.Name("InvalidString/RejectInvalidUTF8"),
|
||||
opts: []Options{AllowInvalidUTF8(false)},
|
||||
calls: []encoderMethodCall{
|
||||
{String("living\xde\xad\xbe\xef"), E(jsonwire.ErrInvalidUTF8), ""},
|
||||
{Value("\"living\xde\xad\xbe\xef\""), E(jsonwire.ErrInvalidUTF8).withPos("\"living\xde\xad", ""), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("name"), nil, ""},
|
||||
{BeginArray, nil, ""},
|
||||
{String("living\xde\xad\xbe\xef"), E(jsonwire.ErrInvalidUTF8).withPos(`{"name":[`, "/name/0"), ""},
|
||||
{Value("\"living\xde\xad\xbe\xef\""), E(jsonwire.ErrInvalidUTF8).withPos("{\"name\":[\"living\xde\xad", "/name/0"), ""},
|
||||
},
|
||||
wantOut: `{"name":[`,
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedNumber"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`0.`), E(io.ErrUnexpectedEOF).withPos("0", ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidNumber"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`0.e`), newInvalidCharacterError("e", "in number (expecting digit)").withPos(`0.`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedObject/AfterStart"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`{`), E(io.ErrUnexpectedEOF).withPos("{", ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedObject/AfterName"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`{"X"`), E(io.ErrUnexpectedEOF).withPos(`{"X"`, "/X"), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedObject/AfterColon"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`{"X":`), E(io.ErrUnexpectedEOF).withPos(`{"X":`, "/X"), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedObject/AfterValue"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`{"0":0`), E(io.ErrUnexpectedEOF).withPos(`{"0":0`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedObject/AfterComma"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`{"0":0,`), E(io.ErrUnexpectedEOF).withPos(`{"0":0,`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidObject/MissingColon"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "fizz" "buzz" } `), newInvalidCharacterError("\"", "after object name (expecting ':')").withPos(` { "fizz" `, "/fizz"), ""},
|
||||
{Value(` { "fizz" , "buzz" } `), newInvalidCharacterError(",", "after object name (expecting ':')").withPos(` { "fizz" `, "/fizz"), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidObject/MissingComma"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "fizz" : "buzz" "gazz" } `), newInvalidCharacterError("\"", "after object value (expecting ',' or '}')").withPos(` { "fizz" : "buzz" `, ""), ""},
|
||||
{Value(` { "fizz" : "buzz" : "gazz" } `), newInvalidCharacterError(":", "after object value (expecting ',' or '}')").withPos(` { "fizz" : "buzz" `, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidObject/ExtraComma"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { , } `), newInvalidCharacterError(",", `at start of string (expecting '"')`).withPos(` { `, ""), ""},
|
||||
{Value(` { "fizz" : "buzz" , } `), newInvalidCharacterError("}", `at start of string (expecting '"')`).withPos(` { "fizz" : "buzz" , `, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidObject/InvalidName"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`{ null }`), newInvalidCharacterError("n", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
|
||||
{Value(`{ false }`), newInvalidCharacterError("f", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
|
||||
{Value(`{ true }`), newInvalidCharacterError("t", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
|
||||
{Value(`{ 0 }`), newInvalidCharacterError("0", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
|
||||
{Value(`{ {} }`), newInvalidCharacterError("{", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
|
||||
{Value(`{ [] }`), newInvalidCharacterError("[", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{Null, E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{Value(`null`), E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{False, E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{Value(`false`), E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{True, E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{Value(`true`), E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{Uint(0), E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{Value(`0`), E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{BeginObject, E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{Value(`{}`), E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{BeginArray, E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{Value(`[]`), E(ErrNonStringName).withPos(`{`, ""), ""},
|
||||
{EndObject, nil, ""},
|
||||
},
|
||||
wantOut: "{}\n",
|
||||
}, {
|
||||
name: jsontest.Name("InvalidObject/InvalidValue"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`{ "0": x }`), newInvalidCharacterError("x", `at start of value`).withPos(`{ "0": `, "/0"), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidObject/MismatchingDelim"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { ] `), newInvalidCharacterError("]", `at start of string (expecting '"')`).withPos(` { `, ""), ""},
|
||||
{Value(` { "0":0 ] `), newInvalidCharacterError("]", `after object value (expecting ',' or '}')`).withPos(` { "0":0 `, ""), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{EndArray, E(errMismatchDelim).withPos(`{`, ""), ""},
|
||||
{Value(`]`), newInvalidCharacterError("]", "at start of value").withPos(`{`, ""), ""},
|
||||
{EndObject, nil, ""},
|
||||
},
|
||||
wantOut: "{}\n",
|
||||
}, {
|
||||
name: jsontest.Name("ValidObject/UniqueNames"),
|
||||
calls: []encoderMethodCall{
|
||||
{BeginObject, nil, ""},
|
||||
{String("0"), nil, ""},
|
||||
{Uint(0), nil, ""},
|
||||
{String("1"), nil, ""},
|
||||
{Uint(1), nil, ""},
|
||||
{EndObject, nil, ""},
|
||||
{Value(` { "0" : 0 , "1" : 1 } `), nil, ""},
|
||||
},
|
||||
wantOut: `{"0":0,"1":1}` + "\n" + `{"0":0,"1":1}` + "\n",
|
||||
}, {
|
||||
name: jsontest.Name("ValidObject/DuplicateNames"),
|
||||
opts: []Options{AllowDuplicateNames(true)},
|
||||
calls: []encoderMethodCall{
|
||||
{BeginObject, nil, ""},
|
||||
{String("0"), nil, ""},
|
||||
{Uint(0), nil, ""},
|
||||
{String("0"), nil, ""},
|
||||
{Uint(0), nil, ""},
|
||||
{EndObject, nil, ""},
|
||||
{Value(` { "0" : 0 , "0" : 0 } `), nil, ""},
|
||||
},
|
||||
wantOut: `{"0":0,"0":0}` + "\n" + `{"0":0,"0":0}` + "\n",
|
||||
}, {
|
||||
name: jsontest.Name("InvalidObject/DuplicateNames"),
|
||||
calls: []encoderMethodCall{
|
||||
{BeginObject, nil, ""},
|
||||
{String("X"), nil, ""},
|
||||
{BeginObject, nil, ""},
|
||||
{EndObject, nil, ""},
|
||||
{String("X"), E(ErrDuplicateName).withPos(`{"X":{},`, "/X"), "/X"},
|
||||
{Value(`"X"`), E(ErrDuplicateName).withPos(`{"X":{},`, "/X"), "/X"},
|
||||
{String("Y"), nil, ""},
|
||||
{BeginObject, nil, ""},
|
||||
{EndObject, nil, ""},
|
||||
{String("X"), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/X"), "/Y"},
|
||||
{Value(`"X"`), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/X"), "/Y"},
|
||||
{String("Y"), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/Y"), "/Y"},
|
||||
{Value(`"Y"`), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/Y"), "/Y"},
|
||||
{EndObject, nil, ""},
|
||||
{Value(` { "X" : 0 , "Y" : 1 , "X" : 0 } `), E(ErrDuplicateName).withPos(`{"X":{},"Y":{}}`+"\n"+` { "X" : 0 , "Y" : 1 , `, "/X"), ""},
|
||||
},
|
||||
wantOut: `{"X":{},"Y":{}}` + "\n",
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedArray/AfterStart"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`[`), E(io.ErrUnexpectedEOF).withPos(`[`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedArray/AfterValue"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`[0`), E(io.ErrUnexpectedEOF).withPos(`[0`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedArray/AfterComma"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`[0,`), E(io.ErrUnexpectedEOF).withPos(`[0,`, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("TruncatedArray/MissingComma"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ "fizz" "buzz" ] `), newInvalidCharacterError("\"", "after array value (expecting ',' or ']')").withPos(` [ "fizz" `, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("InvalidArray/MismatchingDelim"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ } `), newInvalidCharacterError("}", `at start of value`).withPos(` [ `, "/0"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{EndObject, E(errMismatchDelim).withPos(`[`, "/0"), ""},
|
||||
{Value(`}`), newInvalidCharacterError("}", "at start of value").withPos(`[`, "/0"), ""},
|
||||
{EndArray, nil, ""},
|
||||
},
|
||||
wantOut: "[]\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/Object/SpaceAfterColon"),
|
||||
opts: []Options{SpaceAfterColon(true)},
|
||||
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
|
||||
wantOut: "{\"fizz\": \"buzz\",\"wizz\": \"wuzz\"}\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/Object/SpaceAfterComma"),
|
||||
opts: []Options{SpaceAfterComma(true)},
|
||||
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
|
||||
wantOut: "{\"fizz\":\"buzz\", \"wizz\":\"wuzz\"}\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/Object/SpaceAfterColonAndComma"),
|
||||
opts: []Options{SpaceAfterColon(true), SpaceAfterComma(true)},
|
||||
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
|
||||
wantOut: "{\"fizz\": \"buzz\", \"wizz\": \"wuzz\"}\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/Object/NoSpaceAfterColon+SpaceAfterComma+Multiline"),
|
||||
opts: []Options{SpaceAfterColon(false), SpaceAfterComma(true), Multiline(true)},
|
||||
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
|
||||
wantOut: "{\n\t\"fizz\":\"buzz\", \n\t\"wizz\":\"wuzz\"\n}\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/Array/SpaceAfterComma"),
|
||||
opts: []Options{SpaceAfterComma(true)},
|
||||
calls: []encoderMethodCall{{Value(`["fizz","buzz"]`), nil, ""}},
|
||||
wantOut: "[\"fizz\", \"buzz\"]\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/Array/NoSpaceAfterComma+Multiline"),
|
||||
opts: []Options{SpaceAfterComma(false), Multiline(true)},
|
||||
calls: []encoderMethodCall{{Value(`["fizz","buzz"]`), nil, ""}},
|
||||
wantOut: "[\n\t\"fizz\",\n\t\"buzz\"\n]\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/ReorderWithWhitespace"),
|
||||
opts: []Options{
|
||||
AllowDuplicateNames(true),
|
||||
AllowInvalidUTF8(true),
|
||||
ReorderRawObjects(true),
|
||||
SpaceAfterComma(true),
|
||||
SpaceAfterColon(false),
|
||||
Multiline(true),
|
||||
WithIndentPrefix(" "),
|
||||
WithIndent("\t"),
|
||||
PreserveRawStrings(true),
|
||||
},
|
||||
calls: []encoderMethodCall{
|
||||
{BeginArray, nil, ""},
|
||||
{BeginArray, nil, ""},
|
||||
{Value(` { "fizz" : "buzz" ,
|
||||
"zip" : {
|
||||
"x` + "\xfd" + `x" : 123 , "x` + "\xff" + `x" : 123, "x` + "\xfe" + `x" : 123
|
||||
},
|
||||
"zap" : {
|
||||
"xxx" : 333, "xxx": 1, "xxx": 22
|
||||
},
|
||||
"alpha" : "bravo" } `), nil, ""},
|
||||
{EndArray, nil, ""},
|
||||
{EndArray, nil, ""},
|
||||
},
|
||||
wantOut: "[\n \t[\n \t\t{\n \t\t\t\"alpha\":\"bravo\", \n \t\t\t\"fizz\":\"buzz\", \n \t\t\t\"zap\":{\n \t\t\t\t\"xxx\":1, \n \t\t\t\t\"xxx\":22, \n \t\t\t\t\"xxx\":333\n \t\t\t}, \n \t\t\t\"zip\":{\n \t\t\t\t\"x\xfdx\":123, \n \t\t\t\t\"x\xfex\":123, \n \t\t\t\t\"x\xffx\":123\n \t\t\t}\n \t\t}\n \t]\n ]\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/CanonicalizeRawInts"),
|
||||
opts: []Options{CanonicalizeRawInts(true), SpaceAfterComma(true)},
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`[0.100,5.0,1E6,-9223372036854775808,-10,-1,-0,0,1,10,9223372036854775807]`), nil, ""},
|
||||
},
|
||||
wantOut: "[0.100, 5.0, 1E6, -9223372036854776000, -10, -1, 0, 0, 1, 10, 9223372036854776000]\n",
|
||||
}, {
|
||||
name: jsontest.Name("Format/CanonicalizeRawFloats"),
|
||||
opts: []Options{CanonicalizeRawFloats(true), SpaceAfterComma(true)},
|
||||
calls: []encoderMethodCall{
|
||||
{Value(`[0.100,5.0,1E6,-9223372036854775808,-10,-1,-0,0,1,10,9223372036854775807]`), nil, ""},
|
||||
},
|
||||
wantOut: "[0.1, 5, 1000000, -9223372036854775808, -10, -1, 0, 0, 1, 10, 9223372036854775807]\n",
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` "a` + "\xff" + `0" `), E(jsonwire.ErrInvalidUTF8).withPos(` "a`, ""), ""},
|
||||
{String(`a` + "\xff" + `0`), E(jsonwire.ErrInvalidUTF8).withPos(``, ""), ""},
|
||||
},
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/0"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ "a` + "\xff" + `1" ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a`, "/0"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{Value(` "a` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`[ "a`, "/0"), ""},
|
||||
{String(`a` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`[`, "/0"), ""},
|
||||
},
|
||||
wantOut: `[`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/1"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ "a1" , "b` + "\xff" + `1" ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , "b`, "/1"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{String("a1"), nil, ""},
|
||||
{Value(` "b` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", "b`, "/1"), ""},
|
||||
{String(`b` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",`, "/1"), ""},
|
||||
},
|
||||
wantOut: `["a1"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/0/0"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ [ "a` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ [ "a`, "/0/0"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{Value(` [ "a` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[ [ "a`, "/0/0"), ""},
|
||||
{BeginArray, nil, "/0"},
|
||||
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`[[ "a`, "/0/0"), "/0"},
|
||||
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`[[`, "/0/0"), "/0"},
|
||||
},
|
||||
wantOut: `[[`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/1/0"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ "a1" , [ "a` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , [ "a`, "/1/0"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{String("a1"), nil, "/0"},
|
||||
{Value(` [ "a` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", [ "a`, "/1/0"), ""},
|
||||
{BeginArray, nil, "/1"},
|
||||
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",[ "a`, "/1/0"), "/1"},
|
||||
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",[`, "/1/0"), "/1"},
|
||||
},
|
||||
wantOut: `["a1",[`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/0/1"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ [ "a2" , "b` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ [ "a2" , "b`, "/0/1"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[ [ "a2" , "b`, "/0/1"), ""},
|
||||
{BeginArray, nil, "/0"},
|
||||
{String("a2"), nil, "/0/0"},
|
||||
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`[["a2", "b`, "/0/1"), "/0/0"},
|
||||
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`[["a2",`, "/0/1"), "/0/0"},
|
||||
},
|
||||
wantOut: `[["a2"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/1/1"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ "a1" , [ "a2" , "b` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , [ "a2" , "b`, "/1/1"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{String("a1"), nil, "/0"},
|
||||
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", [ "a2" , "b`, "/1/1"), ""},
|
||||
{BeginArray, nil, "/1"},
|
||||
{String("a2"), nil, "/1/0"},
|
||||
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",["a2", "b`, "/1/1"), "/1/0"},
|
||||
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",["a2",`, "/1/1"), "/1/0"},
|
||||
},
|
||||
wantOut: `["a1",["a2"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/a1-"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a` + "\xff" + `1" : "b1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a`, ""), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{Value(` "a` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{ "a`, ""), ""},
|
||||
{String(`a` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{`, ""), ""},
|
||||
},
|
||||
wantOut: `{`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/a1"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a1" : "b` + "\xff" + `1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b`, "/a1"), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a1"), nil, "/a1"},
|
||||
{Value(` "b` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": "b`, "/a1"), ""},
|
||||
{String(`b` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":`, "/a1"), ""},
|
||||
},
|
||||
wantOut: `{"a1"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/c1-"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a1" : "b1" , "c` + "\xff" + `1" : "d1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c`, ""), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a1"), nil, "/a1"},
|
||||
{String("b1"), nil, "/a1"},
|
||||
{Value(` "c` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1": "c`, ""), "/a1"},
|
||||
{String(`c` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":`, ""), "/a1"},
|
||||
},
|
||||
wantOut: `{"a1":"b1"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/c1"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a1" : "b1" , "c1" : "d` + "\xff" + `1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c1" : "d`, "/c1"), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a1"), nil, "/a1"},
|
||||
{String("b1"), nil, "/a1"},
|
||||
{String("c1"), nil, "/c1"},
|
||||
{Value(` "d` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":"c1": "d`, "/c1"), "/c1"},
|
||||
{String(`d` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":"c1":`, "/c1"), "/c1"},
|
||||
},
|
||||
wantOut: `{"a1":"b1","c1"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/a1/a2-"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a1" : { "a` + "\xff" + `2" : "b2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a`, "/a1"), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a1"), nil, "/a1"},
|
||||
{Value(` { "a` + "\xff" + `2" : "b2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a`, "/a1"), ""},
|
||||
{BeginObject, nil, "/a1"},
|
||||
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{ "a`, "/a1"), "/a1"},
|
||||
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{`, "/a1"), "/a1"},
|
||||
},
|
||||
wantOut: `{"a1":{`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/a1/a2"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a1" : { "a2" : "b` + "\xff" + `2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b`, "/a1/a2"), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a1"), nil, "/a1"},
|
||||
{Value(` { "a2" : "b` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a2" : "b`, "/a1/a2"), ""},
|
||||
{BeginObject, nil, "/a1"},
|
||||
{String("a2"), nil, "/a1/a2"},
|
||||
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2": "b`, "/a1/a2"), "/a1/a2"},
|
||||
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":`, "/a1/a2"), "/a1/a2"},
|
||||
},
|
||||
wantOut: `{"a1":{"a2"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/a1/c2-"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a1" : { "a2" : "b2" , "c` + "\xff" + `2" : "d2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b2" , "c`, "/a1"), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a1"), nil, "/a1"},
|
||||
{BeginObject, nil, "/a1"},
|
||||
{String("a2"), nil, "/a1/a2"},
|
||||
{String("b2"), nil, "/a1/a2"},
|
||||
{Value(` "c` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2", "c`, "/a1"), "/a1/a2"},
|
||||
{String(`c` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2",`, "/a1"), "/a1/a2"},
|
||||
},
|
||||
wantOut: `{"a1":{"a2":"b2"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/a1/c2"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a1" : { "a2" : "b2" , "c2" : "d` + "\xff" + `2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b2" , "c2" : "d`, "/a1/c2"), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a1"), nil, "/a1"},
|
||||
{Value(` { "a2" : "b2" , "c2" : "d` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a2" : "b2" , "c2" : "d`, "/a1/c2"), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a2"), nil, "/a1/a2"},
|
||||
{String("b2"), nil, "/a1/a2"},
|
||||
{String("c2"), nil, "/a1/c2"},
|
||||
{Value(` "d` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2","c2": "d`, "/a1/c2"), "/a1/c2"},
|
||||
{String(`d` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2","c2":`, "/a1/c2"), "/a1/c2"},
|
||||
},
|
||||
wantOut: `{"a1":{"a2":"b2","c2"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/1/a2"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ "a1" , { "a2" : "b` + "\xff" + `2" } ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , { "a2" : "b`, "/1/a2"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{String("a1"), nil, "/0"},
|
||||
{Value(` { "a2" : "b` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", { "a2" : "b`, "/1/a2"), ""},
|
||||
{BeginObject, nil, "/1"},
|
||||
{String("a2"), nil, "/1/a2"},
|
||||
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",{"a2": "b`, "/1/a2"), "/1/a2"},
|
||||
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",{"a2":`, "/1/a2"), "/1/a2"},
|
||||
},
|
||||
wantOut: `["a1",{"a2"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/c1/1"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` { "a1" : "b1" , "c1" : [ "a2" , "b` + "\xff" + `2" ] } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c1" : [ "a2" , "b`, "/c1/1"), ""},
|
||||
{BeginObject, nil, ""},
|
||||
{String("a1"), nil, "/a1"},
|
||||
{String("b1"), nil, "/a1"},
|
||||
{String("c1"), nil, "/c1"},
|
||||
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1": [ "a2" , "b`, "/c1/1"), ""},
|
||||
{BeginArray, nil, "/c1"},
|
||||
{String("a2"), nil, "/c1/0"},
|
||||
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1":["a2", "b`, "/c1/1"), "/c1/0"},
|
||||
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1":["a2",`, "/c1/1"), "/c1/0"},
|
||||
},
|
||||
wantOut: `{"a1":"b1","c1":["a2"`,
|
||||
}, {
|
||||
name: jsontest.Name("ErrorPosition/0/a1/1/c3/1"),
|
||||
calls: []encoderMethodCall{
|
||||
{Value(` [ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] } ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{Value(` { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] } `), E(jsonwire.ErrInvalidUTF8).withPos(`[ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
|
||||
{BeginObject, nil, "/0"},
|
||||
{String("a1"), nil, "/0/a1"},
|
||||
{Value(` [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1": [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
|
||||
{BeginArray, nil, ""},
|
||||
{String("a2"), nil, "/0/a1/0"},
|
||||
{Value(` { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2", { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
|
||||
{BeginObject, nil, "/0/a1/1"},
|
||||
{String("a3"), nil, "/0/a1/1/a3"},
|
||||
{String("b3"), nil, "/0/a1/1/a3"},
|
||||
{String("c3"), nil, "/0/a1/1/c3"},
|
||||
{Value(` [ "a4" , "b` + "\xff" + `4" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3": [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
|
||||
{BeginArray, nil, "/0/a1/1/c3"},
|
||||
{String("a4"), nil, "/0/a1/1/c3/0"},
|
||||
{Value(` "b` + "\xff" + `4" `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3":["a4", "b`, "/0/a1/1/c3/1"), "/0/a1/1/c3/0"},
|
||||
{String(`b` + "\xff" + `4`), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3":["a4",`, "/0/a1/1/c3/1"), "/0/a1/1/c3/0"},
|
||||
},
|
||||
wantOut: `[{"a1":["a2",{"a3":"b3","c3":["a4"`,
|
||||
}}
|
||||
|
||||
// TestEncoderErrors tests that Encoder errors occur when we expect them
// and that they leave the Encoder in a consistent state.
|
||||
func TestEncoderErrors(t *testing.T) {
|
||||
for _, td := range encoderErrorTestdata {
|
||||
t.Run(path.Join(td.name.Name), func(t *testing.T) {
|
||||
testEncoderErrors(t, td.name.Where, td.opts, td.calls, td.wantOut)
|
||||
})
|
||||
}
|
||||
}
|
||||
func testEncoderErrors(t *testing.T, where jsontest.CasePos, opts []Options, calls []encoderMethodCall, wantOut string) {
|
||||
dst := new(bytes.Buffer)
|
||||
enc := NewEncoder(dst, opts...)
|
||||
for i, call := range calls {
|
||||
var gotErr error
|
||||
switch tokVal := call.in.(type) {
|
||||
case Token:
|
||||
gotErr = enc.WriteToken(tokVal)
|
||||
case Value:
|
||||
gotErr = enc.WriteValue(tokVal)
|
||||
}
|
||||
if !equalError(gotErr, call.wantErr) {
|
||||
t.Fatalf("%s: %d: error mismatch:\ngot %v\nwant %v", where, i, gotErr, call.wantErr)
|
||||
}
|
||||
if call.wantPointer != "" {
|
||||
gotPointer := enc.StackPointer()
|
||||
if gotPointer != call.wantPointer {
|
||||
t.Fatalf("%s: %d: Encoder.StackPointer = %s, want %s", where, i, gotPointer, call.wantPointer)
|
||||
}
|
||||
}
|
||||
}
|
||||
gotOut := dst.String() + string(enc.s.unflushedBuffer())
|
||||
if gotOut != wantOut {
|
||||
t.Fatalf("%s: output mismatch:\ngot %q\nwant %q", where, gotOut, wantOut)
|
||||
}
|
||||
gotOffset := int(enc.OutputOffset())
|
||||
wantOffset := len(wantOut)
|
||||
if gotOffset != wantOffset {
|
||||
t.Fatalf("%s: Encoder.OutputOffset = %v, want %v", where, gotOffset, wantOffset)
|
||||
}
|
||||
}
|
||||
182
pkg/encoders/json/jsontext/errors.go
Normal file
@@ -0,0 +1,182 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"encoding/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
const errorPrefix = "jsontext: "
|
||||
|
||||
type ioError struct {
|
||||
action string // either "read" or "write"
|
||||
err error
|
||||
}
|
||||
|
||||
func (e *ioError) Error() string {
|
||||
return errorPrefix + e.action + " error: " + e.err.Error()
|
||||
}
|
||||
func (e *ioError) Unwrap() error {
|
||||
return e.err
|
||||
}
|
||||
|
||||
// SyntacticError is a description of a syntactic error that occurred when
|
||||
// encoding or decoding JSON according to the grammar.
|
||||
//
|
||||
// The contents of this error as produced by this package may change over time.
|
||||
type SyntacticError struct {
|
||||
requireKeyedLiterals
|
||||
nonComparable
|
||||
|
||||
// ByteOffset indicates that an error occurred after this byte offset.
|
||||
ByteOffset int64
|
||||
// JSONPointer indicates that an error occurred within this JSON value
|
||||
// as indicated using the JSON Pointer notation (see RFC 6901).
|
||||
JSONPointer Pointer
|
||||
|
||||
// Err is the underlying error.
|
||||
Err error
|
||||
}
|
||||
|
||||
// wrapSyntacticError wraps an error and annotates it with a precise location
|
||||
// using the provided [encoderState] or [decoderState].
|
||||
// If err is an [ioError] or [io.EOF], then it is not wrapped.
|
||||
//
|
||||
// It takes a relative offset pos that can be resolved into
|
||||
// an absolute offset using state.offsetAt.
|
||||
//
|
||||
// It takes a where argument that specifies how the JSON pointer is derived.
|
||||
// If the underlying error is a [pointerSuffixError],
|
||||
// then the suffix is appended to the derived pointer.
|
||||
func wrapSyntacticError(state interface {
|
||||
offsetAt(pos int) int64
|
||||
AppendStackPointer(b []byte, where int) []byte
|
||||
}, err error, pos, where int) error {
|
||||
if _, ok := err.(*ioError); err == io.EOF || ok {
|
||||
return err
|
||||
}
|
||||
offset := state.offsetAt(pos)
|
||||
ptr := state.AppendStackPointer(nil, where)
|
||||
if serr, ok := err.(*pointerSuffixError); ok {
|
||||
ptr = serr.appendPointer(ptr)
|
||||
err = serr.error
|
||||
}
|
||||
if d, ok := state.(*decoderState); ok && err == errMismatchDelim {
|
||||
where := "at start of value"
|
||||
if len(d.Tokens.Stack) > 0 && d.Tokens.Last.Length() > 0 {
|
||||
switch {
|
||||
case d.Tokens.Last.isArray():
|
||||
where = "after array element (expecting ',' or ']')"
|
||||
ptr = []byte(Pointer(ptr).Parent()) // problem is with parent array
|
||||
case d.Tokens.Last.isObject():
|
||||
where = "after object value (expecting ',' or '}')"
|
||||
ptr = []byte(Pointer(ptr).Parent()) // problem is with parent object
|
||||
}
|
||||
}
|
||||
err = jsonwire.NewInvalidCharacterError(d.buf[pos:], where)
|
||||
}
|
||||
return &SyntacticError{ByteOffset: offset, JSONPointer: Pointer(ptr), Err: err}
|
||||
}
|
||||
|
||||
func (e *SyntacticError) Error() string {
|
||||
pointer := e.JSONPointer
|
||||
offset := e.ByteOffset
|
||||
b := []byte(errorPrefix)
|
||||
if e.Err != nil {
|
||||
b = append(b, e.Err.Error()...)
|
||||
if e.Err == ErrDuplicateName {
|
||||
b = strconv.AppendQuote(append(b, ' '), pointer.LastToken())
|
||||
pointer = pointer.Parent()
|
||||
offset = 0 // not useful to print offset for duplicate names
|
||||
}
|
||||
} else {
|
||||
b = append(b, "syntactic error"...)
|
||||
}
|
||||
if pointer != "" {
|
||||
b = strconv.AppendQuote(append(b, " within "...), jsonwire.TruncatePointer(string(pointer), 100))
|
||||
}
|
||||
if offset > 0 {
|
||||
b = strconv.AppendInt(append(b, " after offset "...), offset, 10)
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func (e *SyntacticError) Unwrap() error {
|
||||
return e.Err
|
||||
}
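
// A minimal sketch of inspecting a SyntacticError from an importing package
// (assumes "bytes", "errors", and "fmt" are imported and this package is
// imported as jsontext):
//
//	var buf bytes.Buffer
//	enc := jsontext.NewEncoder(&buf)
//	err := enc.WriteValue(jsontext.Value(`{"fizz" "buzz"}`)) // missing ':' after the name
//	var serr *jsontext.SyntacticError
//	if errors.As(err, &serr) {
//		fmt.Println(serr.ByteOffset, serr.JSONPointer, serr.Err)
//	}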

// pointerSuffixError represents a JSON pointer suffix to be appended
// to [SyntacticError.JSONPointer]. It is an internal error type
// used within this package and does not appear in the public API.
//
// This type is primarily used to annotate errors in Encoder.WriteValue
// and Decoder.ReadValue with precise positions.
// At the time WriteValue or ReadValue is called, a JSON pointer to the
// upcoming value can be constructed using the Encoder/Decoder state.
// However, tracking pointers within values during normal operation
// would incur a performance penalty in the error-free case.
//
// To provide precise error locations without this overhead,
// the error is wrapped with object names or array indices
// as the call stack is popped when an error occurs.
// Since this happens in reverse order, pointerSuffixError holds
// the pointer in reverse and is only later reversed when appending to
// the pointer prefix.
//
// For example, if the encoder is at "/alpha/bravo/charlie"
// and an error occurs in WriteValue at "/xray/yankee/zulu", then
// the final pointer should be "/alpha/bravo/charlie/xray/yankee/zulu".
//
// As pointerSuffixError is populated during the error return path,
// it first contains "/zulu", then "/zulu/yankee",
// and finally "/zulu/yankee/xray".
// These tokens are reversed and concatenated to "/alpha/bravo/charlie"
// to form the full pointer.
type pointerSuffixError struct {
	error

	// reversePointer is a JSON pointer, but with each token in reverse order.
	reversePointer []byte
}

// wrapWithObjectName wraps err with a JSON object name access,
// which must be a valid quoted JSON string.
func wrapWithObjectName(err error, quotedName []byte) error {
	serr, _ := err.(*pointerSuffixError)
	if serr == nil {
		serr = &pointerSuffixError{error: err}
	}
	name := jsonwire.UnquoteMayCopy(quotedName, false)
	serr.reversePointer = appendEscapePointerName(append(serr.reversePointer, '/'), name)
	return serr
}

// wrapWithArrayIndex wraps err with a JSON array index access.
func wrapWithArrayIndex(err error, index int64) error {
	serr, _ := err.(*pointerSuffixError)
	if serr == nil {
		serr = &pointerSuffixError{error: err}
	}
	serr.reversePointer = strconv.AppendUint(append(serr.reversePointer, '/'), uint64(index), 10)
	return serr
}

// appendPointer appends the path encoded in e to the end of pointer.
func (e *pointerSuffixError) appendPointer(pointer []byte) []byte {
	// Copy each token in reversePointer to the end of pointer in reverse order.
	// Double reversal means that the appended suffix is now in forward order.
	bi, bo := e.reversePointer, pointer
	for len(bi) > 0 {
		i := bytes.LastIndexByte(bi, '/')
		bi, bo = bi[:i], append(bo, bi[i:]...)
	}
	return bo
}
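
// Illustrative sketch (not part of the original file): how the reversed suffix
// is built on the error return path and later re-reversed onto a prefix by
// appendPointer.
//
//	err := wrapWithObjectName(io.ErrUnexpectedEOF, []byte(`"name"`)) // suffix is "/name"
//	err = wrapWithArrayIndex(err, 2)                                 // suffix is "/name/2"
//	err = wrapWithObjectName(err, []byte(`"alpha"`))                 // suffix is "/name/2/alpha"
//	ptr := err.(*pointerSuffixError).appendPointer([]byte("/root"))
//	// string(ptr) == "/root/alpha/2/name"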
130
pkg/encoders/json/jsontext/example_test.go
Normal file
@@ -0,0 +1,130 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"encoding/json/jsontext"
|
||||
"encoding/json/v2"
|
||||
)
|
||||
|
||||
// This example demonstrates the use of the [Encoder] and [Decoder] to
|
||||
// parse and modify JSON without unmarshaling it into a concrete Go type.
|
||||
func Example_stringReplace() {
|
||||
// Example input with non-idiomatic use of "Golang" instead of "Go".
|
||||
const input = `{
|
||||
"title": "Golang version 1 is released",
|
||||
"author": "Andrew Gerrand",
|
||||
"date": "2012-03-28",
|
||||
"text": "Today marks a major milestone in the development of the Golang programming language.",
|
||||
"otherArticles": [
|
||||
"Twelve Years of Golang",
|
||||
"The Laws of Reflection",
|
||||
"Learn Golang from your browser"
|
||||
]
|
||||
}`
|
||||
|
||||
// Using a Decoder and Encoder, we can parse through every token,
|
||||
// check and modify the token if necessary, and
|
||||
// write the token to the output.
|
||||
var replacements []jsontext.Pointer
|
||||
in := strings.NewReader(input)
|
||||
dec := jsontext.NewDecoder(in)
|
||||
out := new(bytes.Buffer)
|
||||
enc := jsontext.NewEncoder(out, jsontext.Multiline(true)) // expand for readability
|
||||
for {
|
||||
// Read a token from the input.
|
||||
tok, err := dec.ReadToken()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Check whether the token contains the string "Golang" and
|
||||
// replace each occurrence with "Go" instead.
|
||||
if tok.Kind() == '"' && strings.Contains(tok.String(), "Golang") {
|
||||
replacements = append(replacements, dec.StackPointer())
|
||||
tok = jsontext.String(strings.ReplaceAll(tok.String(), "Golang", "Go"))
|
||||
}
|
||||
|
||||
// Write the (possibly modified) token to the output.
|
||||
if err := enc.WriteToken(tok); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Print the list of replacements and the adjusted JSON output.
|
||||
if len(replacements) > 0 {
|
||||
fmt.Println(`Replaced "Golang" with "Go" in:`)
|
||||
for _, where := range replacements {
|
||||
fmt.Println("\t" + where)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
fmt.Println("Result:", out.String())
|
||||
|
||||
// Output:
|
||||
// Replaced "Golang" with "Go" in:
|
||||
// /title
|
||||
// /text
|
||||
// /otherArticles/0
|
||||
// /otherArticles/2
|
||||
//
|
||||
// Result: {
|
||||
// "title": "Go version 1 is released",
|
||||
// "author": "Andrew Gerrand",
|
||||
// "date": "2012-03-28",
|
||||
// "text": "Today marks a major milestone in the development of the Go programming language.",
|
||||
// "otherArticles": [
|
||||
// "Twelve Years of Go",
|
||||
// "The Laws of Reflection",
|
||||
// "Learn Go from your browser"
|
||||
// ]
|
||||
// }
|
||||
}
|
||||
|
||||
// Directly embedding JSON within HTML requires special handling for safety.
// Escape certain runes so that JSON treated directly as HTML
// cannot be used to perform <script> injection.
//
// This example shows how to obtain the equivalent behavior provided by the
// v1 [encoding/json] package that is no longer directly supported by this package.
// Newly written code that intermixes JSON and HTML should instead use the
// [github.com/google/safehtml] module for safety purposes.
func ExampleEscapeForHTML() {
|
||||
page := struct {
|
||||
Title string
|
||||
Body string
|
||||
}{
|
||||
Title: "Example Embedded Javascript",
|
||||
Body: `<script> console.log("Hello, world!"); </script>`,
|
||||
}
|
||||
|
||||
b, err := json.Marshal(&page,
|
||||
// Escape certain runes within a JSON string so that
|
||||
// JSON will be safe to directly embed inside HTML.
|
||||
jsontext.EscapeForHTML(true),
|
||||
jsontext.EscapeForJS(true),
|
||||
jsontext.Multiline(true)) // expand for readability
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
fmt.Println(string(b))
|
||||
|
||||
// Output:
|
||||
// {
|
||||
// "Title": "Example Embedded Javascript",
|
||||
// "Body": "\u003cscript\u003e console.log(\"Hello, world!\"); \u003c/script\u003e"
|
||||
// }
|
||||
}
|
||||
77
pkg/encoders/json/jsontext/export.go
Normal file
@@ -0,0 +1,77 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"encoding/json/internal"
|
||||
)
|
||||
|
||||
// Internal is for internal use only.
|
||||
// This is exempt from the Go compatibility agreement.
|
||||
var Internal exporter
|
||||
|
||||
type exporter struct{}
|
||||
|
||||
// Export exposes internal functionality from "jsontext" to "json".
|
||||
// This cannot be dynamically called by other packages since
|
||||
// they cannot obtain a reference to the internal.AllowInternalUse value.
|
||||
func (exporter) Export(p *internal.NotForPublicUse) export {
|
||||
if p != &internal.AllowInternalUse {
|
||||
panic("unauthorized call to Export")
|
||||
}
|
||||
return export{}
|
||||
}
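
// Illustrative sketch (not part of the original file): how a package that can
// see internal.AllowInternalUse (e.g. the forked "json" package) would obtain
// the export handle. The variable name "expo" is hypothetical.
//
//	var expo = jsontext.Internal.Export(&internal.AllowInternalUse)
//	enc := expo.GetBufferedEncoder()
//	defer expo.PutBufferedEncoder(enc)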
|
||||
|
||||
// The export type exposes functionality to packages with visibility to
|
||||
// the internal.AllowInternalUse variable. The "json" package uses this
|
||||
// to modify low-level state in the Encoder and Decoder types.
|
||||
// It mutates the state directly instead of calling ReadToken or WriteToken
|
||||
// since this is more performant. The public APIs need to track state to ensure
|
||||
// that users are constructing a valid JSON value, but the "json" implementation
|
||||
// guarantees that it emits valid JSON by the structure of the code itself.
|
||||
type export struct{}
|
||||
|
||||
// Encoder returns a pointer to the underlying encoderState.
|
||||
func (export) Encoder(e *Encoder) *encoderState { return &e.s }
|
||||
|
||||
// Decoder returns a pointer to the underlying decoderState.
|
||||
func (export) Decoder(d *Decoder) *decoderState { return &d.s }
|
||||
|
||||
func (export) GetBufferedEncoder(o ...Options) *Encoder {
|
||||
return getBufferedEncoder(o...)
|
||||
}
|
||||
func (export) PutBufferedEncoder(e *Encoder) {
|
||||
putBufferedEncoder(e)
|
||||
}
|
||||
|
||||
func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder {
|
||||
return getStreamingEncoder(w, o...)
|
||||
}
|
||||
func (export) PutStreamingEncoder(e *Encoder) {
|
||||
putStreamingEncoder(e)
|
||||
}
|
||||
|
||||
func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder {
|
||||
return getBufferedDecoder(b, o...)
|
||||
}
|
||||
func (export) PutBufferedDecoder(d *Decoder) {
|
||||
putBufferedDecoder(d)
|
||||
}
|
||||
|
||||
func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder {
|
||||
return getStreamingDecoder(r, o...)
|
||||
}
|
||||
func (export) PutStreamingDecoder(d *Decoder) {
|
||||
putStreamingDecoder(d)
|
||||
}
|
||||
|
||||
func (export) IsIOError(err error) bool {
|
||||
_, ok := err.(*ioError)
|
||||
return ok
|
||||
}
|
||||
236
pkg/encoders/json/jsontext/fuzz_test.go
Normal file
@@ -0,0 +1,236 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
"math/rand"
|
||||
"slices"
|
||||
"testing"
|
||||
|
||||
"encoding/json/internal/jsontest"
|
||||
)
|
||||
|
||||
func FuzzCoder(f *testing.F) {
|
||||
// Add a number of inputs to the corpus including valid and invalid data.
|
||||
for _, td := range coderTestdata {
|
||||
f.Add(int64(0), []byte(td.in))
|
||||
}
|
||||
for _, td := range decoderErrorTestdata {
|
||||
f.Add(int64(0), []byte(td.in))
|
||||
}
|
||||
for _, td := range encoderErrorTestdata {
|
||||
f.Add(int64(0), []byte(td.wantOut))
|
||||
}
|
||||
for _, td := range jsontest.Data {
|
||||
f.Add(int64(0), td.Data())
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
|
||||
var tokVals []tokOrVal
|
||||
rn := rand.NewSource(seed)
|
||||
|
||||
// Read a sequence of tokens or values. Skip the test for any errors
|
||||
// since we expect this with randomly generated fuzz inputs.
|
||||
src := bytes.NewReader(b)
|
||||
dec := NewDecoder(src)
|
||||
for {
|
||||
if rn.Int63()%8 > 0 {
|
||||
tok, err := dec.ReadToken()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Skipf("Decoder.ReadToken error: %v", err)
|
||||
}
|
||||
tokVals = append(tokVals, tok.Clone())
|
||||
} else {
|
||||
val, err := dec.ReadValue()
|
||||
if err != nil {
|
||||
expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']'
|
||||
if expectError && errors.As(err, new(*SyntacticError)) {
|
||||
continue
|
||||
}
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Skipf("Decoder.ReadValue error: %v", err)
|
||||
}
|
||||
tokVals = append(tokVals, append(zeroValue, val...))
|
||||
}
|
||||
}
|
||||
|
||||
// Write a sequence of tokens or values. Fail the test for any errors
|
||||
// since the previous stage guarantees that the input is valid.
|
||||
dst := new(bytes.Buffer)
|
||||
enc := NewEncoder(dst)
|
||||
for _, tokVal := range tokVals {
|
||||
switch tokVal := tokVal.(type) {
|
||||
case Token:
|
||||
if err := enc.WriteToken(tokVal); err != nil {
|
||||
t.Fatalf("Encoder.WriteToken error: %v", err)
|
||||
}
|
||||
case Value:
|
||||
if err := enc.WriteValue(tokVal); err != nil {
|
||||
t.Fatalf("Encoder.WriteValue error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Encoded output and original input must decode to the same thing.
|
||||
var got, want []Token
|
||||
for dec := NewDecoder(bytes.NewReader(b)); dec.PeekKind() > 0; {
|
||||
tok, err := dec.ReadToken()
|
||||
if err != nil {
|
||||
t.Fatalf("Decoder.ReadToken error: %v", err)
|
||||
}
|
||||
got = append(got, tok.Clone())
|
||||
}
|
||||
for dec := NewDecoder(dst); dec.PeekKind() > 0; {
|
||||
tok, err := dec.ReadToken()
|
||||
if err != nil {
|
||||
t.Fatalf("Decoder.ReadToken error: %v", err)
|
||||
}
|
||||
want = append(want, tok.Clone())
|
||||
}
|
||||
if !equalTokens(got, want) {
|
||||
t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func FuzzResumableDecoder(f *testing.F) {
|
||||
for _, td := range resumableDecoderTestdata {
|
||||
f.Add(int64(0), []byte(td))
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
|
||||
rn := rand.NewSource(seed)
|
||||
|
||||
// Regardless of how many bytes the underlying io.Reader produces,
|
||||
// the provided tokens, values, and errors should always be identical.
|
||||
t.Run("ReadToken", func(t *testing.T) {
|
||||
decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn})
|
||||
decWant := NewDecoder(bytes.NewReader(b))
|
||||
gotTok, gotErr := decGot.ReadToken()
|
||||
wantTok, wantErr := decWant.ReadToken()
|
||||
if gotTok.String() != wantTok.String() || !equalError(gotErr, wantErr) {
|
||||
t.Errorf("Decoder.ReadToken = (%v, %v), want (%v, %v)", gotTok, gotErr, wantTok, wantErr)
|
||||
}
|
||||
})
|
||||
t.Run("ReadValue", func(t *testing.T) {
|
||||
decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn})
|
||||
decWant := NewDecoder(bytes.NewReader(b))
|
||||
gotVal, gotErr := decGot.ReadValue()
|
||||
wantVal, wantErr := decWant.ReadValue()
|
||||
if !slices.Equal(gotVal, wantVal) || !equalError(gotErr, wantErr) {
|
||||
t.Errorf("Decoder.ReadValue = (%s, %v), want (%s, %v)", gotVal, gotErr, wantVal, wantErr)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func FuzzValueFormat(f *testing.F) {
|
||||
for _, td := range valueTestdata {
|
||||
f.Add(int64(0), []byte(td.in))
|
||||
}
|
||||
|
||||
// isValid reports whether b is valid according to the specified options.
|
||||
isValid := func(b []byte, opts ...Options) bool {
|
||||
d := NewDecoder(bytes.NewReader(b), opts...)
|
||||
_, errVal := d.ReadValue()
|
||||
_, errEOF := d.ReadToken()
|
||||
return errVal == nil && errEOF == io.EOF
|
||||
}
|
||||
|
||||
// stripWhitespace removes all JSON whitespace characters from the input.
|
||||
stripWhitespace := func(in []byte) (out []byte) {
|
||||
out = make([]byte, 0, len(in))
|
||||
for _, c := range in {
|
||||
switch c {
|
||||
case ' ', '\n', '\r', '\t':
|
||||
default:
|
||||
out = append(out, c)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
allOptions := []Options{
|
||||
AllowDuplicateNames(true),
|
||||
AllowInvalidUTF8(true),
|
||||
EscapeForHTML(true),
|
||||
EscapeForJS(true),
|
||||
PreserveRawStrings(true),
|
||||
CanonicalizeRawInts(true),
|
||||
CanonicalizeRawFloats(true),
|
||||
ReorderRawObjects(true),
|
||||
SpaceAfterColon(true),
|
||||
SpaceAfterComma(true),
|
||||
Multiline(true),
|
||||
WithIndent("\t"),
|
||||
WithIndentPrefix(" "),
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
|
||||
validRFC7159 := isValid(b, AllowInvalidUTF8(true), AllowDuplicateNames(true))
|
||||
validRFC8259 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(true))
|
||||
validRFC7493 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(false))
|
||||
switch {
|
||||
case !validRFC7159 && validRFC8259:
|
||||
t.Errorf("invalid input per RFC 7159 implies invalid per RFC 8259")
|
||||
case !validRFC8259 && validRFC7493:
|
||||
t.Errorf("invalid input per RFC 8259 implies invalid per RFC 7493")
|
||||
}
|
||||
|
||||
gotValid := Value(b).IsValid()
|
||||
wantValid := validRFC7493
|
||||
if gotValid != wantValid {
|
||||
t.Errorf("Value.IsValid = %v, want %v", gotValid, wantValid)
|
||||
}
|
||||
|
||||
gotCompacted := Value(string(b))
|
||||
gotCompactOk := gotCompacted.Compact() == nil
|
||||
wantCompactOk := validRFC7159
|
||||
if !bytes.Equal(stripWhitespace(gotCompacted), stripWhitespace(b)) {
|
||||
t.Errorf("stripWhitespace(Value.Compact) = %s, want %s", stripWhitespace(gotCompacted), stripWhitespace(b))
|
||||
}
|
||||
if gotCompactOk != wantCompactOk {
|
||||
t.Errorf("Value.Compact success mismatch: got %v, want %v", gotCompactOk, wantCompactOk)
|
||||
}
|
||||
|
||||
gotIndented := Value(string(b))
|
||||
gotIndentOk := gotIndented.Indent() == nil
|
||||
wantIndentOk := validRFC7159
|
||||
if !bytes.Equal(stripWhitespace(gotIndented), stripWhitespace(b)) {
|
||||
t.Errorf("stripWhitespace(Value.Indent) = %s, want %s", stripWhitespace(gotIndented), stripWhitespace(b))
|
||||
}
|
||||
if gotIndentOk != wantIndentOk {
|
||||
t.Errorf("Value.Indent success mismatch: got %v, want %v", gotIndentOk, wantIndentOk)
|
||||
}
|
||||
|
||||
gotCanonicalized := Value(string(b))
|
||||
gotCanonicalizeOk := gotCanonicalized.Canonicalize() == nil
|
||||
wantCanonicalizeOk := validRFC7493
|
||||
if gotCanonicalizeOk != wantCanonicalizeOk {
|
||||
t.Errorf("Value.Canonicalize success mismatch: got %v, want %v", gotCanonicalizeOk, wantCanonicalizeOk)
|
||||
}
|
||||
|
||||
// Random options should not result in a panic.
|
||||
var opts []Options
|
||||
rn := rand.New(rand.NewSource(seed))
|
||||
for _, opt := range allOptions {
|
||||
if rn.Intn(len(allOptions)/4) == 0 {
|
||||
opts = append(opts, opt)
|
||||
}
|
||||
}
|
||||
v := Value(b)
|
||||
v.Format(opts...) // should not panic
|
||||
})
|
||||
}
|
||||
304
pkg/encoders/json/jsontext/options.go
Normal file
@@ -0,0 +1,304 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"encoding/json/internal/jsonflags"
|
||||
"encoding/json/internal/jsonopts"
|
||||
"encoding/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder],
|
||||
// and [Decoder.Reset] with specific features.
|
||||
// Each function takes in a variadic list of options, where properties
|
||||
// set in latter options override the value of previously set properties.
|
||||
//
|
||||
// There is a single Options type, which is used with both encoding and decoding.
|
||||
// Some options affect both operations, while others only affect one operation:
|
||||
//
|
||||
// - [AllowDuplicateNames] affects encoding and decoding
|
||||
// - [AllowInvalidUTF8] affects encoding and decoding
|
||||
// - [EscapeForHTML] affects encoding only
|
||||
// - [EscapeForJS] affects encoding only
|
||||
// - [PreserveRawStrings] affects encoding only
|
||||
// - [CanonicalizeRawInts] affects encoding only
|
||||
// - [CanonicalizeRawFloats] affects encoding only
|
||||
// - [ReorderRawObjects] affects encoding only
|
||||
// - [SpaceAfterColon] affects encoding only
|
||||
// - [SpaceAfterComma] affects encoding only
|
||||
// - [Multiline] affects encoding only
|
||||
// - [WithIndent] affects encoding only
|
||||
// - [WithIndentPrefix] affects encoding only
|
||||
//
|
||||
// Options that do not affect a particular operation are ignored.
|
||||
//
|
||||
// The Options type is identical to [encoding/json.Options] and
|
||||
// [encoding/json/v2.Options]. Options from the other packages may
|
||||
// be passed to functionality in this package, but are ignored.
|
||||
// Options from this package may be used with the other packages.
|
||||
type Options = jsonopts.Options
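
// Illustrative sketch (not part of the original file): properties set in later
// options override earlier ones, so the encoder below ends up indenting with
// four spaces rather than a tab.
//
//	enc := jsontext.NewEncoder(os.Stdout,
//		jsontext.WithIndent("\t"),
//		jsontext.WithIndent("    "))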
|
||||
|
||||
// AllowDuplicateNames specifies that JSON objects may contain
|
||||
// duplicate member names. Disabling the duplicate name check may provide
|
||||
// performance benefits, but breaks compliance with RFC 7493, section 2.3.
|
||||
// The input or output will still be compliant with RFC 8259,
|
||||
// which leaves the handling of duplicate names as unspecified behavior.
|
||||
//
|
||||
// This affects either encoding or decoding.
|
||||
func AllowDuplicateNames(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.AllowDuplicateNames | 1
|
||||
} else {
|
||||
return jsonflags.AllowDuplicateNames | 0
|
||||
}
|
||||
}
|
||||
|
||||
// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8,
|
||||
// which will be mangled as the Unicode replacement character, U+FFFD.
|
||||
// This causes the encoder or decoder to break compliance with
|
||||
// RFC 7493, section 2.1, and RFC 8259, section 8.1.
|
||||
//
|
||||
// This affects either encoding or decoding.
|
||||
func AllowInvalidUTF8(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.AllowInvalidUTF8 | 1
|
||||
} else {
|
||||
return jsonflags.AllowInvalidUTF8 | 0
|
||||
}
|
||||
}
|
||||
|
||||
// EscapeForHTML specifies that '<', '>', and '&' characters within JSON strings
|
||||
// should be escaped as a hexadecimal Unicode codepoint (e.g., \u003c) so that
|
||||
// the output is safe to embed within HTML.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func EscapeForHTML(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.EscapeForHTML | 1
|
||||
} else {
|
||||
return jsonflags.EscapeForHTML | 0
|
||||
}
|
||||
}
|
||||
|
||||
// EscapeForJS specifies that U+2028 and U+2029 characters within JSON strings
|
||||
// should be escaped as a hexadecimal Unicode codepoint (e.g., \u2028) so that
|
||||
// the output is valid to embed within JavaScript. See RFC 8259, section 12.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func EscapeForJS(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.EscapeForJS | 1
|
||||
} else {
|
||||
return jsonflags.EscapeForJS | 0
|
||||
}
|
||||
}
|
||||
|
||||
// PreserveRawStrings specifies that when encoding a raw JSON string in a
|
||||
// [Token] or [Value], pre-escaped sequences
|
||||
// in a JSON string are preserved to the output.
|
||||
// However, raw strings still respect [EscapeForHTML] and [EscapeForJS]
|
||||
// such that the relevant characters are escaped.
|
||||
// If [AllowInvalidUTF8] is enabled, bytes of invalid UTF-8
|
||||
// are preserved to the output.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func PreserveRawStrings(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.PreserveRawStrings | 1
|
||||
} else {
|
||||
return jsonflags.PreserveRawStrings | 0
|
||||
}
|
||||
}
|
||||
|
||||
// CanonicalizeRawInts specifies that when encoding a raw JSON
|
||||
// integer number (i.e., a number without a fraction and exponent) in a
|
||||
// [Token] or [Value], the number is canonicalized
|
||||
// according to RFC 8785, section 3.2.2.3. As a special case,
|
||||
// the number -0 is canonicalized as 0.
|
||||
//
|
||||
// JSON numbers are treated as IEEE 754 double precision numbers.
|
||||
// Any numbers with precision beyond what is representable by that form
|
||||
// will lose their precision when canonicalized. For example,
|
||||
// integer values beyond ±2⁵³ will lose their precision.
|
||||
// For example, 1234567890123456789 is formatted as 1234567890123456800.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func CanonicalizeRawInts(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.CanonicalizeRawInts | 1
|
||||
} else {
|
||||
return jsonflags.CanonicalizeRawInts | 0
|
||||
}
|
||||
}
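
// Illustrative sketch (not part of the original file): canonicalizing a raw
// integer beyond ±2⁵³ loses precision, as described above.
//
//	v := jsontext.Value(`1234567890123456789`)
//	_ = v.Format(jsontext.CanonicalizeRawInts(true))
//	// string(v) == "1234567890123456800"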
|
||||
|
||||
// CanonicalizeRawFloats specifies that when encoding a raw JSON
|
||||
// floating-point number (i.e., a number with a fraction or exponent) in a
|
||||
// [Token] or [Value], the number is canonicalized
|
||||
// according to RFC 8785, section 3.2.2.3. As a special case,
|
||||
// the number -0 is canonicalized as 0.
|
||||
//
|
||||
// JSON numbers are treated as IEEE 754 double precision numbers.
// It is safe to canonicalize a serialized single precision number and
// parse it back as a single precision number and expect the same value.
// If a number exceeds ±1.7976931348623157e+308, which is the maximum
// finite number, then it is saturated at that value and formatted as such.
//
// This only affects encoding and is ignored when decoding.
|
||||
func CanonicalizeRawFloats(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.CanonicalizeRawFloats | 1
|
||||
} else {
|
||||
return jsonflags.CanonicalizeRawFloats | 0
|
||||
}
|
||||
}
|
||||
|
||||
// ReorderRawObjects specifies that when encoding a raw JSON object in a
|
||||
// [Value], the object members are reordered according to
|
||||
// RFC 8785, section 3.2.3.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func ReorderRawObjects(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.ReorderRawObjects | 1
|
||||
} else {
|
||||
return jsonflags.ReorderRawObjects | 0
|
||||
}
|
||||
}
|
||||
|
||||
// SpaceAfterColon specifies that the JSON output should emit a space character
|
||||
// after each colon separator following a JSON object name.
|
||||
// If false, then no space character appears after the colon separator.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func SpaceAfterColon(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.SpaceAfterColon | 1
|
||||
} else {
|
||||
return jsonflags.SpaceAfterColon | 0
|
||||
}
|
||||
}
|
||||
|
||||
// SpaceAfterComma specifies that the JSON output should emit a space character
|
||||
// after each comma separator following a JSON object value or array element.
|
||||
// If false, then no space character appears after the comma separator.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func SpaceAfterComma(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.SpaceAfterComma | 1
|
||||
} else {
|
||||
return jsonflags.SpaceAfterComma | 0
|
||||
}
|
||||
}
|
||||
|
||||
// Multiline specifies that the JSON output should expand to multiple lines,
|
||||
// where every JSON object member or JSON array element appears on
|
||||
// a new, indented line according to the nesting depth.
|
||||
//
|
||||
// If [SpaceAfterColon] is not specified, then the default is true.
|
||||
// If [SpaceAfterComma] is not specified, then the default is false.
|
||||
// If [WithIndent] is not specified, then the default is "\t".
|
||||
//
// If set to false, then the output is a single line,
// where the only whitespace emitted is determined by the current
// values of [SpaceAfterColon] and [SpaceAfterComma].
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func Multiline(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.Multiline | 1
|
||||
} else {
|
||||
return jsonflags.Multiline | 0
|
||||
}
|
||||
}
|
||||
|
||||
// WithIndent specifies that the encoder should emit multiline output
|
||||
// where each element in a JSON object or array begins on a new, indented line
|
||||
// beginning with the indent prefix (see [WithIndentPrefix])
|
||||
// followed by one or more copies of indent according to the nesting depth.
|
||||
// The indent must only be composed of space or tab characters.
|
||||
//
|
||||
// If the intent is to emit indented output without a preference for
// a particular indent string, then use [Multiline] instead.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
// Use of this option implies [Multiline] being set to true.
|
||||
func WithIndent(indent string) Options {
|
||||
// Fast-path: Return a constant for common indents, which avoids allocating.
|
||||
// These are derived from analyzing the Go module proxy on 2023-07-01.
|
||||
switch indent {
|
||||
case "\t":
|
||||
return jsonopts.Indent("\t") // ~14k usages
|
||||
case " ":
|
||||
return jsonopts.Indent(" ") // ~18k usages
|
||||
case " ":
|
||||
return jsonopts.Indent(" ") // ~1.7k usages
|
||||
case " ":
|
||||
return jsonopts.Indent(" ") // ~52k usages
|
||||
case " ":
|
||||
return jsonopts.Indent(" ") // ~12k usages
|
||||
case "":
|
||||
return jsonopts.Indent("") // ~1.5k usages
|
||||
}
|
||||
|
||||
// Otherwise, allocate for this unique value.
|
||||
if s := strings.Trim(indent, " \t"); len(s) > 0 {
|
||||
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent")
|
||||
}
|
||||
return jsonopts.Indent(indent)
|
||||
}
|
||||
|
||||
// WithIndentPrefix specifies that the encoder should emit multiline output
|
||||
// where each element in a JSON object or array begins on a new, indented line
|
||||
// beginning with the indent prefix followed by one or more copies of indent
|
||||
// (see [WithIndent]) according to the nesting depth.
|
||||
// The prefix must only be composed of space or tab characters.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
// Use of this option implies [Multiline] being set to true.
|
||||
func WithIndentPrefix(prefix string) Options {
|
||||
if s := strings.Trim(prefix, " \t"); len(s) > 0 {
|
||||
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix")
|
||||
}
|
||||
return jsonopts.IndentPrefix(prefix)
|
||||
}
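
// Illustrative sketch (not part of the original file): combining an indent
// prefix with an indent string. Each nested line in the output begins with
// two spaces followed by one tab per level of nesting.
//
//	enc := jsontext.NewEncoder(os.Stdout,
//		jsontext.WithIndentPrefix("  "),
//		jsontext.WithIndent("\t"))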
|
||||
|
||||
/*
|
||||
// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit.
|
||||
// Remember to also update the "Security Considerations" section.
|
||||
|
||||
// WithByteLimit sets a limit on the number of bytes of input or output bytes
|
||||
// that may be consumed or produced for each top-level JSON value.
|
||||
// If a [Decoder] or [Encoder] method call would need to consume/produce
|
||||
// more than a total of n bytes to make progress on the top-level JSON value,
|
||||
// then the call will report an error.
|
||||
// Whitespace before and within the top-level value are counted against the limit.
|
||||
// Whitespace after a top-level value are counted against the limit
|
||||
// for the next top-level value.
|
||||
//
|
||||
// A non-positive limit is equivalent to no limit at all.
|
||||
// If unspecified, the default limit is no limit at all.
|
||||
// This affects either encoding or decoding.
|
||||
func WithByteLimit(n int64) Options {
|
||||
return jsonopts.ByteLimit(max(n, 0))
|
||||
}
|
||||
|
||||
// WithDepthLimit sets a limit on the maximum depth of JSON nesting
|
||||
// that may be consumed or produced for each top-level JSON value.
|
||||
// If a [Decoder] or [Encoder] method call would need to consume or produce
|
||||
// a depth greater than n to make progress on the top-level JSON value,
|
||||
// then the call will report an error.
|
||||
//
|
||||
// A non-positive limit is equivalent to no limit at all.
|
||||
// If unspecified, the default limit is 10000.
|
||||
// This affects either encoding or decoding.
|
||||
func WithDepthLimit(n int) Options {
|
||||
return jsonopts.DepthLimit(max(n, 0))
|
||||
}
|
||||
*/
|
||||
152
pkg/encoders/json/jsontext/pools.go
Normal file
@@ -0,0 +1,152 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"math/bits"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// TODO(https://go.dev/issue/47657): Use sync.PoolOf.
|
||||
|
||||
var (
|
||||
// This owns the internal buffer since there is no io.Writer to output to.
|
||||
// Since the buffer can get arbitrarily large in normal usage,
|
||||
// there is statistical tracking logic to determine whether to recycle
|
||||
// the internal buffer or not based on a history of utilization.
|
||||
bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
|
||||
|
||||
// This owns the internal buffer, but it is only used to temporarily store
|
||||
// buffered JSON before flushing it to the underlying io.Writer.
|
||||
// In a sufficiently efficient streaming mode, we do not expect the buffer
|
||||
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
|
||||
streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
|
||||
|
||||
// This does not own the internal buffer since
|
||||
// it is taken directly from the provided bytes.Buffer.
|
||||
bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
|
||||
)
|
||||
|
||||
// bufferStatistics is statistics to track buffer utilization.
|
||||
// It is used to determine whether to recycle a buffer or not
|
||||
// to avoid https://go.dev/issue/23199.
|
||||
type bufferStatistics struct {
|
||||
strikes int // number of times the buffer was under-utilized
|
||||
prevLen int // length of previous buffer
|
||||
}
|
||||
|
||||
func getBufferedEncoder(opts ...Options) *Encoder {
|
||||
e := bufferedEncoderPool.Get().(*Encoder)
|
||||
if e.s.Buf == nil {
|
||||
// Round up to nearest 2ⁿ to make best use of malloc size classes.
|
||||
// See runtime/sizeclasses.go on Go1.15.
|
||||
// Logical OR with 63 to ensure 64 as the minimum buffer size.
|
||||
n := 1 << bits.Len(uint(e.s.bufStats.prevLen|63))
|
||||
e.s.Buf = make([]byte, 0, n)
|
||||
}
|
||||
e.s.reset(e.s.Buf[:0], nil, opts...)
|
||||
return e
|
||||
}
|
||||
func putBufferedEncoder(e *Encoder) {
|
||||
// Recycle large buffers only if sufficiently utilized.
|
||||
// If a buffer is under-utilized enough times sequentially,
|
||||
// then it is discarded, ensuring that a single large buffer
|
||||
// won't be kept alive by a continuous stream of small usages.
|
||||
//
|
||||
// The worst case utilization is computed as:
|
||||
// MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES)
|
||||
//
|
||||
// For the constants chosen below, this is (25%)/(1+4) ⇒ 5%.
|
||||
// This may seem low, but it ensures a lower bound on
|
||||
// the absolute worst-case utilization. Without this check,
|
||||
// this would be theoretically 0%, which is infinitely worse.
|
||||
//
|
||||
// See https://go.dev/issue/27735.
|
||||
switch {
|
||||
case cap(e.s.Buf) <= 4<<10: // always recycle buffers smaller than 4KiB
|
||||
e.s.bufStats.strikes = 0
|
||||
case cap(e.s.Buf)/4 <= len(e.s.Buf): // at least 25% utilization
|
||||
e.s.bufStats.strikes = 0
|
||||
case e.s.bufStats.strikes < 4: // at most 4 strikes
|
||||
e.s.bufStats.strikes++
|
||||
default: // discard the buffer; too large and too often under-utilized
|
||||
e.s.bufStats.strikes = 0
|
||||
e.s.bufStats.prevLen = len(e.s.Buf) // heuristic for size to allocate next time
|
||||
e.s.Buf = nil
|
||||
}
|
||||
bufferedEncoderPool.Put(e)
|
||||
}
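
// Worked arithmetic (not part of the original file) for the recycling policy
// above: with a 25% utilization threshold and at most 4 strikes, a large
// buffer is retained in the worst case at 0.25/(1+4) = 0.05, i.e. 5% utilization.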
|
||||
|
||||
func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder {
|
||||
if _, ok := w.(*bytes.Buffer); ok {
|
||||
e := bytesBufferEncoderPool.Get().(*Encoder)
|
||||
e.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer
|
||||
return e
|
||||
} else {
|
||||
e := streamingEncoderPool.Get().(*Encoder)
|
||||
e.s.reset(e.s.Buf[:0], w, opts...) // preserve existing buffer
|
||||
return e
|
||||
}
|
||||
}
|
||||
func putStreamingEncoder(e *Encoder) {
|
||||
if _, ok := e.s.wr.(*bytes.Buffer); ok {
|
||||
bytesBufferEncoderPool.Put(e)
|
||||
} else {
|
||||
if cap(e.s.Buf) > 64<<10 {
|
||||
e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
streamingEncoderPool.Put(e)
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// This does not own the internal buffer since it is externally provided.
|
||||
bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
|
||||
|
||||
// This owns the internal buffer, but it is only used to temporarily store
|
||||
// buffered JSON fetched from the underlying io.Reader.
|
||||
// In a sufficiently efficient streaming mode, we do not expect the buffer
|
||||
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
|
||||
streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
|
||||
|
||||
// This does not own the internal buffer since
|
||||
// it is taken directly from the provided bytes.Buffer.
|
||||
bytesBufferDecoderPool = bufferedDecoderPool
|
||||
)
|
||||
|
||||
func getBufferedDecoder(b []byte, opts ...Options) *Decoder {
|
||||
d := bufferedDecoderPool.Get().(*Decoder)
|
||||
d.s.reset(b, nil, opts...)
|
||||
return d
|
||||
}
|
||||
func putBufferedDecoder(d *Decoder) {
|
||||
bufferedDecoderPool.Put(d)
|
||||
}
|
||||
|
||||
func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder {
|
||||
if _, ok := r.(*bytes.Buffer); ok {
|
||||
d := bytesBufferDecoderPool.Get().(*Decoder)
|
||||
d.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer
|
||||
return d
|
||||
} else {
|
||||
d := streamingDecoderPool.Get().(*Decoder)
|
||||
d.s.reset(d.s.buf[:0], r, opts...) // preserve existing buffer
|
||||
return d
|
||||
}
|
||||
}
|
||||
func putStreamingDecoder(d *Decoder) {
|
||||
if _, ok := d.s.rd.(*bytes.Buffer); ok {
|
||||
bytesBufferDecoderPool.Put(d)
|
||||
} else {
|
||||
if cap(d.s.buf) > 64<<10 {
|
||||
d.s.buf = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
streamingDecoderPool.Put(d)
|
||||
}
|
||||
}
|
||||
41
pkg/encoders/json/jsontext/quote.go
Normal file
@@ -0,0 +1,41 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"encoding/json/internal/jsonflags"
|
||||
"encoding/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// AppendQuote appends a double-quoted JSON string literal representing src
|
||||
// to dst and returns the extended buffer.
|
||||
// It uses the minimal string representation per RFC 8785, section 3.2.2.2.
|
||||
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
|
||||
// and an error is returned at the end indicating the presence of invalid UTF-8.
|
||||
// The dst must not overlap with the src.
|
||||
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
|
||||
dst, err := jsonwire.AppendQuote(dst, src, &jsonflags.Flags{})
|
||||
if err != nil {
|
||||
err = &SyntacticError{Err: err}
|
||||
}
|
||||
return dst, err
|
||||
}
|
||||
|
||||
// AppendUnquote appends the decoded interpretation of src as a
|
||||
// double-quoted JSON string literal to dst and returns the extended buffer.
|
||||
// The input src must be a JSON string without any surrounding whitespace.
|
||||
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
|
||||
// and an error is returned at the end indicating the presence of invalid UTF-8.
|
||||
// Any trailing bytes after the JSON string literal result in an error.
|
||||
// The dst must not overlap with the src.
|
||||
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
|
||||
dst, err := jsonwire.AppendUnquote(dst, src)
|
||||
if err != nil {
|
||||
err = &SyntacticError{Err: err}
|
||||
}
|
||||
return dst, err
|
||||
}
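
// Illustrative sketch (not part of the original file): round-tripping with the
// helpers above. AppendQuote uses the minimal escaping (no HTML escaping),
// while AppendUnquote decodes any valid escape sequence.
//
//	q, _ := jsontext.AppendQuote(nil, "a<b")          // `"a<b"`
//	u, _ := jsontext.AppendUnquote(nil, `"a\u003cb"`) // "a<b"
//	_, _ = q, u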
|
||||
828
pkg/encoders/json/jsontext/state.go
Normal file
@@ -0,0 +1,828 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"iter"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"encoding/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// ErrDuplicateName indicates that a JSON token could not be
|
||||
// encoded or decoded because it results in a duplicate JSON object name.
|
||||
// This error is directly wrapped within a [SyntacticError] when produced.
|
||||
//
|
||||
// The name of a duplicate JSON object member can be extracted as:
|
||||
//
|
||||
// err := ...
|
||||
// var serr *jsontext.SyntacticError
|
||||
// if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName {
|
||||
// ptr := serr.JSONPointer // JSON pointer to duplicate name
|
||||
// name := ptr.LastToken() // duplicate name itself
|
||||
// ...
|
||||
// }
|
||||
//
|
||||
// This error is only returned if [AllowDuplicateNames] is false.
|
||||
var ErrDuplicateName = errors.New("duplicate object member name")
|
||||
|
||||
// ErrNonStringName indicates that a JSON token could not be
|
||||
// encoded or decoded because it is not a string,
|
||||
// as required for JSON object names according to RFC 8259, section 4.
|
||||
// This error is directly wrapped within a [SyntacticError] when produced.
|
||||
var ErrNonStringName = errors.New("object member name must be a string")
|
||||
|
||||
var (
|
||||
errMissingValue = errors.New("missing value after object name")
|
||||
errMismatchDelim = errors.New("mismatching structural token for object or array")
|
||||
errMaxDepth = errors.New("exceeded max depth")
|
||||
|
||||
errInvalidNamespace = errors.New("object namespace is in an invalid state")
|
||||
)
|
||||
|
||||
// Per RFC 8259, section 9, implementations may enforce a maximum depth.
|
||||
// Such a limit is necessary to prevent stack overflows.
|
||||
const maxNestingDepth = 10000
|
||||
|
||||
type state struct {
|
||||
// Tokens validates whether the next token kind is valid.
|
||||
Tokens stateMachine
|
||||
|
||||
// Names is a stack of object names.
|
||||
Names objectNameStack
|
||||
|
||||
// Namespaces is a stack of object namespaces.
|
||||
// For performance reasons, Encoder or Decoder may not update this
|
||||
// if Marshal or Unmarshal is able to track names in a more efficient way.
|
||||
// See makeMapArshaler and makeStructArshaler.
|
||||
// Not used if AllowDuplicateNames is true.
|
||||
Namespaces objectNamespaceStack
|
||||
}
|
||||
|
||||
// needObjectValue reports whether the next token should be an object value.
|
||||
// This method is used by [wrapSyntacticError].
|
||||
func (s *state) needObjectValue() bool {
|
||||
return s.Tokens.Last.needObjectValue()
|
||||
}
|
||||
|
||||
func (s *state) reset() {
|
||||
s.Tokens.reset()
|
||||
s.Names.reset()
|
||||
s.Namespaces.reset()
|
||||
}
|
||||
|
||||
// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value
|
||||
// relative to the root of the top-level JSON value.
|
||||
//
|
||||
// A Pointer is a slash-separated list of tokens, where each token is
|
||||
// either a JSON object name or an index to a JSON array element
|
||||
// encoded as a base-10 integer value.
|
||||
// It is impossible to distinguish between an array index and an object name
|
||||
// (that happens to be a base-10 encoded integer) without also knowing
|
||||
// the structure of the top-level JSON value that the pointer refers to.
|
||||
//
|
||||
// There is exactly one representation of a pointer to a particular value,
|
||||
// so comparability of Pointer values is equivalent to checking whether
|
||||
// they both point to the exact same value.
|
||||
type Pointer string
|
||||
|
||||
// IsValid reports whether p is a valid JSON Pointer according to RFC 6901.
|
||||
// Note that the concatenation of two valid pointers produces a valid pointer.
|
||||
func (p Pointer) IsValid() bool {
|
||||
for i, r := range p {
|
||||
switch {
|
||||
case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')):
|
||||
return false // invalid escape
|
||||
case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"):
|
||||
return false // invalid UTF-8
|
||||
}
|
||||
}
|
||||
return len(p) == 0 || p[0] == '/'
|
||||
}
|
||||
|
||||
// Contains reports whether the JSON value that p points to
|
||||
// is equal to or contains the JSON value that pc points to.
|
||||
func (p Pointer) Contains(pc Pointer) bool {
|
||||
// Invariant: len(p) <= len(pc) if p.Contains(pc)
|
||||
suffix, ok := strings.CutPrefix(string(pc), string(p))
|
||||
return ok && (suffix == "" || suffix[0] == '/')
|
||||
}
|
||||
|
||||
// Parent strips off the last token and returns the remaining pointer.
|
||||
// The parent of an empty p is an empty string.
|
||||
func (p Pointer) Parent() Pointer {
|
||||
return p[:max(strings.LastIndexByte(string(p), '/'), 0)]
|
||||
}
|
||||
|
||||
// LastToken returns the last token in the pointer.
|
||||
// The last token of an empty p is an empty string.
|
||||
func (p Pointer) LastToken() string {
|
||||
last := p[max(strings.LastIndexByte(string(p), '/'), 0):]
|
||||
return unescapePointerToken(strings.TrimPrefix(string(last), "/"))
|
||||
}
|
||||
|
||||
// AppendToken appends a token to the end of p and returns the full pointer.
|
||||
func (p Pointer) AppendToken(tok string) Pointer {
|
||||
return Pointer(appendEscapePointerName([]byte(p+"/"), tok))
|
||||
}
|
||||
|
||||
// TODO: Add Pointer.AppendTokens,
|
||||
// but should this take in a ...string or an iter.Seq[string]?
|
||||
|
||||
// Tokens returns an iterator over the reference tokens in the JSON pointer,
|
||||
// starting from the first token until the last token (unless stopped early).
|
||||
func (p Pointer) Tokens() iter.Seq[string] {
|
||||
return func(yield func(string) bool) {
|
||||
for len(p) > 0 {
|
||||
p = Pointer(strings.TrimPrefix(string(p), "/"))
|
||||
i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p)))
|
||||
if !yield(unescapePointerToken(string(p)[:i])) {
|
||||
return
|
||||
}
|
||||
p = p[i:]
|
||||
}
|
||||
}
|
||||
}
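
// Illustrative sketch (not part of the original file): RFC 6901 escaping and
// token iteration with the Pointer helpers above.
//
//	p := jsontext.Pointer("").AppendToken("a/b").AppendToken("m~n")
//	// p == "/a~1b/m~0n"
//	for tok := range p.Tokens() {
//		fmt.Println(tok) // prints "a/b", then "m~n"
//	}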
|
||||
|
||||
func unescapePointerToken(token string) string {
|
||||
if strings.Contains(token, "~") {
|
||||
// Per RFC 6901, section 3, unescape '~' and '/' characters.
|
||||
token = strings.ReplaceAll(token, "~1", "/")
|
||||
token = strings.ReplaceAll(token, "~0", "~")
|
||||
}
|
||||
return token
|
||||
}
|
||||
|
||||
// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
|
||||
//
|
||||
// - If where is -1, then it points to the previously processed token.
|
||||
//
|
||||
// - If where is 0, then it points to the parent JSON object or array,
|
||||
// or an object member if in-between an object member key and value.
|
||||
// This is useful when the position is ambiguous whether
|
||||
// we are interested in the previous or next token, or
|
||||
// when we are uncertain whether the next token
|
||||
// continues or terminates the current object or array.
|
||||
//
|
||||
// - If where is +1, then it points to the next expected value,
|
||||
// assuming that it continues the current JSON object or array.
|
||||
// As a special case, if the next token is a JSON object name,
|
||||
// then it points to the parent JSON object.
|
||||
//
|
||||
// Invariant: Must call s.names.copyQuotedBuffer beforehand.
|
||||
func (s state) appendStackPointer(b []byte, where int) []byte {
|
||||
var objectDepth int
|
||||
for i := 1; i < s.Tokens.Depth(); i++ {
|
||||
e := s.Tokens.index(i)
|
||||
arrayDelta := -1 // by default point to previous array element
|
||||
if isLast := i == s.Tokens.Depth()-1; isLast {
|
||||
switch {
|
||||
case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
|
||||
return b
|
||||
case where > 0 && e.isArray():
|
||||
arrayDelta = 0 // point to next array element
|
||||
}
|
||||
}
|
||||
switch {
|
||||
case e.isObject():
|
||||
b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
|
||||
objectDepth++
|
||||
case e.isArray():
|
||||
b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
|
||||
for _, r := range string(name) {
|
||||
// Per RFC 6901, section 3, escape '~' and '/' characters.
|
||||
switch r {
|
||||
case '~':
|
||||
b = append(b, "~0"...)
|
||||
case '/':
|
||||
b = append(b, "~1"...)
|
||||
default:
|
||||
b = utf8.AppendRune(b, r)
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// stateMachine is a push-down automaton that validates whether
|
||||
// a sequence of tokens is valid or not according to the JSON grammar.
|
||||
// It is useful for both encoding and decoding.
|
||||
//
|
||||
// It is a stack where each entry represents a nested JSON object or array.
|
||||
// The stack has a minimum depth of 1 where the first level is a
|
||||
// virtual JSON array to handle a stream of top-level JSON values.
|
||||
// The top-level virtual JSON array is special in that it doesn't require commas
|
||||
// between each JSON value.
|
||||
//
|
||||
// For performance, most methods are carefully written to be inlinable.
|
||||
// The zero value is a valid state machine ready for use.
|
||||
type stateMachine struct {
|
||||
Stack []stateEntry
|
||||
Last stateEntry
|
||||
}
|
||||
|
||||
// reset resets the state machine.
|
||||
// The machine always starts with a minimum depth of 1.
|
||||
func (m *stateMachine) reset() {
|
||||
m.Stack = m.Stack[:0]
|
||||
if cap(m.Stack) > 1<<10 {
|
||||
m.Stack = nil
|
||||
}
|
||||
m.Last = stateTypeArray
|
||||
}
|
||||
|
||||
// Depth is the current nested depth of JSON objects and arrays.
|
||||
// It is one-indexed (i.e., top-level values have a depth of 1).
|
||||
func (m stateMachine) Depth() int {
|
||||
return len(m.Stack) + 1
|
||||
}
|
||||
|
||||
// index returns a reference to the ith entry.
|
||||
// It is only valid until the next push method call.
|
||||
func (m *stateMachine) index(i int) *stateEntry {
|
||||
if i == len(m.Stack) {
|
||||
return &m.Last
|
||||
}
|
||||
return &m.Stack[i]
|
||||
}
|
||||
|
||||
// DepthLength reports the current nested depth and
|
||||
// the length of the last JSON object or array.
|
||||
func (m stateMachine) DepthLength() (int, int64) {
|
||||
return m.Depth(), m.Last.Length()
|
||||
}
|
||||
|
||||
// appendLiteral appends a JSON literal as the next token in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) appendLiteral() error {
|
||||
switch {
|
||||
case m.Last.NeedObjectName():
|
||||
return ErrNonStringName
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
default:
|
||||
m.Last.Increment()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// appendString appends a JSON string as the next token in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) appendString() error {
|
||||
switch {
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
default:
|
||||
m.Last.Increment()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// appendNumber appends a JSON number as the next token in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) appendNumber() error {
|
||||
return m.appendLiteral()
|
||||
}
|
||||
|
||||
// pushObject appends a JSON begin object token as next in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) pushObject() error {
|
||||
switch {
|
||||
case m.Last.NeedObjectName():
|
||||
return ErrNonStringName
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
case len(m.Stack) == maxNestingDepth:
|
||||
return errMaxDepth
|
||||
default:
|
||||
m.Last.Increment()
|
||||
m.Stack = append(m.Stack, m.Last)
|
||||
m.Last = stateTypeObject
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// popObject appends a JSON end object token as next in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) popObject() error {
|
||||
switch {
|
||||
case !m.Last.isObject():
|
||||
return errMismatchDelim
|
||||
case m.Last.needObjectValue():
|
||||
return errMissingValue
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
default:
|
||||
m.Last = m.Stack[len(m.Stack)-1]
|
||||
m.Stack = m.Stack[:len(m.Stack)-1]
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// pushArray appends a JSON begin array token as next in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) pushArray() error {
|
||||
switch {
|
||||
case m.Last.NeedObjectName():
|
||||
return ErrNonStringName
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
case len(m.Stack) == maxNestingDepth:
|
||||
return errMaxDepth
|
||||
default:
|
||||
m.Last.Increment()
|
||||
m.Stack = append(m.Stack, m.Last)
|
||||
m.Last = stateTypeArray
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// popArray appends a JSON end array token as next in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) popArray() error {
|
||||
switch {
|
||||
case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array
|
||||
return errMismatchDelim
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
default:
|
||||
m.Last = m.Stack[len(m.Stack)-1]
|
||||
m.Stack = m.Stack[:len(m.Stack)-1]
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// NeedIndent reports whether indent whitespace should be injected.
|
||||
// A zero value means that no whitespace should be injected.
|
||||
// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody
|
||||
// should be appended to the output immediately before the next token.
|
||||
func (m stateMachine) NeedIndent(next Kind) (n int) {
|
||||
willEnd := next == '}' || next == ']'
|
||||
switch {
|
||||
case m.Depth() == 1:
|
||||
return 0 // top-level values are never indented
|
||||
case m.Last.Length() == 0 && willEnd:
|
||||
return 0 // an empty object or array is never indented
|
||||
case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
|
||||
return m.Depth()
|
||||
case willEnd:
|
||||
return m.Depth() - 1
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// MayAppendDelim appends a colon or comma that may precede the next token.
|
||||
func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
|
||||
switch {
|
||||
case m.Last.needImplicitColon():
|
||||
return append(b, ':')
|
||||
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
|
||||
return append(b, ',')
|
||||
default:
|
||||
return b
|
||||
}
|
||||
}
|
||||
|
||||
// needDelim reports whether a colon or comma token should be implicitly emitted
|
||||
// before the next token of the specified kind.
|
||||
// A zero value means no delimiter should be emitted.
|
||||
func (m stateMachine) needDelim(next Kind) (delim byte) {
|
||||
switch {
|
||||
case m.Last.needImplicitColon():
|
||||
return ':'
|
||||
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
|
||||
return ','
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// InvalidateDisabledNamespaces marks all disabled namespaces as invalid.
|
||||
//
|
||||
// For efficiency, Marshal and Unmarshal may disable namespaces since there are
|
||||
// more efficient ways to track duplicate names. However, if an error occurs,
|
||||
// the namespaces in Encoder or Decoder will be left in an inconsistent state.
|
||||
// Mark the namespaces as invalid so that future method calls on
|
||||
// Encoder or Decoder will return an error.
|
||||
func (m *stateMachine) InvalidateDisabledNamespaces() {
|
||||
for i := range m.Depth() {
|
||||
e := m.index(i)
|
||||
if !e.isActiveNamespace() {
|
||||
e.invalidateNamespace()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// stateEntry encodes several artifacts within a single unsigned integer:
|
||||
// - whether this represents a JSON object or array,
|
||||
// - whether this object should check for duplicate names, and
|
||||
// - how many elements are in this JSON object or array.
|
||||
type stateEntry uint64
|
||||
|
||||
const (
|
||||
// The type mask (1 bit) records whether this is a JSON object or array.
|
||||
stateTypeMask stateEntry = 0x8000_0000_0000_0000
|
||||
stateTypeObject stateEntry = 0x8000_0000_0000_0000
|
||||
stateTypeArray stateEntry = 0x0000_0000_0000_0000
|
||||
|
||||
// The name check mask (2 bits) records whether to update
|
||||
// the namespaces for the current JSON object and
|
||||
// whether the namespace is valid.
|
||||
stateNamespaceMask stateEntry = 0x6000_0000_0000_0000
|
||||
stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
|
||||
stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
|
||||
|
||||
// The count mask (61 bits) records the number of elements.
|
||||
stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff
|
||||
stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
|
||||
stateCountOdd stateEntry = 0x0000_0000_0000_0001
|
||||
stateCountEven stateEntry = 0x0000_0000_0000_0000
|
||||
)
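As a quick illustration of how these masks compose (a sketch only; exampleStateEntry is a hypothetical helper, not part of this file):
// exampleStateEntry shows a JSON object entry holding 3 elements with its
// namespace disabled, built as the bitwise OR of the masks above.
func exampleStateEntry() {
	e := stateTypeObject | stateDisableNamespace | stateEntry(3)
	_ = e.isObject()          // true:  the type bit is set
	_ = e.Length()            // 3:     the low 61 bits hold the element count
	_ = e.isActiveNamespace() // false: the disable-namespace bit is set
}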
|
||||
|
||||
// Length reports the number of elements in the JSON object or array.
|
||||
// Each name and value in an object entry is treated as a separate element.
|
||||
func (e stateEntry) Length() int64 {
|
||||
return int64(e & stateCountMask)
|
||||
}
|
||||
|
||||
// isObject reports whether this is a JSON object.
|
||||
func (e stateEntry) isObject() bool {
|
||||
return e&stateTypeMask == stateTypeObject
|
||||
}
|
||||
|
||||
// isArray reports whether this is a JSON array.
|
||||
func (e stateEntry) isArray() bool {
|
||||
return e&stateTypeMask == stateTypeArray
|
||||
}
|
||||
|
||||
// NeedObjectName reports whether the next token must be a JSON string,
|
||||
// which is necessary for JSON object names.
|
||||
func (e stateEntry) NeedObjectName() bool {
|
||||
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
|
||||
}
|
||||
|
||||
// needImplicitColon reports whether a colon should occur next,
|
||||
// which always occurs after JSON object names.
|
||||
func (e stateEntry) needImplicitColon() bool {
|
||||
return e.needObjectValue()
|
||||
}
|
||||
|
||||
// needObjectValue reports whether the next token must be a JSON value,
|
||||
// which is necessary after every JSON object name.
|
||||
func (e stateEntry) needObjectValue() bool {
|
||||
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
|
||||
}
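A brief sketch of the parity rule these two accessors encode (exampleObjectParity is a hypothetical helper, not part of this file): an even element count means a name is expected next, while an odd count means a value is expected.
// exampleObjectParity demonstrates how the low bit of the element count
// alternates between expecting an object name and an object value.
func exampleObjectParity() {
	e := stateTypeObject | stateEntry(2) // one "name":"value" pair appended so far
	_ = e.NeedObjectName()  // true: even count, the next token must be a name
	e.Increment()
	_ = e.needObjectValue() // true: odd count, the next token must be a value
}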
|
||||
|
||||
// needImplicitComma reports whether a comma should occur next,
|
||||
// which always occurs after a value in a JSON object or array
|
||||
// before the next value (or name).
|
||||
func (e stateEntry) needImplicitComma(next Kind) bool {
|
||||
return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
|
||||
}
|
||||
|
||||
// Increment increments the number of elements for the current object or array.
|
||||
// This assumes that overflow won't practically be an issue since
|
||||
// 1<<bits.OnesCount(stateCountMask) is sufficiently large.
|
||||
func (e *stateEntry) Increment() {
|
||||
(*e)++
|
||||
}
|
||||
|
||||
// decrement decrements the number of elements for the current object or array.
|
||||
// It is the caller's responsibility to ensure that e.Length() > 0.
|
||||
func (e *stateEntry) decrement() {
|
||||
(*e)--
|
||||
}
|
||||
|
||||
// DisableNamespace disables the JSON object namespace such that the
|
||||
// Encoder or Decoder no longer updates the namespace.
|
||||
func (e *stateEntry) DisableNamespace() {
|
||||
*e |= stateDisableNamespace
|
||||
}
|
||||
|
||||
// isActiveNamespace reports whether the JSON object namespace is actively
|
||||
// being updated and used for duplicate name checks.
|
||||
func (e stateEntry) isActiveNamespace() bool {
|
||||
return e&(stateDisableNamespace) == 0
|
||||
}
|
||||
|
||||
// invalidateNamespace marks the JSON object namespace as being invalid.
|
||||
func (e *stateEntry) invalidateNamespace() {
|
||||
*e |= stateInvalidNamespace
|
||||
}
|
||||
|
||||
// isValidNamespace reports whether the JSON object namespace is valid.
|
||||
func (e stateEntry) isValidNamespace() bool {
|
||||
return e&(stateInvalidNamespace) == 0
|
||||
}
|
||||
|
||||
// objectNameStack is a stack of names when descending into a JSON object.
|
||||
// In contrast to objectNamespaceStack, this only has to remember a single name
|
||||
// per JSON object.
|
||||
//
|
||||
// This data structure may contain offsets to encodeBuffer or decodeBuffer.
|
||||
// It violates clean abstraction of layers, but is significantly more efficient.
|
||||
// This ensures that popping and pushing in the common case is a trivial
|
||||
// push/pop of an offset integer.
|
||||
//
|
||||
// The zero value is an empty names stack ready for use.
|
||||
type objectNameStack struct {
|
||||
// offsets is a stack of offsets for each name.
|
||||
// A non-negative offset is the ending offset into the local names buffer.
|
||||
// A negative offset is the bit-wise inverse of a starting offset into
|
||||
// a remote buffer (e.g., encodeBuffer or decodeBuffer).
|
||||
// A math.MinInt offset at the end implies that the last object is empty.
|
||||
// Invariant: Positive offsets always occur before negative offsets.
|
||||
offsets []int
|
||||
// unquotedNames is a back-to-back concatenation of names.
|
||||
unquotedNames []byte
|
||||
}
|
||||
|
||||
func (ns *objectNameStack) reset() {
|
||||
ns.offsets = ns.offsets[:0]
|
||||
ns.unquotedNames = ns.unquotedNames[:0]
|
||||
if cap(ns.offsets) > 1<<6 {
|
||||
ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
if cap(ns.unquotedNames) > 1<<10 {
|
||||
ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *objectNameStack) length() int {
|
||||
return len(ns.offsets)
|
||||
}
|
||||
|
||||
// getUnquoted retrieves the ith unquoted name in the stack.
|
||||
// It returns an empty string if the last object is empty.
|
||||
//
|
||||
// Invariant: Must call copyQuotedBuffer beforehand.
|
||||
func (ns *objectNameStack) getUnquoted(i int) []byte {
|
||||
ns.ensureCopiedBuffer()
|
||||
if i == 0 {
|
||||
return ns.unquotedNames[:ns.offsets[0]]
|
||||
} else {
|
||||
return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
|
||||
}
|
||||
}
|
||||
|
||||
// invalidOffset indicates that the last JSON object currently has no name.
|
||||
const invalidOffset = math.MinInt
|
||||
|
||||
// push descends into a nested JSON object.
|
||||
func (ns *objectNameStack) push() {
|
||||
ns.offsets = append(ns.offsets, invalidOffset)
|
||||
}
|
||||
|
||||
// ReplaceLastQuotedOffset replaces the last name with the starting offset
|
||||
// to the quoted name in some remote buffer. All offsets provided must be
|
||||
// relative to the same buffer until copyQuotedBuffer is called.
|
||||
func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
|
||||
// Use bit-wise inversion instead of naive multiplication by -1 to avoid
|
||||
// ambiguity regarding zero (which is a valid offset into the names field).
|
||||
// Bit-wise inversion is mathematically equivalent to -i-1,
|
||||
// such that 0 becomes -1, 1 becomes -2, and so forth.
|
||||
// This ensures that remote offsets are always negative.
|
||||
ns.offsets[len(ns.offsets)-1] = ^i
|
||||
}
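A small sketch of the inversion round-trip described above (exampleOffsetInversion is a hypothetical helper, not part of this file):
// exampleOffsetInversion shows that bitwise inversion is its own inverse,
// so a remote offset round-trips exactly and is always stored as a negative value.
func exampleOffsetInversion() {
	i := 0
	stored := ^i         // -1: bitwise inversion of 0, always negative
	recovered := ^stored // 0:  inverting again restores the original offset
	_, _ = stored, recovered
}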
|
||||
|
||||
// replaceLastUnquotedName replaces the last name with the provided name.
|
||||
//
|
||||
// Invariant: Must call copyQuotedBuffer beforehand.
|
||||
func (ns *objectNameStack) replaceLastUnquotedName(s string) {
|
||||
ns.ensureCopiedBuffer()
|
||||
var startOffset int
|
||||
if len(ns.offsets) > 1 {
|
||||
startOffset = ns.offsets[len(ns.offsets)-2]
|
||||
}
|
||||
ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
|
||||
ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
|
||||
}
|
||||
|
||||
// clearLast removes any name in the last JSON object.
|
||||
// It is semantically equivalent to ns.push followed by ns.pop.
|
||||
func (ns *objectNameStack) clearLast() {
|
||||
ns.offsets[len(ns.offsets)-1] = invalidOffset
|
||||
}
|
||||
|
||||
// pop ascends out of a nested JSON object.
|
||||
func (ns *objectNameStack) pop() {
|
||||
ns.offsets = ns.offsets[:len(ns.offsets)-1]
|
||||
}
|
||||
|
||||
// copyQuotedBuffer copies names from the remote buffer into the local names
|
||||
// buffer so that there are no more offset references into the remote buffer.
|
||||
// This allows the remote buffer to change contents without affecting
|
||||
// the names that this data structure is trying to remember.
|
||||
func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
|
||||
// Find the first negative offset.
|
||||
var i int
|
||||
for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
|
||||
continue
|
||||
}
|
||||
|
||||
// Copy each name from the remote buffer into the local buffer.
|
||||
for i = i + 1; i < len(ns.offsets); i++ {
|
||||
if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
|
||||
if i == 0 {
|
||||
ns.offsets[i] = 0
|
||||
} else {
|
||||
ns.offsets[i] = ns.offsets[i-1]
|
||||
}
|
||||
break // last JSON object had a push without any names
|
||||
}
|
||||
|
||||
// As a form of Hyrum proofing, we write an invalid character into the
|
||||
// buffer to make misuse of Decoder.ReadToken more obvious.
|
||||
// We need to undo that mutation here.
|
||||
quotedName := b[^ns.offsets[i]:]
|
||||
if quotedName[0] == invalidateBufferByte {
|
||||
quotedName[0] = '"'
|
||||
}
|
||||
|
||||
// Append the unquoted name to the local buffer.
|
||||
var startOffset int
|
||||
if i > 0 {
|
||||
startOffset = ns.offsets[i-1]
|
||||
}
|
||||
if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
|
||||
ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
|
||||
} else {
|
||||
ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
|
||||
}
|
||||
ns.offsets[i] = len(ns.unquotedNames)
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *objectNameStack) ensureCopiedBuffer() {
|
||||
if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
|
||||
panic("BUG: copyQuotedBuffer not called beforehand")
|
||||
}
|
||||
}
|
||||
|
||||
// objectNamespaceStack is a stack of object namespaces.
|
||||
// This data structure assists in detecting duplicate names.
|
||||
type objectNamespaceStack []objectNamespace
|
||||
|
||||
// reset resets the object namespace stack.
|
||||
func (nss *objectNamespaceStack) reset() {
|
||||
if cap(*nss) > 1<<10 {
|
||||
*nss = nil
|
||||
}
|
||||
*nss = (*nss)[:0]
|
||||
}
|
||||
|
||||
// push starts a new namespace for a nested JSON object.
|
||||
func (nss *objectNamespaceStack) push() {
|
||||
if cap(*nss) > len(*nss) {
|
||||
*nss = (*nss)[:len(*nss)+1]
|
||||
nss.Last().reset()
|
||||
} else {
|
||||
*nss = append(*nss, objectNamespace{})
|
||||
}
|
||||
}
|
||||
|
||||
// Last returns a pointer to the last JSON object namespace.
|
||||
func (nss objectNamespaceStack) Last() *objectNamespace {
|
||||
return &nss[len(nss)-1]
|
||||
}
|
||||
|
||||
// pop terminates the namespace for a nested JSON object.
|
||||
func (nss *objectNamespaceStack) pop() {
|
||||
*nss = (*nss)[:len(*nss)-1]
|
||||
}
|
||||
|
||||
// objectNamespace is the namespace for a JSON object.
|
||||
// In contrast to objectNameStack, this needs to remember all names
|
||||
// per JSON object.
|
||||
//
|
||||
// The zero value is an empty namespace ready for use.
|
||||
type objectNamespace struct {
|
||||
// It relies on a linear search over all the names before switching
|
||||
// to use a Go map for direct lookup.
|
||||
|
||||
// endOffsets is a list of offsets to the end of each name in buffers.
|
||||
// The length of offsets is the number of names in the namespace.
|
||||
endOffsets []uint
|
||||
// allUnquotedNames is a back-to-back concatenation of every name in the namespace.
|
||||
allUnquotedNames []byte
|
||||
// mapNames is a Go map containing every name in the namespace.
|
||||
// Only valid if non-nil.
|
||||
mapNames map[string]struct{}
|
||||
}
|
||||
|
||||
// reset resets the namespace to be empty.
|
||||
func (ns *objectNamespace) reset() {
|
||||
ns.endOffsets = ns.endOffsets[:0]
|
||||
ns.allUnquotedNames = ns.allUnquotedNames[:0]
|
||||
ns.mapNames = nil
|
||||
if cap(ns.endOffsets) > 1<<6 {
|
||||
ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
if cap(ns.allUnquotedNames) > 1<<10 {
|
||||
ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
}
|
||||
|
||||
// length reports the number of names in the namespace.
|
||||
func (ns *objectNamespace) length() int {
|
||||
return len(ns.endOffsets)
|
||||
}
|
||||
|
||||
// getUnquoted retrieves the ith unquoted name in the namespace.
|
||||
func (ns *objectNamespace) getUnquoted(i int) []byte {
|
||||
if i == 0 {
|
||||
return ns.allUnquotedNames[:ns.endOffsets[0]]
|
||||
} else {
|
||||
return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]]
|
||||
}
|
||||
}
|
||||
|
||||
// lastUnquoted retrieves the last name in the namespace.
|
||||
func (ns *objectNamespace) lastUnquoted() []byte {
|
||||
return ns.getUnquoted(ns.length() - 1)
|
||||
}
|
||||
|
||||
// insertQuoted inserts a name and reports whether it was inserted,
|
||||
// which only occurs if name is not already in the namespace.
|
||||
// The provided name must be a valid JSON string.
|
||||
func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
|
||||
if isVerbatim {
|
||||
name = name[len(`"`) : len(name)-len(`"`)]
|
||||
}
|
||||
return ns.insert(name, !isVerbatim)
|
||||
}
|
||||
func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
|
||||
return ns.insert(name, false)
|
||||
}
|
||||
func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
|
||||
var allNames []byte
|
||||
if quoted {
|
||||
allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
|
||||
} else {
|
||||
allNames = append(ns.allUnquotedNames, name...)
|
||||
}
|
||||
name = allNames[len(ns.allUnquotedNames):]
|
||||
|
||||
// Switch to a map if the buffer is too large for linear search.
|
||||
// This does not add the current name to the map.
|
||||
if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
|
||||
ns.mapNames = make(map[string]struct{})
|
||||
var startOffset uint
|
||||
for _, endOffset := range ns.endOffsets {
|
||||
name := ns.allUnquotedNames[startOffset:endOffset]
|
||||
ns.mapNames[string(name)] = struct{}{} // allocates a new string
|
||||
startOffset = endOffset
|
||||
}
|
||||
}
|
||||
|
||||
if ns.mapNames == nil {
|
||||
// Perform linear search over the buffer to find matching names.
|
||||
// It provides O(n) lookup, but does not require any allocations.
|
||||
var startOffset uint
|
||||
for _, endOffset := range ns.endOffsets {
|
||||
if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
|
||||
return false
|
||||
}
|
||||
startOffset = endOffset
|
||||
}
|
||||
} else {
|
||||
// Use the map if it is populated.
|
||||
// It provides O(1) lookup, but requires a string allocation per name.
|
||||
if _, ok := ns.mapNames[string(name)]; ok {
|
||||
return false
|
||||
}
|
||||
ns.mapNames[string(name)] = struct{}{} // allocates a new string
|
||||
}
|
||||
|
||||
ns.allUnquotedNames = allNames
|
||||
ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
|
||||
return true
|
||||
}
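A usage sketch of the duplicate detection above (exampleNamespaceInsert is a hypothetical helper, not part of this file); note that detection operates on the unescaped name, so differently escaped spellings of the same name collide:
// exampleNamespaceInsert inserts the same logical name in different spellings.
func exampleNamespaceInsert() {
	var ns objectNamespace
	_ = ns.insertQuoted([]byte(`"alpha"`), false)      // true:  first occurrence
	_ = ns.insertQuoted([]byte(`"alpha"`), false)      // false: duplicate name
	_ = ns.insertQuoted([]byte(`"\u0061lpha"`), false) // false: unescapes to "alpha"
}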
|
||||
|
||||
// removeLast removes the last name in the namespace.
|
||||
func (ns *objectNamespace) removeLast() {
|
||||
if ns.mapNames != nil {
|
||||
delete(ns.mapNames, string(ns.lastUnquoted()))
|
||||
}
|
||||
if ns.length()-1 == 0 {
|
||||
ns.endOffsets = ns.endOffsets[:0]
|
||||
ns.allUnquotedNames = ns.allUnquotedNames[:0]
|
||||
} else {
|
||||
ns.endOffsets = ns.endOffsets[:ns.length()-1]
|
||||
ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
|
||||
}
|
||||
}
|
||||
396
pkg/encoders/json/jsontext/state_test.go
Normal file
@@ -0,0 +1,396 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestPointer(t *testing.T) {
|
||||
tests := []struct {
|
||||
in Pointer
|
||||
wantParent Pointer
|
||||
wantLast string
|
||||
wantTokens []string
|
||||
wantValid bool
|
||||
}{
|
||||
{"", "", "", nil, true},
|
||||
{"a", "", "a", []string{"a"}, false},
|
||||
{"~", "", "~", []string{"~"}, false},
|
||||
{"/a", "", "a", []string{"a"}, true},
|
||||
{"/foo/bar", "/foo", "bar", []string{"foo", "bar"}, true},
|
||||
{"///", "//", "", []string{"", "", ""}, true},
|
||||
{"/~0~1", "", "~/", []string{"~/"}, true},
|
||||
{"/\xde\xad\xbe\xef", "", "\xde\xad\xbe\xef", []string{"\xde\xad\xbe\xef"}, false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
if got := tt.in.Parent(); got != tt.wantParent {
|
||||
t.Errorf("Pointer(%q).Parent = %q, want %q", tt.in, got, tt.wantParent)
|
||||
}
|
||||
if got := tt.in.LastToken(); got != tt.wantLast {
|
||||
t.Errorf("Pointer(%q).Last = %q, want %q", tt.in, got, tt.wantLast)
|
||||
}
|
||||
if strings.HasPrefix(string(tt.in), "/") {
|
||||
wantRoundtrip := tt.in
|
||||
if !utf8.ValidString(string(wantRoundtrip)) {
|
||||
// Replace bytes of invalid UTF-8 with Unicode replacement character.
|
||||
wantRoundtrip = Pointer([]rune(wantRoundtrip))
|
||||
}
|
||||
if got := tt.in.Parent().AppendToken(tt.in.LastToken()); got != wantRoundtrip {
|
||||
t.Errorf("Pointer(%q).Parent().AppendToken(LastToken()) = %q, want %q", tt.in, got, tt.in)
|
||||
}
|
||||
in := tt.in
|
||||
for {
|
||||
if (in + "x").Contains(tt.in) {
|
||||
t.Errorf("Pointer(%q).Contains(%q) = true, want false", in+"x", tt.in)
|
||||
}
|
||||
if !in.Contains(tt.in) {
|
||||
t.Errorf("Pointer(%q).Contains(%q) = false, want true", in, tt.in)
|
||||
}
|
||||
if in == in.Parent() {
|
||||
break
|
||||
}
|
||||
in = in.Parent()
|
||||
}
|
||||
}
|
||||
if got := slices.Collect(tt.in.Tokens()); !slices.Equal(got, tt.wantTokens) {
|
||||
t.Errorf("Pointer(%q).Tokens = %q, want %q", tt.in, got, tt.wantTokens)
|
||||
}
|
||||
if got := tt.in.IsValid(); got != tt.wantValid {
|
||||
t.Errorf("Pointer(%q).IsValid = %v, want %v", tt.in, got, tt.wantValid)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStateMachine(t *testing.T) {
|
||||
// To test a state machine, we pass an ordered sequence of operations and
|
||||
// check whether the current state is as expected.
|
||||
// The operation type is a union type of various possible operations,
|
||||
// which either call mutating methods on the state machine or
|
||||
// call accessor methods on state machine and verify the results.
|
||||
type operation any
|
||||
type (
|
||||
// stackLengths checks the results of stateEntry.length accessors.
|
||||
stackLengths []int64
|
||||
|
||||
// appendTokens is a sequence of token kinds to append where
|
||||
// none of them are expected to fail.
|
||||
//
|
||||
// For example: `[n"0]` is equivalent to the following sequence:
|
||||
//
|
||||
// pushArray()
|
||||
// appendLiteral()
|
||||
// appendString()
|
||||
// appendNumber()
|
||||
// popArray()
|
||||
//
|
||||
appendTokens string
|
||||
|
||||
// appendToken is a single token kind to append with the expected error.
|
||||
appendToken struct {
|
||||
kind Kind
|
||||
want error
|
||||
}
|
||||
|
||||
// needDelim checks the result of the needDelim accessor.
|
||||
needDelim struct {
|
||||
next Kind
|
||||
want byte
|
||||
}
|
||||
)
|
||||
|
||||
// Each entry is a sequence of tokens to pass to the state machine.
|
||||
tests := []struct {
|
||||
label string
|
||||
ops []operation
|
||||
}{{
|
||||
"TopLevelValues",
|
||||
[]operation{
|
||||
stackLengths{0},
|
||||
needDelim{'n', 0},
|
||||
appendTokens(`nft`),
|
||||
stackLengths{3},
|
||||
needDelim{'"', 0},
|
||||
appendTokens(`"0[]{}`),
|
||||
stackLengths{7},
|
||||
},
|
||||
}, {
|
||||
"ArrayValues",
|
||||
[]operation{
|
||||
stackLengths{0},
|
||||
needDelim{'[', 0},
|
||||
appendTokens(`[`),
|
||||
stackLengths{1, 0},
|
||||
needDelim{'n', 0},
|
||||
appendTokens(`nft`),
|
||||
stackLengths{1, 3},
|
||||
needDelim{'"', ','},
|
||||
appendTokens(`"0[]{}`),
|
||||
stackLengths{1, 7},
|
||||
needDelim{']', 0},
|
||||
appendTokens(`]`),
|
||||
stackLengths{1},
|
||||
},
|
||||
}, {
|
||||
"ObjectValues",
|
||||
[]operation{
|
||||
stackLengths{0},
|
||||
needDelim{'{', 0},
|
||||
appendTokens(`{`),
|
||||
stackLengths{1, 0},
|
||||
needDelim{'"', 0},
|
||||
appendTokens(`"`),
|
||||
stackLengths{1, 1},
|
||||
needDelim{'n', ':'},
|
||||
appendTokens(`n`),
|
||||
stackLengths{1, 2},
|
||||
needDelim{'"', ','},
|
||||
appendTokens(`"f"t`),
|
||||
stackLengths{1, 6},
|
||||
appendTokens(`"""0"[]"{}`),
|
||||
stackLengths{1, 14},
|
||||
needDelim{'}', 0},
|
||||
appendTokens(`}`),
|
||||
stackLengths{1},
|
||||
},
|
||||
}, {
|
||||
"ObjectCardinality",
|
||||
[]operation{
|
||||
appendTokens(`{`),
|
||||
|
||||
// Appending any kind other than string for object name is an error.
|
||||
appendToken{'n', ErrNonStringName},
|
||||
appendToken{'f', ErrNonStringName},
|
||||
appendToken{'t', ErrNonStringName},
|
||||
appendToken{'0', ErrNonStringName},
|
||||
appendToken{'{', ErrNonStringName},
|
||||
appendToken{'[', ErrNonStringName},
|
||||
appendTokens(`"`),
|
||||
|
||||
// Appending '}' without first appending any value is an error.
|
||||
appendToken{'}', errMissingValue},
|
||||
appendTokens(`"`),
|
||||
|
||||
appendTokens(`}`),
|
||||
},
|
||||
}, {
|
||||
"MismatchingDelims",
|
||||
[]operation{
|
||||
appendToken{'}', errMismatchDelim}, // appending '}' without preceding '{'
|
||||
appendTokens(`[[{`),
|
||||
appendToken{']', errMismatchDelim}, // appending ']' that mismatches preceding '{'
|
||||
appendTokens(`}]`),
|
||||
appendToken{'}', errMismatchDelim}, // appending '}' that mismatches preceding '['
|
||||
appendTokens(`]`),
|
||||
appendToken{']', errMismatchDelim}, // appending ']' without preceding '['
|
||||
},
|
||||
}}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.label, func(t *testing.T) {
|
||||
// Flatten appendTokens to sequence of appendToken entries.
|
||||
var ops []operation
|
||||
for _, op := range tt.ops {
|
||||
if toks, ok := op.(appendTokens); ok {
|
||||
for _, k := range []byte(toks) {
|
||||
ops = append(ops, appendToken{Kind(k), nil})
|
||||
}
|
||||
continue
|
||||
}
|
||||
ops = append(ops, op)
|
||||
}
|
||||
|
||||
// Append each token to the state machine and check the output.
|
||||
var state stateMachine
|
||||
state.reset()
|
||||
var sequence []Kind
|
||||
for _, op := range ops {
|
||||
switch op := op.(type) {
|
||||
case stackLengths:
|
||||
var got []int64
|
||||
for i := range state.Depth() {
|
||||
e := state.index(i)
|
||||
got = append(got, e.Length())
|
||||
}
|
||||
want := []int64(op)
|
||||
if !slices.Equal(got, want) {
|
||||
t.Fatalf("%s: stack lengths mismatch:\ngot %v\nwant %v", sequence, got, want)
|
||||
}
|
||||
case appendToken:
|
||||
got := state.append(op.kind)
|
||||
if !equalError(got, op.want) {
|
||||
t.Fatalf("%s: append('%c') = %v, want %v", sequence, op.kind, got, op.want)
|
||||
}
|
||||
if got == nil {
|
||||
sequence = append(sequence, op.kind)
|
||||
}
|
||||
case needDelim:
|
||||
if got := state.needDelim(op.next); got != op.want {
|
||||
t.Fatalf("%s: needDelim('%c') = '%c', want '%c'", sequence, op.next, got, op.want)
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown operation: %T", op))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// append is a thin wrapper over the other append, pop, or push methods
|
||||
// based on the token kind.
|
||||
func (s *stateMachine) append(k Kind) error {
|
||||
switch k {
|
||||
case 'n', 'f', 't':
|
||||
return s.appendLiteral()
|
||||
case '"':
|
||||
return s.appendString()
|
||||
case '0':
|
||||
return s.appendNumber()
|
||||
case '{':
|
||||
return s.pushObject()
|
||||
case '}':
|
||||
return s.popObject()
|
||||
case '[':
|
||||
return s.pushArray()
|
||||
case ']':
|
||||
return s.popArray()
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid token kind: '%c'", k))
|
||||
}
|
||||
}
|
||||
|
||||
func TestObjectNamespace(t *testing.T) {
|
||||
type operation any
|
||||
type (
|
||||
insert struct {
|
||||
name string
|
||||
wantInserted bool
|
||||
}
|
||||
removeLast struct{}
|
||||
)
|
||||
|
||||
// Sequence of insert operations to perform (order matters).
|
||||
ops := []operation{
|
||||
insert{`""`, true},
|
||||
removeLast{},
|
||||
insert{`""`, true},
|
||||
insert{`""`, false},
|
||||
|
||||
// Test insertion of the same name with different formatting.
|
||||
insert{`"alpha"`, true},
|
||||
insert{`"ALPHA"`, true}, // case-sensitive matching
|
||||
insert{`"alpha"`, false},
|
||||
insert{`"\u0061\u006c\u0070\u0068\u0061"`, false}, // unescapes to "alpha"
|
||||
removeLast{}, // removes "ALPHA"
|
||||
insert{`"alpha"`, false},
|
||||
removeLast{}, // removes "alpha"
|
||||
insert{`"alpha"`, true},
|
||||
removeLast{},
|
||||
|
||||
// Bulk insert simple names.
|
||||
insert{`"alpha"`, true},
|
||||
insert{`"bravo"`, true},
|
||||
insert{`"charlie"`, true},
|
||||
insert{`"delta"`, true},
|
||||
insert{`"echo"`, true},
|
||||
insert{`"foxtrot"`, true},
|
||||
insert{`"golf"`, true},
|
||||
insert{`"hotel"`, true},
|
||||
insert{`"india"`, true},
|
||||
insert{`"juliet"`, true},
|
||||
insert{`"kilo"`, true},
|
||||
insert{`"lima"`, true},
|
||||
insert{`"mike"`, true},
|
||||
insert{`"november"`, true},
|
||||
insert{`"oscar"`, true},
|
||||
insert{`"papa"`, true},
|
||||
insert{`"quebec"`, true},
|
||||
insert{`"romeo"`, true},
|
||||
insert{`"sierra"`, true},
|
||||
insert{`"tango"`, true},
|
||||
insert{`"uniform"`, true},
|
||||
insert{`"victor"`, true},
|
||||
insert{`"whiskey"`, true},
|
||||
insert{`"xray"`, true},
|
||||
insert{`"yankee"`, true},
|
||||
insert{`"zulu"`, true},
|
||||
|
||||
// Test insertion of invalid UTF-8.
|
||||
insert{`"` + "\ufffd" + `"`, true},
|
||||
insert{`"` + "\ufffd" + `"`, false},
|
||||
insert{`"\ufffd"`, false}, // unescapes to Unicode replacement character
|
||||
insert{`"\uFFFD"`, false}, // unescapes to Unicode replacement character
|
||||
insert{`"` + "\xff" + `"`, false}, // mangles as Unicode replacement character
|
||||
removeLast{},
|
||||
insert{`"` + "\ufffd" + `"`, true},
|
||||
|
||||
// Test insertion of unicode characters.
|
||||
insert{`"☺☻☹"`, true},
|
||||
insert{`"☺☻☹"`, false},
|
||||
removeLast{},
|
||||
insert{`"☺☻☹"`, true},
|
||||
}
|
||||
|
||||
// Execute the sequence of operations twice:
|
||||
// 1) on a fresh namespace and 2) on a namespace that has been reset.
|
||||
var ns objectNamespace
|
||||
wantNames := []string{}
|
||||
for _, reset := range []bool{false, true} {
|
||||
if reset {
|
||||
ns.reset()
|
||||
wantNames = nil
|
||||
}
|
||||
|
||||
// Execute the operations and ensure the state is consistent.
|
||||
for i, op := range ops {
|
||||
switch op := op.(type) {
|
||||
case insert:
|
||||
gotInserted := ns.insertQuoted([]byte(op.name), false)
|
||||
if gotInserted != op.wantInserted {
|
||||
t.Fatalf("%d: objectNamespace{%v}.insert(%v) = %v, want %v", i, strings.Join(wantNames, " "), op.name, gotInserted, op.wantInserted)
|
||||
}
|
||||
if gotInserted {
|
||||
b, _ := AppendUnquote(nil, []byte(op.name))
|
||||
wantNames = append(wantNames, string(b))
|
||||
}
|
||||
case removeLast:
|
||||
ns.removeLast()
|
||||
wantNames = wantNames[:len(wantNames)-1]
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown operation: %T", op))
|
||||
}
|
||||
|
||||
// Check that the namespace is consistent.
|
||||
gotNames := []string{}
|
||||
for i := range ns.length() {
|
||||
gotNames = append(gotNames, string(ns.getUnquoted(i)))
|
||||
}
|
||||
if !slices.Equal(gotNames, wantNames) {
|
||||
t.Fatalf("%d: objectNamespace = {%v}, want {%v}", i, strings.Join(gotNames, " "), strings.Join(wantNames, " "))
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that we have not switched to using a Go map.
|
||||
if ns.mapNames != nil {
|
||||
t.Errorf("objectNamespace.mapNames = non-nil, want nil")
|
||||
}
|
||||
|
||||
// Insert a large number of names.
|
||||
for i := range 64 {
|
||||
ns.InsertUnquoted([]byte(fmt.Sprintf(`name%d`, i)))
|
||||
}
|
||||
|
||||
// Verify that we did switch to using a Go map.
|
||||
if ns.mapNames == nil {
|
||||
t.Errorf("objectNamespace.mapNames = nil, want non-nil")
|
||||
}
|
||||
}
|
||||
}
|
||||
527
pkg/encoders/json/jsontext/token.go
Normal file
@@ -0,0 +1,527 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"encoding/json/internal/jsonflags"
|
||||
"encoding/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// NOTE: Token is analogous to v1 json.Token.
|
||||
|
||||
const (
|
||||
maxInt64 = math.MaxInt64
|
||||
minInt64 = math.MinInt64
|
||||
maxUint64 = math.MaxUint64
|
||||
minUint64 = 0 // for consistency and readability purposes
|
||||
|
||||
invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call"
|
||||
)
|
||||
|
||||
var errInvalidToken = errors.New("invalid jsontext.Token")
|
||||
|
||||
// Token represents a lexical JSON token, which may be one of the following:
|
||||
// - a JSON literal (i.e., null, true, or false)
|
||||
// - a JSON string (e.g., "hello, world!")
|
||||
// - a JSON number (e.g., 123.456)
|
||||
// - a begin or end delimiter for a JSON object (i.e., { or } )
|
||||
// - a begin or end delimiter for a JSON array (i.e., [ or ] )
|
||||
//
|
||||
// A Token cannot represent entire array or object values, while a [Value] can.
|
||||
// There is no Token to represent commas and colons since
|
||||
// these structural tokens can be inferred from the surrounding context.
|
||||
type Token struct {
|
||||
nonComparable
|
||||
|
||||
// Tokens can exist in either a "raw" or an "exact" form.
|
||||
// Tokens produced by the Decoder are in the "raw" form.
|
||||
// Tokens returned by constructors are usually in the "exact" form.
|
||||
// The Encoder accepts Tokens in either the "raw" or "exact" form.
|
||||
//
|
||||
// The following chart shows the possible values for each Token type:
|
||||
// ╔═════════════════╦════════════╤════════════╤════════════╗
|
||||
// ║ Token type ║ raw field │ str field │ num field ║
|
||||
// ╠═════════════════╬════════════╪════════════╪════════════╣
|
||||
// ║ null (raw) ║ "null" │ "" │ 0 ║
|
||||
// ║ false (raw) ║ "false" │ "" │ 0 ║
|
||||
// ║ true (raw) ║ "true" │ "" │ 0 ║
|
||||
// ║ string (raw) ║ non-empty │ "" │ offset ║
|
||||
// ║ string (string) ║ nil │ non-empty │ 0 ║
|
||||
// ║ number (raw) ║ non-empty │ "" │ offset ║
|
||||
// ║ number (float) ║ nil │ "f" │ non-zero ║
|
||||
// ║ number (int64) ║ nil │ "i" │ non-zero ║
|
||||
// ║ number (uint64) ║ nil │ "u" │ non-zero ║
|
||||
// ║ object (delim) ║ "{" or "}" │ "" │ 0 ║
|
||||
// ║ array (delim) ║ "[" or "]" │ "" │ 0 ║
|
||||
// ╚═════════════════╩════════════╧════════════╧════════════╝
|
||||
//
|
||||
// Notes:
|
||||
// - For tokens stored in "raw" form, the num field contains the
|
||||
// absolute offset determined by raw.previousOffsetStart().
|
||||
// The buffer itself is stored in raw.previousBuffer().
|
||||
// - JSON literals and structural characters are always in the "raw" form.
|
||||
// - JSON strings and numbers can be in either "raw" or "exact" forms.
|
||||
// - The exact zero values of JSON strings and numbers in the "exact" forms
|
||||
// have ambiguous representation. Thus, they are always represented
|
||||
// in the "raw" form.
|
||||
|
||||
// raw contains a reference to the raw decode buffer.
|
||||
// If non-nil, then its value takes precedence over str and num.
|
||||
// It is only valid if num == raw.previousOffsetStart().
|
||||
raw *decodeBuffer
|
||||
|
||||
// str is the unescaped JSON string if num is zero.
|
||||
// Otherwise, it is "f", "i", or "u" if num should be interpreted
|
||||
// as a float64, int64, or uint64, respectively.
|
||||
str string
|
||||
|
||||
// num is a float64, int64, or uint64 stored as a uint64 value.
|
||||
// It is non-zero for any JSON number in the "exact" form.
|
||||
num uint64
|
||||
}
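To make the chart concrete, a small sketch of the two forms (exampleTokenForms is a hypothetical helper, not part of this file):
// exampleTokenForms constructs the same JSON string in its "exact" and "raw" forms.
func exampleTokenForms() {
	_ = String("hello")     // "exact" form: str holds the unescaped value directly
	_ = rawToken(`"hello"`) // "raw" form: raw references a decode buffer, as the Decoder produces
}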
|
||||
|
||||
// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues?
|
||||
|
||||
var (
|
||||
Null Token = rawToken("null")
|
||||
False Token = rawToken("false")
|
||||
True Token = rawToken("true")
|
||||
|
||||
BeginObject Token = rawToken("{")
|
||||
EndObject Token = rawToken("}")
|
||||
BeginArray Token = rawToken("[")
|
||||
EndArray Token = rawToken("]")
|
||||
|
||||
zeroString Token = rawToken(`""`)
|
||||
zeroNumber Token = rawToken(`0`)
|
||||
|
||||
nanString Token = String("NaN")
|
||||
pinfString Token = String("Infinity")
|
||||
ninfString Token = String("-Infinity")
|
||||
)
|
||||
|
||||
func rawToken(s string) Token {
|
||||
return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}}
|
||||
}
|
||||
|
||||
// Bool constructs a Token representing a JSON boolean.
|
||||
func Bool(b bool) Token {
|
||||
if b {
|
||||
return True
|
||||
}
|
||||
return False
|
||||
}
|
||||
|
||||
// String constructs a Token representing a JSON string.
|
||||
// The provided string should contain valid UTF-8, otherwise invalid characters
|
||||
// may be mangled as the Unicode replacement character.
|
||||
func String(s string) Token {
|
||||
if len(s) == 0 {
|
||||
return zeroString
|
||||
}
|
||||
return Token{str: s}
|
||||
}
|
||||
|
||||
// Float constructs a Token representing a JSON number.
|
||||
// The values NaN, +Inf, and -Inf will be represented
|
||||
// as a JSON string with the values "NaN", "Infinity", and "-Infinity".
|
||||
func Float(n float64) Token {
|
||||
switch {
|
||||
case math.Float64bits(n) == 0:
|
||||
return zeroNumber
|
||||
case math.IsNaN(n):
|
||||
return nanString
|
||||
case math.IsInf(n, +1):
|
||||
return pinfString
|
||||
case math.IsInf(n, -1):
|
||||
return ninfString
|
||||
}
|
||||
return Token{str: "f", num: math.Float64bits(n)}
|
||||
}
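A brief sketch of how non-finite values round-trip through their string spellings (exampleNonFinite is a hypothetical helper, not part of this file):
// exampleNonFinite shows that +Inf is stored as the JSON string "Infinity",
// yet the Float accessor still recovers the original floating-point value.
func exampleNonFinite() {
	t := Float(math.Inf(+1))
	_ = t.Kind()  // '"': encoded as the JSON string "Infinity"
	_ = t.Float() // +Inf: the Float accessor recognizes the special spellings
}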
|
||||
|
||||
// Int constructs a Token representing a JSON number from an int64.
|
||||
func Int(n int64) Token {
|
||||
if n == 0 {
|
||||
return zeroNumber
|
||||
}
|
||||
return Token{str: "i", num: uint64(n)}
|
||||
}
|
||||
|
||||
// Uint constructs a Token representing a JSON number from a uint64.
|
||||
func Uint(n uint64) Token {
|
||||
if n == 0 {
|
||||
return zeroNumber
|
||||
}
|
||||
return Token{str: "u", num: uint64(n)}
|
||||
}
|
||||
|
||||
// Clone makes a copy of the Token such that its value remains valid
|
||||
// even after a subsequent [Decoder.Read] call.
|
||||
func (t Token) Clone() Token {
|
||||
// TODO: Allow caller to avoid any allocations?
|
||||
if raw := t.raw; raw != nil {
|
||||
// Avoid copying globals.
|
||||
if t.raw.prevStart == 0 {
|
||||
switch t.raw {
|
||||
case Null.raw:
|
||||
return Null
|
||||
case False.raw:
|
||||
return False
|
||||
case True.raw:
|
||||
return True
|
||||
case BeginObject.raw:
|
||||
return BeginObject
|
||||
case EndObject.raw:
|
||||
return EndObject
|
||||
case BeginArray.raw:
|
||||
return BeginArray
|
||||
case EndArray.raw:
|
||||
return EndArray
|
||||
}
|
||||
}
|
||||
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
buf := bytes.Clone(raw.previousBuffer())
|
||||
return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}}
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// Bool returns the value for a JSON boolean.
|
||||
// It panics if the token kind is not a JSON boolean.
|
||||
func (t Token) Bool() bool {
|
||||
switch t.raw {
|
||||
case True.raw:
|
||||
return true
|
||||
case False.raw:
|
||||
return false
|
||||
default:
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
}
|
||||
|
||||
// appendString appends a JSON string to dst and returns it.
|
||||
// It panics if t is not a JSON string.
|
||||
func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw string value.
|
||||
buf := raw.previousBuffer()
|
||||
if Kind(buf[0]) == '"' {
|
||||
if jsonwire.ConsumeSimpleString(buf) == len(buf) {
|
||||
return append(dst, buf...), nil
|
||||
}
|
||||
dst, _, err := jsonwire.ReformatString(dst, buf, flags)
|
||||
return dst, err
|
||||
}
|
||||
} else if len(t.str) != 0 && t.num == 0 {
|
||||
// Handle exact string value.
|
||||
return jsonwire.AppendQuote(dst, t.str, flags)
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// String returns the unescaped string value for a JSON string.
|
||||
// For other JSON kinds, this returns the raw JSON representation.
|
||||
func (t Token) String() string {
|
||||
// This is inlinable to take advantage of "function outlining".
|
||||
// This avoids an allocation for the string(b) conversion
|
||||
// if the caller does not use the string in an escaping manner.
|
||||
// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
|
||||
s, b := t.string()
|
||||
if len(b) > 0 {
|
||||
return string(b)
|
||||
}
|
||||
return s
|
||||
}
|
||||
func (t Token) string() (string, []byte) {
|
||||
if raw := t.raw; raw != nil {
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
buf := raw.previousBuffer()
|
||||
if buf[0] == '"' {
|
||||
// TODO: Preserve ValueFlags in Token?
|
||||
isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf)
|
||||
return "", jsonwire.UnquoteMayCopy(buf, isVerbatim)
|
||||
}
|
||||
// Handle tokens that are not JSON strings for fmt.Stringer.
|
||||
return "", buf
|
||||
}
|
||||
if len(t.str) != 0 && t.num == 0 {
|
||||
return t.str, nil
|
||||
}
|
||||
// Handle tokens that are not JSON strings for fmt.Stringer.
|
||||
if t.num > 0 {
|
||||
switch t.str[0] {
|
||||
case 'f':
|
||||
return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil
|
||||
case 'i':
|
||||
return strconv.FormatInt(int64(t.num), 10), nil
|
||||
case 'u':
|
||||
return strconv.FormatUint(uint64(t.num), 10), nil
|
||||
}
|
||||
}
|
||||
return "<invalid jsontext.Token>", nil
|
||||
}
|
||||
|
||||
// appendNumber appends a JSON number to dst and returns it.
|
||||
// It panics if t is not a JSON number.
|
||||
func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw number value.
|
||||
buf := raw.previousBuffer()
|
||||
if Kind(buf[0]).normalize() == '0' {
|
||||
dst, _, err := jsonwire.ReformatNumber(dst, buf, flags)
|
||||
return dst, err
|
||||
}
|
||||
} else if t.num != 0 {
|
||||
// Handle exact number value.
|
||||
switch t.str[0] {
|
||||
case 'f':
|
||||
return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil
|
||||
case 'i':
|
||||
return strconv.AppendInt(dst, int64(t.num), 10), nil
|
||||
case 'u':
|
||||
return strconv.AppendUint(dst, uint64(t.num), 10), nil
|
||||
}
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// Float returns the floating-point value for a JSON number.
|
||||
// It returns a NaN, +Inf, or -Inf value for any JSON string
|
||||
// with the values "NaN", "Infinity", or "-Infinity".
|
||||
// It panics for all other cases.
|
||||
func (t Token) Float() float64 {
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw number value.
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
buf := raw.previousBuffer()
|
||||
if Kind(buf[0]).normalize() == '0' {
|
||||
fv, _ := jsonwire.ParseFloat(buf, 64)
|
||||
return fv
|
||||
}
|
||||
} else if t.num != 0 {
|
||||
// Handle exact number value.
|
||||
switch t.str[0] {
|
||||
case 'f':
|
||||
return math.Float64frombits(t.num)
|
||||
case 'i':
|
||||
return float64(int64(t.num))
|
||||
case 'u':
|
||||
return float64(uint64(t.num))
|
||||
}
|
||||
}
|
||||
|
||||
// Handle string values with "NaN", "Infinity", or "-Infinity".
|
||||
if t.Kind() == '"' {
|
||||
switch t.String() {
|
||||
case "NaN":
|
||||
return math.NaN()
|
||||
case "Infinity":
|
||||
return math.Inf(+1)
|
||||
case "-Infinity":
|
||||
return math.Inf(-1)
|
||||
}
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// Int returns the signed integer value for a JSON number.
|
||||
// The fractional component of any number is ignored (truncation toward zero).
|
||||
// Any number beyond the representation of an int64 will be saturated
|
||||
// to the closest representable value.
|
||||
// It panics if the token kind is not a JSON number.
|
||||
func (t Token) Int() int64 {
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw integer value.
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
neg := false
|
||||
buf := raw.previousBuffer()
|
||||
if len(buf) > 0 && buf[0] == '-' {
|
||||
neg, buf = true, buf[1:]
|
||||
}
|
||||
if numAbs, ok := jsonwire.ParseUint(buf); ok {
|
||||
if neg {
|
||||
if numAbs > -minInt64 {
|
||||
return minInt64
|
||||
}
|
||||
return -1 * int64(numAbs)
|
||||
} else {
|
||||
if numAbs > +maxInt64 {
|
||||
return maxInt64
|
||||
}
|
||||
return +1 * int64(numAbs)
|
||||
}
|
||||
}
|
||||
} else if t.num != 0 {
|
||||
// Handle exact integer value.
|
||||
switch t.str[0] {
|
||||
case 'i':
|
||||
return int64(t.num)
|
||||
case 'u':
|
||||
if t.num > maxInt64 {
|
||||
return maxInt64
|
||||
}
|
||||
return int64(t.num)
|
||||
}
|
||||
}
|
||||
|
||||
// Handle JSON number that is a floating-point value.
|
||||
if t.Kind() == '0' {
|
||||
switch fv := t.Float(); {
|
||||
case fv >= maxInt64:
|
||||
return maxInt64
|
||||
case fv <= minInt64:
|
||||
return minInt64
|
||||
default:
|
||||
return int64(fv) // truncation toward zero
|
||||
}
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
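A sketch of the truncation and saturation behavior described above (exampleIntConversion is a hypothetical helper, not part of this file); the expected results mirror the accessor test table later in this commit:
// exampleIntConversion shows truncation toward zero and saturation at the int64 bounds.
func exampleIntConversion() {
	_ = rawToken(`1.9`).Int()    // 1: fractional component truncated toward zero
	_ = rawToken(`1e1000`).Int() // math.MaxInt64: saturated to the closest representable value
}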
|
||||
|
||||
// Uint returns the unsigned integer value for a JSON number.
|
||||
// The fractional component of any number is ignored (truncation toward zero).
|
||||
// Any number beyond the representation of an uint64 will be saturated
|
||||
// to the closest representable value.
|
||||
// It panics if the token kind is not a JSON number.
|
||||
func (t Token) Uint() uint64 {
|
||||
// NOTE: This accessor returns 0 for any negative JSON number,
|
||||
// which might be surprising, but is at least consistent with the behavior
|
||||
// of saturating out-of-bounds numbers to the closest representable number.
|
||||
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw integer value.
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
neg := false
|
||||
buf := raw.previousBuffer()
|
||||
if len(buf) > 0 && buf[0] == '-' {
|
||||
neg, buf = true, buf[1:]
|
||||
}
|
||||
if num, ok := jsonwire.ParseUint(buf); ok {
|
||||
if neg {
|
||||
return minUint64
|
||||
}
|
||||
return num
|
||||
}
|
||||
} else if t.num != 0 {
|
||||
// Handle exact integer value.
|
||||
switch t.str[0] {
|
||||
case 'u':
|
||||
return t.num
|
||||
case 'i':
|
||||
if int64(t.num) < minUint64 {
|
||||
return minUint64
|
||||
}
|
||||
return uint64(int64(t.num))
|
||||
}
|
||||
}
|
||||
|
||||
// Handle JSON number that is a floating-point value.
|
||||
if t.Kind() == '0' {
|
||||
switch fv := t.Float(); {
|
||||
case fv >= maxUint64:
|
||||
return maxUint64
|
||||
case fv <= minUint64:
|
||||
return minUint64
|
||||
default:
|
||||
return uint64(fv) // truncation toward zero
|
||||
}
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// Kind returns the token kind.
|
||||
func (t Token) Kind() Kind {
|
||||
switch {
|
||||
case t.raw != nil:
|
||||
raw := t.raw
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
return Kind(t.raw.buf[raw.prevStart]).normalize()
|
||||
case t.num != 0:
|
||||
return '0'
|
||||
case len(t.str) != 0:
|
||||
return '"'
|
||||
default:
|
||||
return invalidKind
|
||||
}
|
||||
}
|
||||
|
||||
// Kind represents each possible JSON token kind with a single byte,
|
||||
// which is conveniently the first byte of that kind's grammar
|
||||
// with the restriction that numbers always be represented with '0':
|
||||
//
|
||||
// - 'n': null
|
||||
// - 'f': false
|
||||
// - 't': true
|
||||
// - '"': string
|
||||
// - '0': number
|
||||
// - '{': object begin
|
||||
// - '}': object end
|
||||
// - '[': array begin
|
||||
// - ']': array end
|
||||
//
|
||||
// An invalid kind is usually represented using 0,
|
||||
// but may be non-zero due to invalid JSON data.
|
||||
type Kind byte
|
||||
|
||||
const invalidKind Kind = 0
|
||||
|
||||
// String prints the kind in a human-readable fashion.
|
||||
func (k Kind) String() string {
|
||||
switch k {
|
||||
case 'n':
|
||||
return "null"
|
||||
case 'f':
|
||||
return "false"
|
||||
case 't':
|
||||
return "true"
|
||||
case '"':
|
||||
return "string"
|
||||
case '0':
|
||||
return "number"
|
||||
case '{':
|
||||
return "{"
|
||||
case '}':
|
||||
return "}"
|
||||
case '[':
|
||||
return "["
|
||||
case ']':
|
||||
return "]"
|
||||
default:
|
||||
return "<invalid jsontext.Kind: " + jsonwire.QuoteRune(string(k)) + ">"
|
||||
}
|
||||
}
|
||||
|
||||
// normalize coalesces all possible starting characters of a number as just '0'.
|
||||
func (k Kind) normalize() Kind {
|
||||
if k == '-' || ('0' <= k && k <= '9') {
|
||||
return '0'
|
||||
}
|
||||
return k
|
||||
}
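A small sketch of normalization (exampleKindNormalize is a hypothetical helper, not part of this file):
// exampleKindNormalize shows that every possible leading character of a JSON
// number collapses to the single kind '0', while other kinds are unchanged.
func exampleKindNormalize() {
	_ = Kind('-').normalize() // '0'
	_ = Kind('7').normalize() // '0'
	_ = Kind('"').normalize() // '"': non-numeric kinds pass through untouched
}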
|
||||
168
pkg/encoders/json/jsontext/token_test.go
Normal file
@@ -0,0 +1,168 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTokenStringAllocations(t *testing.T) {
|
||||
if testing.CoverMode() != "" {
|
||||
t.Skip("coverage mode breaks the compiler optimization this depends on")
|
||||
}
|
||||
|
||||
tok := rawToken(`"hello"`)
|
||||
var m map[string]bool
|
||||
got := int(testing.AllocsPerRun(10, func() {
|
||||
// This function uses tok.String() in a non-escaping manner
|
||||
// (i.e., looking it up in a Go map). It should not allocate.
|
||||
if m[tok.String()] {
|
||||
panic("never executed")
|
||||
}
|
||||
}))
|
||||
if got > 0 {
|
||||
t.Errorf("Token.String allocated %d times, want 0", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokenAccessors(t *testing.T) {
|
||||
type token struct {
|
||||
Bool bool
|
||||
String string
|
||||
Float float64
|
||||
Int int64
|
||||
Uint uint64
|
||||
Kind Kind
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
in Token
|
||||
want token
|
||||
}{
|
||||
{Token{}, token{String: "<invalid jsontext.Token>"}},
|
||||
{Null, token{String: "null", Kind: 'n'}},
|
||||
{False, token{Bool: false, String: "false", Kind: 'f'}},
|
||||
{True, token{Bool: true, String: "true", Kind: 't'}},
|
||||
{Bool(false), token{Bool: false, String: "false", Kind: 'f'}},
|
||||
{Bool(true), token{Bool: true, String: "true", Kind: 't'}},
|
||||
{BeginObject, token{String: "{", Kind: '{'}},
|
||||
{EndObject, token{String: "}", Kind: '}'}},
|
||||
{BeginArray, token{String: "[", Kind: '['}},
|
||||
{EndArray, token{String: "]", Kind: ']'}},
|
||||
{String(""), token{String: "", Kind: '"'}},
|
||||
{String("hello, world!"), token{String: "hello, world!", Kind: '"'}},
|
||||
{rawToken(`"hello, world!"`), token{String: "hello, world!", Kind: '"'}},
|
||||
{Float(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}},
|
||||
{Float(math.Copysign(0, -1)), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}},
|
||||
{Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '"'}},
|
||||
{Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '"'}},
|
||||
{Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '"'}},
|
||||
{Int(minInt64), token{String: "-9223372036854775808", Float: minInt64, Int: minInt64, Uint: minUint64, Kind: '0'}},
|
||||
{Int(minInt64 + 1), token{String: "-9223372036854775807", Float: minInt64 + 1, Int: minInt64 + 1, Uint: minUint64, Kind: '0'}},
|
||||
{Int(-1), token{String: "-1", Float: -1, Int: -1, Uint: minUint64, Kind: '0'}},
|
||||
{Int(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}},
|
||||
{Int(+1), token{String: "1", Float: +1, Int: +1, Uint: +1, Kind: '0'}},
|
||||
{Int(maxInt64 - 1), token{String: "9223372036854775806", Float: maxInt64 - 1, Int: maxInt64 - 1, Uint: maxInt64 - 1, Kind: '0'}},
|
||||
{Int(maxInt64), token{String: "9223372036854775807", Float: maxInt64, Int: maxInt64, Uint: maxInt64, Kind: '0'}},
|
||||
{Uint(minUint64), token{String: "0", Kind: '0'}},
|
||||
{Uint(minUint64 + 1), token{String: "1", Float: minUint64 + 1, Int: minUint64 + 1, Uint: minUint64 + 1, Kind: '0'}},
|
||||
{Uint(maxUint64 - 1), token{String: "18446744073709551614", Float: maxUint64 - 1, Int: maxInt64, Uint: maxUint64 - 1, Kind: '0'}},
|
||||
{Uint(maxUint64), token{String: "18446744073709551615", Float: maxUint64, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
|
||||
{rawToken(`-0`), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}},
|
||||
{rawToken(`1e1000`), token{String: "1e1000", Float: math.MaxFloat64, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
|
||||
{rawToken(`-1e1000`), token{String: "-1e1000", Float: -math.MaxFloat64, Int: minInt64, Uint: minUint64, Kind: '0'}},
|
||||
{rawToken(`0.1`), token{String: "0.1", Float: 0.1, Int: 0, Uint: 0, Kind: '0'}},
|
||||
{rawToken(`0.5`), token{String: "0.5", Float: 0.5, Int: 0, Uint: 0, Kind: '0'}},
|
||||
{rawToken(`0.9`), token{String: "0.9", Float: 0.9, Int: 0, Uint: 0, Kind: '0'}},
|
||||
{rawToken(`1.1`), token{String: "1.1", Float: 1.1, Int: 1, Uint: 1, Kind: '0'}},
|
||||
{rawToken(`-0.1`), token{String: "-0.1", Float: -0.1, Int: 0, Uint: 0, Kind: '0'}},
|
||||
{rawToken(`-0.5`), token{String: "-0.5", Float: -0.5, Int: 0, Uint: 0, Kind: '0'}},
|
||||
{rawToken(`-0.9`), token{String: "-0.9", Float: -0.9, Int: 0, Uint: 0, Kind: '0'}},
|
||||
{rawToken(`-1.1`), token{String: "-1.1", Float: -1.1, Int: -1, Uint: 0, Kind: '0'}},
|
||||
{rawToken(`99999999999999999999`), token{String: "99999999999999999999", Float: 1e20 - 1, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
|
||||
{rawToken(`-99999999999999999999`), token{String: "-99999999999999999999", Float: -1e20 - 1, Int: minInt64, Uint: minUint64, Kind: '0'}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
got := token{
|
||||
Bool: func() bool {
|
||||
defer func() { recover() }()
|
||||
return tt.in.Bool()
|
||||
}(),
|
||||
String: tt.in.String(),
|
||||
Float: func() float64 {
|
||||
defer func() { recover() }()
|
||||
return tt.in.Float()
|
||||
}(),
|
||||
Int: func() int64 {
|
||||
defer func() { recover() }()
|
||||
return tt.in.Int()
|
||||
}(),
|
||||
Uint: func() uint64 {
|
||||
defer func() { recover() }()
|
||||
return tt.in.Uint()
|
||||
}(),
|
||||
Kind: tt.in.Kind(),
|
||||
}
|
||||
|
||||
if got.Bool != tt.want.Bool {
|
||||
t.Errorf("Token(%s).Bool() = %v, want %v", tt.in, got.Bool, tt.want.Bool)
|
||||
}
|
||||
if got.String != tt.want.String {
|
||||
t.Errorf("Token(%s).String() = %v, want %v", tt.in, got.String, tt.want.String)
|
||||
}
|
||||
if math.Float64bits(got.Float) != math.Float64bits(tt.want.Float) {
|
||||
t.Errorf("Token(%s).Float() = %v, want %v", tt.in, got.Float, tt.want.Float)
|
||||
}
|
||||
if got.Int != tt.want.Int {
|
||||
t.Errorf("Token(%s).Int() = %v, want %v", tt.in, got.Int, tt.want.Int)
|
||||
}
|
||||
if got.Uint != tt.want.Uint {
|
||||
t.Errorf("Token(%s).Uint() = %v, want %v", tt.in, got.Uint, tt.want.Uint)
|
||||
}
|
||||
if got.Kind != tt.want.Kind {
|
||||
t.Errorf("Token(%s).Kind() = %v, want %v", tt.in, got.Kind, tt.want.Kind)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokenClone(t *testing.T) {
|
||||
tests := []struct {
|
||||
in Token
|
||||
wantExactRaw bool
|
||||
}{
|
||||
{Token{}, true},
|
||||
{Null, true},
|
||||
{False, true},
|
||||
{True, true},
|
||||
{BeginObject, true},
|
||||
{EndObject, true},
|
||||
{BeginArray, true},
|
||||
{EndArray, true},
|
||||
{String("hello, world!"), true},
|
||||
{rawToken(`"hello, world!"`), false},
|
||||
{Float(3.14159), true},
|
||||
{rawToken(`3.14159`), false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
got := tt.in.Clone()
|
||||
if !reflect.DeepEqual(got, tt.in) {
|
||||
t.Errorf("Token(%s) == Token(%s).Clone() = false, want true", tt.in, tt.in)
|
||||
}
|
||||
gotExactRaw := got.raw == tt.in.raw
|
||||
if gotExactRaw != tt.wantExactRaw {
|
||||
t.Errorf("Token(%s).raw == Token(%s).Clone().raw = %v, want %v", tt.in, tt.in, gotExactRaw, tt.wantExactRaw)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
395
pkg/encoders/json/jsontext/value.go
Normal file
@@ -0,0 +1,395 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"encoding/json/internal/jsonflags"
|
||||
"encoding/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// NOTE: Value is analogous to v1 json.RawMessage.
|
||||
|
||||
// AppendFormat formats the JSON value in src and appends it to dst
|
||||
// according to the specified options.
|
||||
// See [Value.Format] for more details about the formatting behavior.
|
||||
//
|
||||
// The dst and src may overlap.
|
||||
// If an error is reported, then the entirety of src is appended to dst.
|
||||
func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) {
|
||||
e := getBufferedEncoder(opts...)
|
||||
defer putBufferedEncoder(e)
|
||||
e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
|
||||
if err := e.s.WriteValue(src); err != nil {
|
||||
return append(dst, src...), err
|
||||
}
|
||||
return append(dst, e.s.Buf...), nil
|
||||
}
|
||||
|
||||
// Value represents a single raw JSON value, which may be one of the following:
|
||||
// - a JSON literal (i.e., null, true, or false)
|
||||
// - a JSON string (e.g., "hello, world!")
|
||||
// - a JSON number (e.g., 123.456)
|
||||
// - an entire JSON object (e.g., {"fizz":"buzz"} )
|
||||
// - an entire JSON array (e.g., [1,2,3] )
|
||||
//
|
||||
// Value can represent entire array or object values, while [Token] cannot.
|
||||
// Value may contain leading and/or trailing whitespace.
|
||||
type Value []byte
|
||||
|
||||
// Clone returns a copy of v.
|
||||
func (v Value) Clone() Value {
|
||||
return bytes.Clone(v)
|
||||
}
|
||||
|
||||
// String returns the string formatting of v.
|
||||
func (v Value) String() string {
|
||||
if v == nil {
|
||||
return "null"
|
||||
}
|
||||
return string(v)
|
||||
}
|
||||
|
||||
// IsValid reports whether the raw JSON value is syntactically valid
|
||||
// according to the specified options.
|
||||
//
|
||||
// By default (if no options are specified), it validates according to RFC 7493.
|
||||
// It verifies whether the input is properly encoded as UTF-8,
|
||||
// that escape sequences within strings decode to valid Unicode codepoints, and
|
||||
// that all names in each object are unique.
|
||||
// It does not verify whether numbers are representable within the limits
|
||||
// of any common numeric type (e.g., float64, int64, or uint64).
|
||||
//
|
||||
// Relevant options include:
|
||||
// - [AllowDuplicateNames]
|
||||
// - [AllowInvalidUTF8]
|
||||
//
|
||||
// All other options are ignored.
|
||||
func (v Value) IsValid(opts ...Options) bool {
|
||||
// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
|
||||
d := getBufferedDecoder(v, opts...)
|
||||
defer putBufferedDecoder(d)
|
||||
_, errVal := d.ReadValue()
|
||||
_, errEOF := d.ReadToken()
|
||||
return errVal == nil && errEOF == io.EOF
|
||||
}
|
||||
|
||||
// Format formats the raw JSON value in place.
|
||||
//
|
||||
// By default (if no options are specified), it validates according to RFC 7493
|
||||
// and produces the minimal JSON representation, where
|
||||
// all whitespace is elided and JSON strings use the shortest encoding.
|
||||
//
|
||||
// Relevant options include:
|
||||
// - [AllowDuplicateNames]
|
||||
// - [AllowInvalidUTF8]
|
||||
// - [EscapeForHTML]
|
||||
// - [EscapeForJS]
|
||||
// - [PreserveRawStrings]
|
||||
// - [CanonicalizeRawInts]
|
||||
// - [CanonicalizeRawFloats]
|
||||
// - [ReorderRawObjects]
|
||||
// - [SpaceAfterColon]
|
||||
// - [SpaceAfterComma]
|
||||
// - [Multiline]
|
||||
// - [WithIndent]
|
||||
// - [WithIndentPrefix]
|
||||
//
|
||||
// All other options are ignored.
|
||||
//
|
||||
// It is guaranteed to succeed if the value is valid according to the same options.
|
||||
// If the value is already formatted, then the buffer is not mutated.
|
||||
func (v *Value) Format(opts ...Options) error {
|
||||
// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
|
||||
return v.format(opts, nil)
|
||||
}
|
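// A formatting sketch (assuming the jsontext import as above); the output shown
// is what the documented defaults plus these two options should produce:
//
//	v := jsontext.Value(` { "k" : "v" , "n" : 1.0 } `)
//	err := v.Format(jsontext.SpaceAfterColon(true), jsontext.SpaceAfterComma(true))
//	// v is now `{"k": "v", "n": 1.0}`; the raw number is preserved since no
//	// Canonicalize* option was given.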
||||
|
||||
// format accepts two []Options to avoid the allocation appending them together.
|
||||
// It is equivalent to v.Format(append(opts1, opts2...)...).
|
||||
func (v *Value) format(opts1, opts2 []Options) error {
|
||||
e := getBufferedEncoder(opts1...)
|
||||
defer putBufferedEncoder(e)
|
||||
e.s.Join(opts2...)
|
||||
e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
|
||||
if err := e.s.WriteValue(*v); err != nil {
|
||||
return err
|
||||
}
|
||||
if !bytes.Equal(*v, e.s.Buf) {
|
||||
*v = append((*v)[:0], e.s.Buf...)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Compact removes all whitespace from the raw JSON value.
|
||||
//
|
||||
// It does not reformat JSON strings or numbers to use any other representation.
|
||||
// To maximize the set of JSON values that can be formatted,
|
||||
// this permits values with duplicate names and invalid UTF-8.
|
||||
//
|
||||
// Compact is equivalent to calling [Value.Format] with the following options:
|
||||
// - [AllowDuplicateNames](true)
|
||||
// - [AllowInvalidUTF8](true)
|
||||
// - [PreserveRawStrings](true)
|
||||
//
|
||||
// Any options specified by the caller are applied after the initial set
|
||||
// and may deliberately override prior options.
|
||||
func (v *Value) Compact(opts ...Options) error {
|
||||
return v.format([]Options{
|
||||
AllowDuplicateNames(true),
|
||||
AllowInvalidUTF8(true),
|
||||
PreserveRawStrings(true),
|
||||
}, opts)
|
||||
}
|
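// A sketch of the difference from plain [Value.Format] (assuming the jsontext
// import as above): Compact tolerates input that strict validation would reject.
//
//	v := jsontext.Value(` { "dup" : 1 , "dup" : 2 } `)
//	err := v.Compact() // nil; duplicate names are permitted here
//	// v is now `{"dup":1,"dup":2}`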
||||
|
||||
// Indent reformats the whitespace in the raw JSON value so that each element
|
||||
// in a JSON object or array begins on an indented line according to the nesting.
|
||||
//
|
||||
// It does not reformat JSON strings or numbers to use any other representation.
|
||||
// To maximize the set of JSON values that can be formatted,
|
||||
// this permits values with duplicate names and invalid UTF-8.
|
||||
//
|
||||
// Indent is equivalent to calling [Value.Format] with the following options:
|
||||
// - [AllowDuplicateNames](true)
|
||||
// - [AllowInvalidUTF8](true)
|
||||
// - [PreserveRawStrings](true)
|
||||
// - [Multiline](true)
|
||||
//
|
||||
// Any options specified by the caller are applied after the initial set
|
||||
// and may deliberately override prior options.
|
||||
func (v *Value) Indent(opts ...Options) error {
|
||||
return v.format([]Options{
|
||||
AllowDuplicateNames(true),
|
||||
AllowInvalidUTF8(true),
|
||||
PreserveRawStrings(true),
|
||||
Multiline(true),
|
||||
}, opts)
|
||||
}
|
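// An indentation sketch (assuming the jsontext import as above):
//
//	v := jsontext.Value(`[1,2,3]`)
//	err := v.Indent(jsontext.WithIndent("  "))
//	// v should now be:
//	// [
//	//   1,
//	//   2,
//	//   3
//	// ]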
||||
|
||||
// Canonicalize canonicalizes the raw JSON value according to the
|
||||
// JSON Canonicalization Scheme (JCS) as defined by RFC 8785
|
||||
// where it produces a stable representation of a JSON value.
|
||||
//
|
||||
// JSON strings are formatted to use their minimal representation,
|
||||
// JSON numbers are formatted as double precision numbers according
|
||||
// to some stable serialization algorithm.
|
||||
// JSON object members are sorted in ascending order by name.
|
||||
// All whitespace is removed.
|
||||
//
|
||||
// The output stability is dependent on the stability of the application data
|
||||
// (see RFC 8785, Appendix E). It cannot produce stable output from
|
||||
// fundamentally unstable input. For example, if the JSON value
|
||||
// contains ephemeral data (e.g., a frequently changing timestamp),
|
||||
// then the value is still unstable regardless of whether this is called.
|
||||
//
|
||||
// Canonicalize is equivalent to calling [Value.Format] with the following options:
|
||||
// - [CanonicalizeRawInts](true)
|
||||
// - [CanonicalizeRawFloats](true)
|
||||
// - [ReorderRawObjects](true)
|
||||
//
|
||||
// Any options specified by the caller are applied after the initial set
|
||||
// and may deliberately override prior options.
|
||||
//
|
||||
// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
|
||||
// Any numbers with precision beyond what is representable by that form
|
||||
// will lose their precision when canonicalized. For example, integer values
|
||||
// beyond ±2⁵³ will lose their precision. To preserve the original representation
|
||||
// of JSON integers, additionally set [CanonicalizeRawInts] to false:
|
||||
//
|
||||
// v.Canonicalize(jsontext.CanonicalizeRawInts(false))
|
||||
func (v *Value) Canonicalize(opts ...Options) error {
|
||||
return v.format([]Options{
|
||||
CanonicalizeRawInts(true),
|
||||
CanonicalizeRawFloats(true),
|
||||
ReorderRawObjects(true),
|
||||
}, opts)
|
||||
}
|
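// A canonicalization sketch (assuming the jsontext import as above); the expected
// output follows RFC 8785 and the precision caveat described above:
//
//	v := jsontext.Value(`{"b": 1E30, "a": 4.50, "n": 9223372036854775807}`)
//	err := v.Canonicalize()
//	// v is now `{"a":4.5,"b":1e+30,"n":9223372036854776000}`:
//	// names sorted, floats normalized, and the large integer loses precision.
//	// Passing jsontext.CanonicalizeRawInts(false) would keep the integer verbatim.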
||||
|
||||
// MarshalJSON returns v as the JSON encoding of v.
|
||||
// It returns the stored value as the raw JSON output without any validation.
|
||||
// If v is nil, then this returns a JSON null.
|
||||
func (v Value) MarshalJSON() ([]byte, error) {
|
||||
// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
|
||||
if v == nil {
|
||||
return []byte("null"), nil
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON sets v as the JSON encoding of b.
|
||||
// It stores a copy of the provided raw JSON input without any validation.
|
||||
func (v *Value) UnmarshalJSON(b []byte) error {
|
||||
// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
|
||||
if v == nil {
|
||||
return errors.New("jsontext.Value: UnmarshalJSON on nil pointer")
|
||||
}
|
||||
*v = append((*v)[:0], b...)
|
||||
return nil
|
||||
}
|
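// An interoperability sketch (assuming the jsontext import as above): because
// Value implements these two methods, it should work as a deferred-decoding
// field with any json package that honors the standard Marshaler/Unmarshaler
// interfaces, much like v1 json.RawMessage (the v1 encoding/json is used here
// only for illustration).
//
//	type Envelope struct {
//		Type    string         `json:"type"`
//		Payload jsontext.Value `json:"payload"`
//	}
//	var e Envelope
//	_ = json.Unmarshal([]byte(`{"type":"ping","payload":{"seq":1}}`), &e)
//	// e.Payload now holds `{"seq":1}` and can be decoded later.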
||||
|
||||
// Kind returns the starting token kind.
|
||||
// For a valid value, this will never include '}' or ']'.
|
||||
func (v Value) Kind() Kind {
|
||||
if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 {
|
||||
return Kind(v[0]).normalize()
|
||||
}
|
||||
return invalidKind
|
||||
}
|
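// A dispatch sketch (assuming the jsontext import as above); the kind byte is
// the first non-whitespace character, normalized:
//
//	switch jsontext.Value(` [1,2,3] `).Kind() {
//	case '{':
//		// object
//	case '[':
//		// array (taken in this example)
//	case '"':
//		// string
//	case '0':
//		// number (any JSON number, not only zero)
//	case 'n', 't', 'f':
//		// null, true, or false
//	default:
//		// empty or invalid input yields an invalid kind
//	}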
||||
|
||||
const commaAndWhitespace = ", \n\r\t"
|
||||
|
||||
type objectMember struct {
|
||||
// name is the unquoted name.
|
||||
name []byte // e.g., "name"
|
||||
// buffer is the entirety of the raw JSON object member
|
||||
// starting from right after the previous member (or opening '{')
|
||||
// until right after the member value.
|
||||
buffer []byte // e.g., `, \n\r\t"name": "value"`
|
||||
}
|
||||
|
||||
func (x objectMember) Compare(y objectMember) int {
|
||||
if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 {
|
||||
return c
|
||||
}
|
||||
// With [AllowDuplicateNames] or [AllowInvalidUTF8],
|
||||
// names could be identical, so also sort using the member value.
|
||||
return jsonwire.CompareUTF16(
|
||||
bytes.TrimLeft(x.buffer, commaAndWhitespace),
|
||||
bytes.TrimLeft(y.buffer, commaAndWhitespace))
|
||||
}
|
||||
|
||||
var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }}
|
||||
|
||||
func getObjectMembers() *[]objectMember {
|
||||
ns := objectMemberPool.Get().(*[]objectMember)
|
||||
*ns = (*ns)[:0]
|
||||
return ns
|
||||
}
|
||||
func putObjectMembers(ns *[]objectMember) {
|
||||
if cap(*ns) < 1<<10 {
|
||||
clear(*ns) // avoid pinning name and buffer
|
||||
objectMemberPool.Put(ns)
|
||||
}
|
||||
}
|
||||
|
||||
// mustReorderObjects reorders in-place all object members in a JSON value,
|
||||
// which must be valid otherwise it panics.
|
||||
func mustReorderObjects(b []byte) {
|
||||
// Obtain a buffered encoder just to use its internal buffer as
|
||||
// a scratch buffer for reordering object members.
|
||||
e2 := getBufferedEncoder()
|
||||
defer putBufferedEncoder(e2)
|
||||
|
||||
// Disable unnecessary checks to syntactically parse the JSON value.
|
||||
d := getBufferedDecoder(b)
|
||||
defer putBufferedDecoder(d)
|
||||
d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
|
||||
mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3
|
||||
}
|
||||
|
||||
// mustReorderObjectsFromDecoder recursively reorders all object members in place
|
||||
// according to the ordering specified in RFC 8785, section 3.2.3.
|
||||
//
|
||||
// Pre-conditions:
|
||||
// - The value is valid (i.e., no decoder errors should ever occur).
|
||||
// - Initial call is provided a Decoder reading from the start of v.
|
||||
//
|
||||
// Post-conditions:
|
||||
// - Exactly one JSON value is read from the Decoder.
|
||||
// - All fully-parsed JSON objects are reordered by directly moving
|
||||
// the members in the value buffer.
|
||||
//
|
||||
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
|
||||
// where n is len(v) and m is the total number of object members.
|
||||
func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
|
||||
switch tok, err := d.ReadToken(); tok.Kind() {
|
||||
case '{':
|
||||
// Iterate and collect the name and offsets for every object member.
|
||||
members := getObjectMembers()
|
||||
defer putObjectMembers(members)
|
||||
var prevMember objectMember
|
||||
isSorted := true
|
||||
|
||||
beforeBody := d.InputOffset() // offset after '{'
|
||||
for d.PeekKind() != '}' {
|
||||
beforeName := d.InputOffset()
|
||||
var flags jsonwire.ValueFlags
|
||||
name, _ := d.s.ReadValue(&flags)
|
||||
name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
|
||||
mustReorderObjectsFromDecoder(d, scratch)
|
||||
afterValue := d.InputOffset()
|
||||
|
||||
currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
|
||||
if isSorted && len(*members) > 0 {
|
||||
isSorted = objectMember.Compare(prevMember, currMember) < 0
|
||||
}
|
||||
*members = append(*members, currMember)
|
||||
prevMember = currMember
|
||||
}
|
||||
afterBody := d.InputOffset() // offset before '}'
|
||||
d.ReadToken()
|
||||
|
||||
// Sort the members; return early if it's already sorted.
|
||||
if isSorted {
|
||||
return
|
||||
}
|
||||
firstBufferBeforeSorting := (*members)[0].buffer
|
||||
slices.SortFunc(*members, objectMember.Compare)
|
||||
firstBufferAfterSorting := (*members)[0].buffer
|
||||
|
||||
// Append the reordered members to a new buffer,
|
||||
// then copy the reordered members back over the original members.
|
||||
// Avoid swapping in place since each member may be a different size
|
||||
// where moving a member over a smaller member may corrupt the data
|
||||
// for subsequent members before they have been moved.
|
||||
//
|
||||
// The following invariant must hold:
|
||||
// sum([m.after-m.before for m in members]) == afterBody-beforeBody
|
||||
commaAndWhitespacePrefix := func(b []byte) []byte {
|
||||
return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
|
||||
}
|
||||
sorted := (*scratch)[:0]
|
||||
for i, member := range *members {
|
||||
switch {
|
||||
case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
|
||||
// First member after sorting is not the first member before sorting,
|
||||
// so use the prefix of the first member before sorting.
|
||||
sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
|
||||
sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
|
||||
case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
|
||||
// Later member after sorting is the first member before sorting,
|
||||
// so use the prefix of the first member after sorting.
|
||||
sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
|
||||
sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
|
||||
default:
|
||||
sorted = append(sorted, member.buffer...)
|
||||
}
|
||||
}
|
||||
if int(afterBody-beforeBody) != len(sorted) {
|
||||
panic("BUG: length invariant violated")
|
||||
}
|
||||
copy(d.s.buf[beforeBody:afterBody], sorted)
|
||||
|
||||
// Update scratch buffer to the largest amount ever used.
|
||||
if len(sorted) > len(*scratch) {
|
||||
*scratch = sorted
|
||||
}
|
||||
case '[':
|
||||
for d.PeekKind() != ']' {
|
||||
mustReorderObjectsFromDecoder(d, scratch)
|
||||
}
|
||||
d.ReadToken()
|
||||
default:
|
||||
if err != nil {
|
||||
panic("BUG: " + err.Error())
|
||||
}
|
||||
}
|
||||
}
|
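// A sketch of the observable effect of the reordering above, reachable through
// the public API via [ReorderRawObjects] (assuming the jsontext import as above):
//
//	v := jsontext.Value(`{"b":1,"a":{"y":2,"x":3}}`)
//	err := v.Format(jsontext.ReorderRawObjects(true))
//	// v is now `{"a":{"x":3,"y":2},"b":1}`: members are sorted recursively by the
//	// UTF-16 code units of their names (RFC 8785, section 3.2.3). The helper above
//	// moves whole member spans through a scratch buffer rather than swapping in
//	// place, since members may differ in length.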
||||
200
pkg/encoders/json/jsontext/value_test.go
Normal file
@@ -0,0 +1,200 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"encoding/json/internal/jsontest"
|
||||
"encoding/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
type valueTestdataEntry struct {
|
||||
name jsontest.CaseName
|
||||
in string
|
||||
wantValid bool
|
||||
wantCompacted string
|
||||
wantCompactErr error // implies wantCompacted is in
|
||||
wantIndented string // wantCompacted if empty; uses "\t" for indent prefix and " " for indent
|
||||
wantIndentErr error // implies wantCompacted is in
|
||||
wantCanonicalized string // wantCompacted if empty
|
||||
wantCanonicalizeErr error // implies wantCompacted is in
|
||||
}
|
||||
|
||||
var valueTestdata = append(func() (out []valueTestdataEntry) {
|
||||
// Initialize valueTestdata from coderTestdata.
|
||||
for _, td := range coderTestdata {
|
||||
// NOTE: The Compact method preserves the raw formatting of strings,
|
||||
// while the Encoder (by default) does not.
|
||||
if td.name.Name == "ComplicatedString" {
|
||||
td.outCompacted = strings.TrimSpace(td.in)
|
||||
}
|
||||
out = append(out, valueTestdataEntry{
|
||||
name: td.name,
|
||||
in: td.in,
|
||||
wantValid: true,
|
||||
wantCompacted: td.outCompacted,
|
||||
wantIndented: td.outIndented,
|
||||
wantCanonicalized: td.outCanonicalized,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}(), []valueTestdataEntry{{
|
||||
name: jsontest.Name("RFC8785/Primitives"),
|
||||
in: `{
|
||||
"numbers": [333333333.33333329, 1E30, 4.50,
|
||||
2e-3, 0.000000000000000000000000001, -0],
|
||||
"string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
|
||||
"literals": [null, true, false]
|
||||
}`,
|
||||
wantValid: true,
|
||||
wantCompacted: `{"numbers":[333333333.33333329,1E30,4.50,2e-3,0.000000000000000000000000001,-0],"string":"\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/","literals":[null,true,false]}`,
|
||||
wantIndented: `{
|
||||
"numbers": [
|
||||
333333333.33333329,
|
||||
1E30,
|
||||
4.50,
|
||||
2e-3,
|
||||
0.000000000000000000000000001,
|
||||
-0
|
||||
],
|
||||
"string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
|
||||
"literals": [
|
||||
null,
|
||||
true,
|
||||
false
|
||||
]
|
||||
}`,
|
||||
wantCanonicalized: `{"literals":[null,true,false],"numbers":[333333333.3333333,1e+30,4.5,0.002,1e-27,0],"string":"€$\u000f\nA'B\"\\\\\"/"}`,
|
||||
}, {
|
||||
name: jsontest.Name("RFC8785/ObjectOrdering"),
|
||||
in: `{
|
||||
"\u20ac": "Euro Sign",
|
||||
"\r": "Carriage Return",
|
||||
"\ufb33": "Hebrew Letter Dalet With Dagesh",
|
||||
"1": "One",
|
||||
"\ud83d\ude00": "Emoji: Grinning Face",
|
||||
"\u0080": "Control",
|
||||
"\u00f6": "Latin Small Letter O With Diaeresis"
|
||||
}`,
|
||||
wantValid: true,
|
||||
wantCompacted: `{"\u20ac":"Euro Sign","\r":"Carriage Return","\ufb33":"Hebrew Letter Dalet With Dagesh","1":"One","\ud83d\ude00":"Emoji: Grinning Face","\u0080":"Control","\u00f6":"Latin Small Letter O With Diaeresis"}`,
|
||||
wantIndented: `{
|
||||
"\u20ac": "Euro Sign",
|
||||
"\r": "Carriage Return",
|
||||
"\ufb33": "Hebrew Letter Dalet With Dagesh",
|
||||
"1": "One",
|
||||
"\ud83d\ude00": "Emoji: Grinning Face",
|
||||
"\u0080": "Control",
|
||||
"\u00f6": "Latin Small Letter O With Diaeresis"
|
||||
}`,
|
||||
wantCanonicalized: `{"\r":"Carriage Return","1":"One","":"Control","ö":"Latin Small Letter O With Diaeresis","€":"Euro Sign","😀":"Emoji: Grinning Face","דּ":"Hebrew Letter Dalet With Dagesh"}`,
|
||||
}, {
|
||||
name: jsontest.Name("LargeIntegers"),
|
||||
in: ` [ -9223372036854775808 , 9223372036854775807 ] `,
|
||||
wantValid: true,
|
||||
wantCompacted: `[-9223372036854775808,9223372036854775807]`,
|
||||
wantIndented: `[
|
||||
-9223372036854775808,
|
||||
9223372036854775807
|
||||
]`,
|
||||
wantCanonicalized: `[-9223372036854776000,9223372036854776000]`, // NOTE: Loss of precision due to numbers being treated as floats.
|
||||
}, {
|
||||
name: jsontest.Name("InvalidUTF8"),
|
||||
in: ` "living` + "\xde\xad\xbe\xef" + `\ufffd<66>" `,
|
||||
wantValid: false, // uses RFC 7493 as the definition; which validates UTF-8
|
||||
wantCompacted: `"living` + "\xde\xad\xbe\xef" + `\ufffd<66>"`,
|
||||
wantCanonicalizeErr: E(jsonwire.ErrInvalidUTF8).withPos(` "living`+"\xde\xad", ""),
|
||||
}, {
|
||||
name: jsontest.Name("InvalidUTF8/SurrogateHalf"),
|
||||
in: `"\ud800"`,
|
||||
wantValid: false, // uses RFC 7493 as the definition; which validates UTF-8
|
||||
wantCompacted: `"\ud800"`,
|
||||
wantCanonicalizeErr: newInvalidEscapeSequenceError(`\ud800"`).withPos(`"`, ""),
|
||||
}, {
|
||||
name: jsontest.Name("UppercaseEscaped"),
|
||||
in: `"\u000B"`,
|
||||
wantValid: true,
|
||||
wantCompacted: `"\u000B"`,
|
||||
wantCanonicalized: `"\u000b"`,
|
||||
}, {
|
||||
name: jsontest.Name("DuplicateNames"),
|
||||
in: ` { "0" : 0 , "1" : 1 , "0" : 0 }`,
|
||||
wantValid: false, // uses RFC 7493 as the definition; which does check for object uniqueness
|
||||
wantCompacted: `{"0":0,"1":1,"0":0}`,
|
||||
wantIndented: `{
|
||||
"0": 0,
|
||||
"1": 1,
|
||||
"0": 0
|
||||
}`,
|
||||
wantCanonicalizeErr: E(ErrDuplicateName).withPos(` { "0" : 0 , "1" : 1 , `, "/0"),
|
||||
}, {
|
||||
name: jsontest.Name("Whitespace"),
|
||||
in: " \n\r\t",
|
||||
wantValid: false,
|
||||
wantCompacted: " \n\r\t",
|
||||
wantCompactErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
|
||||
wantIndentErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
|
||||
wantCanonicalizeErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
|
||||
}}...)
|
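// A hypothetical sketch of extending the table above; the entry name and values
// are illustrative only (unset want* fields fall back to wantCompacted, and
// Compact preserves raw numbers while Canonicalize applies JCS formatting):
//
//	{
//		name:              jsontest.Name("Hypothetical/TrailingZero"),
//		in:                ` 1.0 `,
//		wantValid:         true,
//		wantCompacted:     `1.0`,
//		wantCanonicalized: `1`,
//	}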
||||
|
||||
func TestValueMethods(t *testing.T) {
|
||||
for _, td := range valueTestdata {
|
||||
t.Run(td.name.Name, func(t *testing.T) {
|
||||
if td.wantIndented == "" {
|
||||
td.wantIndented = td.wantCompacted
|
||||
}
|
||||
if td.wantCanonicalized == "" {
|
||||
td.wantCanonicalized = td.wantCompacted
|
||||
}
|
||||
if td.wantCompactErr != nil {
|
||||
td.wantCompacted = td.in
|
||||
}
|
||||
if td.wantIndentErr != nil {
|
||||
td.wantIndented = td.in
|
||||
}
|
||||
if td.wantCanonicalizeErr != nil {
|
||||
td.wantCanonicalized = td.in
|
||||
}
|
||||
|
||||
v := Value(td.in)
|
||||
gotValid := v.IsValid()
|
||||
if gotValid != td.wantValid {
|
||||
t.Errorf("%s: Value.IsValid = %v, want %v", td.name.Where, gotValid, td.wantValid)
|
||||
}
|
||||
|
||||
gotCompacted := Value(td.in)
|
||||
gotCompactErr := gotCompacted.Compact()
|
||||
if string(gotCompacted) != td.wantCompacted {
|
||||
t.Errorf("%s: Value.Compact = %s, want %s", td.name.Where, gotCompacted, td.wantCompacted)
|
||||
}
|
||||
if !equalError(gotCompactErr, td.wantCompactErr) {
|
||||
t.Errorf("%s: Value.Compact error mismatch:\ngot %v\nwant %v", td.name.Where, gotCompactErr, td.wantCompactErr)
|
||||
}
|
||||
|
||||
gotIndented := Value(td.in)
|
||||
gotIndentErr := gotIndented.Indent(WithIndentPrefix("\t"), WithIndent(" "))
|
||||
if string(gotIndented) != td.wantIndented {
|
||||
t.Errorf("%s: Value.Indent = %s, want %s", td.name.Where, gotIndented, td.wantIndented)
|
||||
}
|
||||
if !equalError(gotIndentErr, td.wantIndentErr) {
|
||||
t.Errorf("%s: Value.Indent error mismatch:\ngot %v\nwant %v", td.name.Where, gotIndentErr, td.wantIndentErr)
|
||||
}
|
||||
|
||||
gotCanonicalized := Value(td.in)
|
||||
gotCanonicalizeErr := gotCanonicalized.Canonicalize()
|
||||
if string(gotCanonicalized) != td.wantCanonicalized {
|
||||
t.Errorf("%s: Value.Canonicalize = %s, want %s", td.name.Where, gotCanonicalized, td.wantCanonicalized)
|
||||
}
|
||||
if !equalError(gotCanonicalizeErr, td.wantCanonicalizeErr) {
|
||||
t.Errorf("%s: Value.Canonicalize error mismatch:\ngot %v\nwant %v", td.name.Where, gotCanonicalizeErr, td.wantCanonicalizeErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||