diff --git a/cmd/eventpool/eventpool.go b/cmd/eventpool/eventpool.go
index bfd7079..9e3d4d4 100644
--- a/cmd/eventpool/eventpool.go
+++ b/cmd/eventpool/eventpool.go
@@ -9,6 +9,7 @@ import (
"lukechampine.com/frand"
"next.orly.dev/pkg/encoders/event"
"next.orly.dev/pkg/encoders/hex"
+ "next.orly.dev/pkg/encoders/json"
"next.orly.dev/pkg/encoders/tag"
"next.orly.dev/pkg/utils"
"next.orly.dev/pkg/utils/bufpool"
@@ -36,7 +37,7 @@ func main() {
ev.Content = frand.Bytes(frand.Intn(1024) + 1)
ev.Sig = frand.Bytes(64)
// log.I.S(ev)
- b, err := ev.MarshalJSON()
+ b, err := json.Marshal(ev)
if chk.E(err) {
return
}
@@ -44,11 +45,11 @@ func main() {
bc = append(bc, b...)
// log.I.F("%s", bc)
ev2 := event.New()
- if err = ev2.UnmarshalJSON(b); chk.E(err) {
+ if err = json.Unmarshal(b, ev2); chk.E(err) {
return
}
var b2 []byte
- if b2, err = ev.MarshalJSON(); err != nil {
+ if b2, err = json.Marshal(ev); err != nil {
return
}
if !utils.FastEqual(bc, b2) {
diff --git a/pkg/encoders/event/event.go b/pkg/encoders/event/event.go
index 1b1e6a1..cdb154c 100644
--- a/pkg/encoders/event/event.go
+++ b/pkg/encoders/event/event.go
@@ -27,6 +27,9 @@ import (
// encode <, >, and & characters due to legacy bullcrap in the encoding/json
// library. Either call MarshalJSON directly or use a json.Encoder with html
// escaping disabled.
+//
+// Alternatively, import "next.orly.dev/pkg/encoders/json" and use json.Marshal,
+// which matches Go 1.25's encoding/json (v1) except that it never HTML-escapes
+// <, >, and &.
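+//
+// For example, a minimal round-trip sketch (assuming ev is a populated *E):
+//
+//	b, err := json.Marshal(ev) // same bytes as ev.MarshalJSON, no HTML escaping
+//	if err != nil {
+//		// handle error
+//	}
+//	ev2 := New()
+//	if err = json.Unmarshal(b, ev2); err != nil {
+//		// handle error
+//	}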
type E struct {
// ID is the SHA256 hash of the canonical encoding of the event in binary format
diff --git a/pkg/encoders/event/event_test.go b/pkg/encoders/event/event_test.go
index 3ad2503..8b41994 100644
--- a/pkg/encoders/event/event_test.go
+++ b/pkg/encoders/event/event_test.go
@@ -3,7 +3,6 @@ package event
import (
"bufio"
"bytes"
- "encoding/json"
"testing"
"time"
@@ -12,6 +11,7 @@ import (
"lukechampine.com/frand"
"next.orly.dev/pkg/encoders/event/examples"
"next.orly.dev/pkg/encoders/hex"
+ "next.orly.dev/pkg/encoders/json"
"next.orly.dev/pkg/encoders/tag"
"next.orly.dev/pkg/utils"
"next.orly.dev/pkg/utils/bufpool"
@@ -39,11 +39,9 @@ func TestMarshalJSONUnmarshalJSON(t *testing.T) {
with line breaks and tabs and other stuff
`)
ev.Sig = frand.Bytes(64)
- // log.I.S(ev)
- // b, err := ev.MarshalJSON()
var err error
var b []byte
- if b, err = ev.MarshalJSON(); chk.E(err) {
+ if b, err = json.Marshal(ev); chk.E(err) {
t.Fatal(err)
}
var bc []byte
@@ -53,7 +51,7 @@ func TestMarshalJSONUnmarshalJSON(t *testing.T) {
t.Fatal(err)
}
var b2 []byte
- if b2, err = ev2.MarshalJSON(); err != nil {
+ if b2, err = json.Marshal(ev2); err != nil {
t.Fatal(err)
}
if !utils.FastEqual(bc, b2) {
@@ -82,8 +80,8 @@ func TestExamplesCache(t *testing.T) {
t.Fatal(err)
}
var b2 []byte
- // can't use json.Marshal as it improperly escapes <, > and &.
- if b2, err = ev.MarshalJSON(); err != nil {
+ // can't use encoding/json.Marshal as it improperly escapes <, > and &.
+ if b2, err = json.Marshal(ev); err != nil {
t.Fatal(err)
}
if !utils.FastEqual(c, b2) {
diff --git a/pkg/encoders/json/bench_test.go b/pkg/encoders/json/bench_test.go
new file mode 100644
index 0000000..0471881
--- /dev/null
+++ b/pkg/encoders/json/bench_test.go
@@ -0,0 +1,583 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Large data benchmark.
+// The JSON data is a summary of agl's changes in the
+// go, webkit, and chromium open source projects.
+// We benchmark converting between the JSON form
+// and in-memory data structures.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+import (
+ "bytes"
+ "fmt"
+ "internal/testenv"
+ "internal/zstd"
+ "io"
+ "os"
+ "reflect"
+ "regexp"
+ "runtime"
+ "strings"
+ "sync"
+ "testing"
+)
+
+type codeResponse struct {
+ Tree *codeNode `json:"tree"`
+ Username string `json:"username"`
+}
+
+type codeNode struct {
+ Name string `json:"name"`
+ Kids []*codeNode `json:"kids"`
+ CLWeight float64 `json:"cl_weight"`
+ Touches int `json:"touches"`
+ MinT int64 `json:"min_t"`
+ MaxT int64 `json:"max_t"`
+ MeanT int64 `json:"mean_t"`
+}
+
+var codeJSON []byte
+var codeStruct codeResponse
+
+func codeInit() {
+ f, err := os.Open("internal/jsontest/testdata/golang_source.json.zst")
+ if err != nil {
+ panic(err)
+ }
+ defer f.Close()
+ // internal/zstd is unavailable outside the Go tree; a public zstd decoder
+ // (assumed here: github.com/klauspost/compress/zstd) stands in for it.
+ gz, err := zstd.NewReader(f)
+ if err != nil {
+ panic(err)
+ }
+ defer gz.Close()
+ data, err := io.ReadAll(gz)
+ if err != nil {
+ panic(err)
+ }
+
+ codeJSON = data
+
+ if err := Unmarshal(codeJSON, &codeStruct); err != nil {
+ panic("unmarshal code.json: " + err.Error())
+ }
+
+ if data, err = Marshal(&codeStruct); err != nil {
+ panic("marshal code.json: " + err.Error())
+ }
+
+ if !bytes.Equal(data, codeJSON) {
+ println("different lengths", len(data), len(codeJSON))
+ for i := 0; i < len(data) && i < len(codeJSON); i++ {
+ if data[i] != codeJSON[i] {
+ println("re-marshal: changed at byte", i)
+ println("orig: ", string(codeJSON[i-10:i+10]))
+ println("new: ", string(data[i-10:i+10]))
+ break
+ }
+ }
+ panic("re-marshal code.json: different result")
+ }
+}
+
+func BenchmarkCodeEncoder(b *testing.B) {
+ b.ReportAllocs()
+ if codeJSON == nil {
+ b.StopTimer()
+ codeInit()
+ b.StartTimer()
+ }
+ b.RunParallel(func(pb *testing.PB) {
+ enc := NewEncoder(io.Discard)
+ for pb.Next() {
+ if err := enc.Encode(&codeStruct); err != nil {
+ b.Fatalf("Encode error: %v", err)
+ }
+ }
+ })
+ b.SetBytes(int64(len(codeJSON)))
+}
+
+func BenchmarkCodeEncoderError(b *testing.B) {
+ b.ReportAllocs()
+ if codeJSON == nil {
+ b.StopTimer()
+ codeInit()
+ b.StartTimer()
+ }
+
+ // Trigger an error in Marshal with cyclic data.
+ type Dummy struct {
+ Name string
+ Next *Dummy
+ }
+ dummy := Dummy{Name: "Dummy"}
+ dummy.Next = &dummy
+
+ b.RunParallel(func(pb *testing.PB) {
+ enc := NewEncoder(io.Discard)
+ for pb.Next() {
+ if err := enc.Encode(&codeStruct); err != nil {
+ b.Fatalf("Encode error: %v", err)
+ }
+ if _, err := Marshal(dummy); err == nil {
+ b.Fatal("Marshal error: got nil, want non-nil")
+ }
+ }
+ })
+ b.SetBytes(int64(len(codeJSON)))
+}
+
+func BenchmarkCodeMarshal(b *testing.B) {
+ b.ReportAllocs()
+ if codeJSON == nil {
+ b.StopTimer()
+ codeInit()
+ b.StartTimer()
+ }
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ if _, err := Marshal(&codeStruct); err != nil {
+ b.Fatalf("Marshal error: %v", err)
+ }
+ }
+ })
+ b.SetBytes(int64(len(codeJSON)))
+}
+
+func BenchmarkCodeMarshalError(b *testing.B) {
+ b.ReportAllocs()
+ if codeJSON == nil {
+ b.StopTimer()
+ codeInit()
+ b.StartTimer()
+ }
+
+ // Trigger an error in Marshal with cyclic data.
+ type Dummy struct {
+ Name string
+ Next *Dummy
+ }
+ dummy := Dummy{Name: "Dummy"}
+ dummy.Next = &dummy
+
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ if _, err := Marshal(&codeStruct); err != nil {
+ b.Fatalf("Marshal error: %v", err)
+ }
+ if _, err := Marshal(dummy); err == nil {
+ b.Fatal("Marshal error: got nil, want non-nil")
+ }
+ }
+ })
+ b.SetBytes(int64(len(codeJSON)))
+}
+
+func benchMarshalBytes(n int) func(*testing.B) {
+ sample := []byte("hello world")
+ // Use a struct pointer, to avoid an allocation when passing it as an
+ // interface parameter to Marshal.
+ v := &struct {
+ Bytes []byte
+ }{
+ bytes.Repeat(sample, (n/len(sample))+1)[:n],
+ }
+ return func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ if _, err := Marshal(v); err != nil {
+ b.Fatalf("Marshal error: %v", err)
+ }
+ }
+ }
+}
+
+func benchMarshalBytesError(n int) func(*testing.B) {
+ sample := []byte("hello world")
+ // Use a struct pointer, to avoid an allocation when passing it as an
+ // interface parameter to Marshal.
+ v := &struct {
+ Bytes []byte
+ }{
+ bytes.Repeat(sample, (n/len(sample))+1)[:n],
+ }
+
+ // Trigger an error in Marshal with cyclic data.
+ type Dummy struct {
+ Name string
+ Next *Dummy
+ }
+ dummy := Dummy{Name: "Dummy"}
+ dummy.Next = &dummy
+
+ return func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ if _, err := Marshal(v); err != nil {
+ b.Fatalf("Marshal error: %v", err)
+ }
+ if _, err := Marshal(dummy); err == nil {
+ b.Fatal("Marshal error: got nil, want non-nil")
+ }
+ }
+ }
+}
+
+func BenchmarkMarshalBytes(b *testing.B) {
+ b.ReportAllocs()
+ // 32 fits within encodeState.scratch.
+ b.Run("32", benchMarshalBytes(32))
+ // 256 doesn't fit in encodeState.scratch, but is small enough to
+ // allocate and avoid the slower base64.NewEncoder.
+ b.Run("256", benchMarshalBytes(256))
+ // 4096 is large enough that we want to avoid allocating for it.
+ b.Run("4096", benchMarshalBytes(4096))
+}
+
+func BenchmarkMarshalBytesError(b *testing.B) {
+ b.ReportAllocs()
+ // 32 fits within encodeState.scratch.
+ b.Run("32", benchMarshalBytesError(32))
+ // 256 doesn't fit in encodeState.scratch, but is small enough to
+ // allocate and avoid the slower base64.NewEncoder.
+ b.Run("256", benchMarshalBytesError(256))
+ // 4096 is large enough that we want to avoid allocating for it.
+ b.Run("4096", benchMarshalBytesError(4096))
+}
+
+func BenchmarkMarshalMap(b *testing.B) {
+ b.ReportAllocs()
+ m := map[string]int{
+ "key3": 3,
+ "key2": 2,
+ "key1": 1,
+ }
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ if _, err := Marshal(m); err != nil {
+ b.Fatal("Marshal:", err)
+ }
+ }
+ })
+}
+
+func BenchmarkCodeDecoder(b *testing.B) {
+ b.ReportAllocs()
+ if codeJSON == nil {
+ b.StopTimer()
+ codeInit()
+ b.StartTimer()
+ }
+ b.RunParallel(func(pb *testing.PB) {
+ var buf bytes.Buffer
+ dec := NewDecoder(&buf)
+ var r codeResponse
+ for pb.Next() {
+ buf.Write(codeJSON)
+ // hide EOF
+ buf.WriteByte('\n')
+ buf.WriteByte('\n')
+ buf.WriteByte('\n')
+ if err := dec.Decode(&r); err != nil {
+ b.Fatalf("Decode error: %v", err)
+ }
+ }
+ })
+ b.SetBytes(int64(len(codeJSON)))
+}
+
+func BenchmarkUnicodeDecoder(b *testing.B) {
+ b.ReportAllocs()
+ j := []byte(`"\uD83D\uDE01"`)
+ b.SetBytes(int64(len(j)))
+ r := bytes.NewReader(j)
+ dec := NewDecoder(r)
+ var out string
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ if err := dec.Decode(&out); err != nil {
+ b.Fatalf("Decode error: %v", err)
+ }
+ r.Seek(0, 0)
+ }
+}
+
+func BenchmarkDecoderStream(b *testing.B) {
+ b.ReportAllocs()
+ b.StopTimer()
+ var buf bytes.Buffer
+ dec := NewDecoder(&buf)
+ buf.WriteString(`"` + strings.Repeat("x", 1000000) + `"` + "\n\n\n")
+ var x any
+ if err := dec.Decode(&x); err != nil {
+ b.Fatalf("Decode error: %v", err)
+ }
+ ones := strings.Repeat(" 1\n", 300000) + "\n\n\n"
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ if i%300000 == 0 {
+ buf.WriteString(ones)
+ }
+ x = nil
+ switch err := dec.Decode(&x); {
+ case err != nil:
+ b.Fatalf("Decode error: %v", err)
+ case x != 1.0:
+ b.Fatalf("Decode: got %v want 1.0", i)
+ }
+ }
+}
+
+func BenchmarkCodeUnmarshal(b *testing.B) {
+ b.ReportAllocs()
+ if codeJSON == nil {
+ b.StopTimer()
+ codeInit()
+ b.StartTimer()
+ }
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ var r codeResponse
+ if err := Unmarshal(codeJSON, &r); err != nil {
+ b.Fatalf("Unmarshal error: %v", err)
+ }
+ }
+ })
+ b.SetBytes(int64(len(codeJSON)))
+}
+
+func BenchmarkCodeUnmarshalReuse(b *testing.B) {
+ b.ReportAllocs()
+ if codeJSON == nil {
+ b.StopTimer()
+ codeInit()
+ b.StartTimer()
+ }
+ b.RunParallel(func(pb *testing.PB) {
+ var r codeResponse
+ for pb.Next() {
+ if err := Unmarshal(codeJSON, &r); err != nil {
+ b.Fatalf("Unmarshal error: %v", err)
+ }
+ }
+ })
+ b.SetBytes(int64(len(codeJSON)))
+}
+
+func BenchmarkUnmarshalString(b *testing.B) {
+ b.ReportAllocs()
+ data := []byte(`"hello, world"`)
+ b.RunParallel(func(pb *testing.PB) {
+ var s string
+ for pb.Next() {
+ if err := Unmarshal(data, &s); err != nil {
+ b.Fatalf("Unmarshal error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkUnmarshalFloat64(b *testing.B) {
+ b.ReportAllocs()
+ data := []byte(`3.14`)
+ b.RunParallel(func(pb *testing.PB) {
+ var f float64
+ for pb.Next() {
+ if err := Unmarshal(data, &f); err != nil {
+ b.Fatalf("Unmarshal error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkUnmarshalInt64(b *testing.B) {
+ b.ReportAllocs()
+ data := []byte(`3`)
+ b.RunParallel(func(pb *testing.PB) {
+ var x int64
+ for pb.Next() {
+ if err := Unmarshal(data, &x); err != nil {
+ b.Fatalf("Unmarshal error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkUnmarshalMap(b *testing.B) {
+ b.ReportAllocs()
+ data := []byte(`{"key1":"value1","key2":"value2","key3":"value3"}`)
+ b.RunParallel(func(pb *testing.PB) {
+ x := make(map[string]string, 3)
+ for pb.Next() {
+ if err := Unmarshal(data, &x); err != nil {
+ b.Fatalf("Unmarshal error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkIssue10335(b *testing.B) {
+ b.ReportAllocs()
+ j := []byte(`{"a":{ }}`)
+ b.RunParallel(func(pb *testing.PB) {
+ var s struct{}
+ for pb.Next() {
+ if err := Unmarshal(j, &s); err != nil {
+ b.Fatalf("Unmarshal error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkIssue34127(b *testing.B) {
+ b.ReportAllocs()
+ j := struct {
+ Bar string `json:"bar,string"`
+ }{
+ Bar: `foobar`,
+ }
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ if _, err := Marshal(&j); err != nil {
+ b.Fatalf("Marshal error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkUnmapped(b *testing.B) {
+ b.ReportAllocs()
+ j := []byte(`{"s": "hello", "y": 2, "o": {"x": 0}, "a": [1, 99, {"x": 1}]}`)
+ b.RunParallel(func(pb *testing.PB) {
+ var s struct{}
+ for pb.Next() {
+ if err := Unmarshal(j, &s); err != nil {
+ b.Fatalf("Unmarshal error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkTypeFieldsCache(b *testing.B) {
+ b.ReportAllocs()
+ var maxTypes int = 1e6
+ if testenv.Builder() != "" {
+ maxTypes = 1e3 // restrict cache sizes on builders
+ }
+
+ // Dynamically generate many new types.
+ types := make([]reflect.Type, maxTypes)
+ fs := []reflect.StructField{{
+ Type: reflect.TypeFor[string](),
+ Index: []int{0},
+ }}
+ for i := range types {
+ fs[0].Name = fmt.Sprintf("TypeFieldsCache%d", i)
+ types[i] = reflect.StructOf(fs)
+ }
+
+ // clearCache clears the cache. Other JSON operations must not be running.
+ clearCache := func() {
+ fieldCache = sync.Map{}
+ }
+
+ // MissTypes tests the performance of repeated cache misses.
+ // This measures the time to rebuild a cache of size nt.
+ for nt := 1; nt <= maxTypes; nt *= 10 {
+ ts := types[:nt]
+ b.Run(fmt.Sprintf("MissTypes%d", nt), func(b *testing.B) {
+ nc := runtime.GOMAXPROCS(0)
+ for i := 0; i < b.N; i++ {
+ clearCache()
+ var wg sync.WaitGroup
+ for j := 0; j < nc; j++ {
+ wg.Add(1)
+ go func(j int) {
+ for _, t := range ts[(j*len(ts))/nc : ((j+1)*len(ts))/nc] {
+ cachedTypeFields(t)
+ }
+ wg.Done()
+ }(j)
+ }
+ wg.Wait()
+ }
+ })
+ }
+
+ // HitTypes tests the performance of repeated cache hits.
+ // This measures the average time of each cache lookup.
+ for nt := 1; nt <= maxTypes; nt *= 10 {
+ // Pre-warm a cache of size nt.
+ clearCache()
+ for _, t := range types[:nt] {
+ cachedTypeFields(t)
+ }
+ b.Run(fmt.Sprintf("HitTypes%d", nt), func(b *testing.B) {
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ cachedTypeFields(types[0])
+ }
+ })
+ })
+ }
+}
+
+func BenchmarkEncodeMarshaler(b *testing.B) {
+ b.ReportAllocs()
+
+ m := struct {
+ A int
+ B RawMessage
+ }{}
+
+ b.RunParallel(func(pb *testing.PB) {
+ enc := NewEncoder(io.Discard)
+
+ for pb.Next() {
+ if err := enc.Encode(&m); err != nil {
+ b.Fatalf("Encode error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkEncoderEncode(b *testing.B) {
+ b.ReportAllocs()
+ type T struct {
+ X, Y string
+ }
+ v := &T{"foo", "bar"}
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ if err := NewEncoder(io.Discard).Encode(v); err != nil {
+ b.Fatalf("Encode error: %v", err)
+ }
+ }
+ })
+}
+
+func BenchmarkNumberIsValid(b *testing.B) {
+ s := "-61657.61667E+61673"
+ for i := 0; i < b.N; i++ {
+ isValidNumber(s)
+ }
+}
+
+func BenchmarkNumberIsValidRegexp(b *testing.B) {
+ var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`)
+ s := "-61657.61667E+61673"
+ for i := 0; i < b.N; i++ {
+ jsonNumberRegexp.MatchString(s)
+ }
+}
+
+func BenchmarkUnmarshalNumber(b *testing.B) {
+ b.ReportAllocs()
+ data := []byte(`"-61657.61667E+61673"`)
+ var number Number
+ for i := 0; i < b.N; i++ {
+ if err := Unmarshal(data, &number); err != nil {
+ b.Fatal("Unmarshal:", err)
+ }
+ }
+}
diff --git a/pkg/encoders/json/decode.go b/pkg/encoders/json/decode.go
new file mode 100644
index 0000000..70885a5
--- /dev/null
+++ b/pkg/encoders/json/decode.go
@@ -0,0 +1,1314 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Represents JSON data structure using native Go types: booleans, floats,
+// strings, arrays, and maps.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+import (
+ "encoding"
+ "encoding/base64"
+ "fmt"
+ "reflect"
+ "strconv"
+ "strings"
+ "unicode"
+ "unicode/utf16"
+ "unicode/utf8"
+ _ "unsafe" // for linkname
+)
+
+// Unmarshal parses the JSON-encoded data and stores the result
+// in the value pointed to by v. If v is nil or not a pointer,
+// Unmarshal returns an [InvalidUnmarshalError].
+//
+// Unmarshal uses the inverse of the encodings that
+// [Marshal] uses, allocating maps, slices, and pointers as necessary,
+// with the following additional rules:
+//
+// To unmarshal JSON into a pointer, Unmarshal first handles the case of
+// the JSON being the JSON literal null. In that case, Unmarshal sets
+// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into
+// the value pointed at by the pointer. If the pointer is nil, Unmarshal
+// allocates a new value for it to point to.
+//
+// To unmarshal JSON into a value implementing [Unmarshaler],
+// Unmarshal calls that value's [Unmarshaler.UnmarshalJSON] method, including
+// when the input is a JSON null.
+// Otherwise, if the value implements [encoding.TextUnmarshaler]
+// and the input is a JSON quoted string, Unmarshal calls
+// [encoding.TextUnmarshaler.UnmarshalText] with the unquoted form of the string.
+//
+// To unmarshal JSON into a struct, Unmarshal matches incoming object keys to
+// the keys used by [Marshal] (either the struct field name or its tag),
+// ignoring case. If multiple struct fields match an object key, an exact case
+// match is preferred over a case-insensitive one.
+//
+// Incoming object members are processed in the order observed. If an object
+// includes duplicate keys, later duplicates will replace or be merged into
+// prior values.
+//
+// To unmarshal JSON into an interface value,
+// Unmarshal stores one of these in the interface value:
+//
+// - bool, for JSON booleans
+// - float64, for JSON numbers
+// - string, for JSON strings
+// - []any, for JSON arrays
+// - map[string]any, for JSON objects
+// - nil for JSON null
+//
+// To unmarshal a JSON array into a slice, Unmarshal resets the slice length
+// to zero and then appends each element to the slice.
+// As a special case, to unmarshal an empty JSON array into a slice,
+// Unmarshal replaces the slice with a new empty slice.
+//
+// To unmarshal a JSON array into a Go array, Unmarshal decodes
+// JSON array elements into corresponding Go array elements.
+// If the Go array is smaller than the JSON array,
+// the additional JSON array elements are discarded.
+// If the JSON array is smaller than the Go array,
+// the additional Go array elements are set to zero values.
+//
+// To unmarshal a JSON object into a map, Unmarshal first establishes a map to
+// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal
+// reuses the existing map, keeping existing entries. Unmarshal then stores
+// key-value pairs from the JSON object into the map. The map's key type must
+// either be any string type, an integer, or implement [encoding.TextUnmarshaler].
+//
+// If the JSON-encoded data contain a syntax error, Unmarshal returns a [SyntaxError].
+//
+// If a JSON value is not appropriate for a given target type,
+// or if a JSON number overflows the target type, Unmarshal
+// skips that field and completes the unmarshaling as best it can.
+// If no more serious errors are encountered, Unmarshal returns
+// an [UnmarshalTypeError] describing the earliest such error. In any
+// case, it's not guaranteed that all the remaining fields following
+// the problematic one will be unmarshaled into the target object.
+//
+// The JSON null value unmarshals into an interface, map, pointer, or slice
+// by setting that Go value to nil. Because null is often used in JSON to mean
+// “not present,” unmarshaling a JSON null into any other Go type has no effect
+// on the value and produces no error.
+//
+// When unmarshaling quoted strings, invalid UTF-8 or
+// invalid UTF-16 surrogate pairs are not treated as an error.
+// Instead, they are replaced by the Unicode replacement
+// character U+FFFD.
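+//
+// As a minimal illustrative sketch (the variable names are examples only,
+// not part of the API):
+//
+//	var msg struct{ Name string }
+//	err := Unmarshal([]byte(`{"name":"a"}`), &msg) // case-insensitive match: msg.Name == "a"
+//
+//	var v any
+//	err = Unmarshal([]byte(`{"n":1}`), &v) // v holds map[string]any{"n": float64(1)}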
+func Unmarshal(data []byte, v any) error {
+ // Check for well-formedness.
+ // Avoids filling out half a data structure
+ // before discovering a JSON syntax error.
+ var d decodeState
+ err := checkValid(data, &d.scan)
+ if err != nil {
+ return err
+ }
+
+ d.init(data)
+ return d.unmarshal(v)
+}
+
+// Unmarshaler is the interface implemented by types
+// that can unmarshal a JSON description of themselves.
+// The input can be assumed to be a valid encoding of
+// a JSON value. UnmarshalJSON must copy the JSON data
+// if it wishes to retain the data after returning.
+type Unmarshaler interface {
+ UnmarshalJSON([]byte) error
+}
+
+// An UnmarshalTypeError describes a JSON value that was
+// not appropriate for a value of a specific Go type.
+type UnmarshalTypeError struct {
+ Value string // description of JSON value - "bool", "array", "number -5"
+ Type reflect.Type // type of Go value it could not be assigned to
+ Offset int64 // error occurred after reading Offset bytes
+ Struct string // name of the struct type containing the field
+ Field string // the full path from root node to the field, include embedded struct
+}
+
+func (e *UnmarshalTypeError) Error() string {
+ if e.Struct != "" || e.Field != "" {
+ return "json: cannot unmarshal " + e.Value + " into Go struct field " + e.Struct + "." + e.Field + " of type " + e.Type.String()
+ }
+ return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String()
+}
+
+// An UnmarshalFieldError describes a JSON object key that
+// led to an unexported (and therefore unwritable) struct field.
+//
+// Deprecated: No longer used; kept for compatibility.
+type UnmarshalFieldError struct {
+ Key string
+ Type reflect.Type
+ Field reflect.StructField
+}
+
+func (e *UnmarshalFieldError) Error() string {
+ return "json: cannot unmarshal object key " + strconv.Quote(e.Key) + " into unexported field " + e.Field.Name + " of type " + e.Type.String()
+}
+
+// An InvalidUnmarshalError describes an invalid argument passed to [Unmarshal].
+// (The argument to [Unmarshal] must be a non-nil pointer.)
+type InvalidUnmarshalError struct {
+ Type reflect.Type
+}
+
+func (e *InvalidUnmarshalError) Error() string {
+ if e.Type == nil {
+ return "json: Unmarshal(nil)"
+ }
+
+ if e.Type.Kind() != reflect.Pointer {
+ return "json: Unmarshal(non-pointer " + e.Type.String() + ")"
+ }
+ return "json: Unmarshal(nil " + e.Type.String() + ")"
+}
+
+func (d *decodeState) unmarshal(v any) error {
+ rv := reflect.ValueOf(v)
+ if rv.Kind() != reflect.Pointer || rv.IsNil() {
+ return &InvalidUnmarshalError{reflect.TypeOf(v)}
+ }
+
+ d.scan.reset()
+ d.scanWhile(scanSkipSpace)
+ // We decode rv not rv.Elem because the Unmarshaler interface
+ // test must be applied at the top level of the value.
+ err := d.value(rv)
+ if err != nil {
+ return d.addErrorContext(err)
+ }
+ return d.savedError
+}
+
+// A Number represents a JSON number literal.
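+//
+// A minimal sketch of preserving the literal text of a number (illustrative only):
+//
+//	var n Number
+//	err := Unmarshal([]byte(`123.456`), &n)
+//	f, err := n.Float64() // f == 123.456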
+type Number string
+
+// String returns the literal text of the number.
+func (n Number) String() string { return string(n) }
+
+// Float64 returns the number as a float64.
+func (n Number) Float64() (float64, error) {
+ return strconv.ParseFloat(string(n), 64)
+}
+
+// Int64 returns the number as an int64.
+func (n Number) Int64() (int64, error) {
+ return strconv.ParseInt(string(n), 10, 64)
+}
+
+// An errorContext provides context for type errors during decoding.
+type errorContext struct {
+ Struct reflect.Type
+ FieldStack []string
+}
+
+// decodeState represents the state while decoding a JSON value.
+type decodeState struct {
+ data []byte
+ off int // next read offset in data
+ opcode int // last read result
+ scan scanner
+ errorContext *errorContext
+ savedError error
+ useNumber bool
+ disallowUnknownFields bool
+}
+
+// readIndex returns the position of the last byte read.
+func (d *decodeState) readIndex() int {
+ return d.off - 1
+}
+
+// phasePanicMsg is used as a panic message when we end up with something that
+// shouldn't happen. It can indicate a bug in the JSON decoder, or that
+// something is editing the data slice while the decoder executes.
+const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?"
+
+func (d *decodeState) init(data []byte) *decodeState {
+ d.data = data
+ d.off = 0
+ d.savedError = nil
+ if d.errorContext != nil {
+ d.errorContext.Struct = nil
+ // Reuse the allocated space for the FieldStack slice.
+ d.errorContext.FieldStack = d.errorContext.FieldStack[:0]
+ }
+ return d
+}
+
+// saveError saves the first err it is called with,
+// for reporting at the end of the unmarshal.
+func (d *decodeState) saveError(err error) {
+ if d.savedError == nil {
+ d.savedError = d.addErrorContext(err)
+ }
+}
+
+// addErrorContext returns a new error enhanced with information from d.errorContext
+func (d *decodeState) addErrorContext(err error) error {
+ if d.errorContext != nil && (d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0) {
+ switch err := err.(type) {
+ case *UnmarshalTypeError:
+ err.Struct = d.errorContext.Struct.Name()
+ fieldStack := d.errorContext.FieldStack
+ if err.Field != "" {
+ fieldStack = append(fieldStack, err.Field)
+ }
+ err.Field = strings.Join(fieldStack, ".")
+ }
+ }
+ return err
+}
+
+// skip scans to the end of what was started.
+func (d *decodeState) skip() {
+ s, data, i := &d.scan, d.data, d.off
+ depth := len(s.parseState)
+ for {
+ op := s.step(s, data[i])
+ i++
+ if len(s.parseState) < depth {
+ d.off = i
+ d.opcode = op
+ return
+ }
+ }
+}
+
+// scanNext processes the byte at d.data[d.off].
+func (d *decodeState) scanNext() {
+ if d.off < len(d.data) {
+ d.opcode = d.scan.step(&d.scan, d.data[d.off])
+ d.off++
+ } else {
+ d.opcode = d.scan.eof()
+ d.off = len(d.data) + 1 // mark processed EOF with len+1
+ }
+}
+
+// scanWhile processes bytes in d.data[d.off:] until it
+// receives a scan code not equal to op.
+func (d *decodeState) scanWhile(op int) {
+ s, data, i := &d.scan, d.data, d.off
+ for i < len(data) {
+ newOp := s.step(s, data[i])
+ i++
+ if newOp != op {
+ d.opcode = newOp
+ d.off = i
+ return
+ }
+ }
+
+ d.off = len(data) + 1 // mark processed EOF with len+1
+ d.opcode = d.scan.eof()
+}
+
+// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the
+// common case where we're decoding a literal. The decoder scans the input
+// twice, once for syntax errors and to check the length of the value, and the
+// second to perform the decoding.
+//
+// Only in the second step do we use decodeState to tokenize literals, so we
+// know there aren't any syntax errors. We can take advantage of that knowledge,
+// and scan a literal's bytes much more quickly.
+func (d *decodeState) rescanLiteral() {
+ data, i := d.data, d.off
+Switch:
+ switch data[i-1] {
+ case '"': // string
+ for ; i < len(data); i++ {
+ switch data[i] {
+ case '\\':
+ i++ // escaped char
+ case '"':
+ i++ // tokenize the closing quote too
+ break Switch
+ }
+ }
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number
+ for ; i < len(data); i++ {
+ switch data[i] {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ '.', 'e', 'E', '+', '-':
+ default:
+ break Switch
+ }
+ }
+ case 't': // true
+ i += len("rue")
+ case 'f': // false
+ i += len("alse")
+ case 'n': // null
+ i += len("ull")
+ }
+ if i < len(data) {
+ d.opcode = stateEndValue(&d.scan, data[i])
+ } else {
+ d.opcode = scanEnd
+ }
+ d.off = i + 1
+}
+
+// value consumes a JSON value from d.data[d.off-1:], decoding into v, and
+// reads the following byte ahead. If v is invalid, the value is discarded.
+// The first byte of the value has been read already.
+func (d *decodeState) value(v reflect.Value) error {
+ switch d.opcode {
+ default:
+ panic(phasePanicMsg)
+
+ case scanBeginArray:
+ if v.IsValid() {
+ if err := d.array(v); err != nil {
+ return err
+ }
+ } else {
+ d.skip()
+ }
+ d.scanNext()
+
+ case scanBeginObject:
+ if v.IsValid() {
+ if err := d.object(v); err != nil {
+ return err
+ }
+ } else {
+ d.skip()
+ }
+ d.scanNext()
+
+ case scanBeginLiteral:
+ // All bytes inside literal return scanContinue op code.
+ start := d.readIndex()
+ d.rescanLiteral()
+
+ if v.IsValid() {
+ if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+type unquotedValue struct{}
+
+// valueQuoted is like value but decodes a
+// quoted string literal or literal null into an interface value.
+// If it finds anything other than a quoted string literal or null,
+// valueQuoted returns unquotedValue{}.
+func (d *decodeState) valueQuoted() any {
+ switch d.opcode {
+ default:
+ panic(phasePanicMsg)
+
+ case scanBeginArray, scanBeginObject:
+ d.skip()
+ d.scanNext()
+
+ case scanBeginLiteral:
+ v := d.literalInterface()
+ switch v.(type) {
+ case nil, string:
+ return v
+ }
+ }
+ return unquotedValue{}
+}
+
+// indirect walks down v allocating pointers as needed,
+// until it gets to a non-pointer.
+// If it encounters an Unmarshaler, indirect stops and returns that.
+// If decodingNull is true, indirect stops at the first settable pointer so it
+// can be set to nil.
+func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) {
+ // Issue #24153 indicates that it is generally not a guaranteed property
+ // that you may round-trip a reflect.Value by calling Value.Addr().Elem()
+ // and expect the value to still be settable for values derived from
+ // unexported embedded struct fields.
+ //
+ // The logic below effectively does this when it first addresses the value
+ // (to satisfy possible pointer methods) and continues to dereference
+ // subsequent pointers as necessary.
+ //
+ // After the first round-trip, we set v back to the original value to
+ // preserve the original RW flags contained in reflect.Value.
+ v0 := v
+ haveAddr := false
+
+ // If v is a named type and is addressable,
+ // start with its address, so that if the type has pointer methods,
+ // we find them.
+ if v.Kind() != reflect.Pointer && v.Type().Name() != "" && v.CanAddr() {
+ haveAddr = true
+ v = v.Addr()
+ }
+ for {
+ // Load value from interface, but only if the result will be
+ // usefully addressable.
+ if v.Kind() == reflect.Interface && !v.IsNil() {
+ e := v.Elem()
+ if e.Kind() == reflect.Pointer && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Pointer) {
+ haveAddr = false
+ v = e
+ continue
+ }
+ }
+
+ if v.Kind() != reflect.Pointer {
+ break
+ }
+
+ if decodingNull && v.CanSet() {
+ break
+ }
+
+ // Prevent infinite loop if v is an interface pointing to its own address:
+ // var v any
+ // v = &v
+ if v.Elem().Kind() == reflect.Interface && v.Elem().Elem().Equal(v) {
+ v = v.Elem()
+ break
+ }
+ if v.IsNil() {
+ v.Set(reflect.New(v.Type().Elem()))
+ }
+ if v.Type().NumMethod() > 0 && v.CanInterface() {
+ if u, ok := v.Interface().(Unmarshaler); ok {
+ return u, nil, reflect.Value{}
+ }
+ if !decodingNull {
+ if u, ok := v.Interface().(encoding.TextUnmarshaler); ok {
+ return nil, u, reflect.Value{}
+ }
+ }
+ }
+
+ if haveAddr {
+ v = v0 // restore original value after round-trip Value.Addr().Elem()
+ haveAddr = false
+ } else {
+ v = v.Elem()
+ }
+ }
+ return nil, nil, v
+}
+
+// array consumes an array from d.data[d.off-1:], decoding into v.
+// The first byte of the array ('[') has been read already.
+func (d *decodeState) array(v reflect.Value) error {
+ // Check for unmarshaler.
+ u, ut, pv := indirect(v, false)
+ if u != nil {
+ start := d.readIndex()
+ d.skip()
+ return u.UnmarshalJSON(d.data[start:d.off])
+ }
+ if ut != nil {
+ d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)})
+ d.skip()
+ return nil
+ }
+ v = pv
+
+ // Check type of target.
+ switch v.Kind() {
+ case reflect.Interface:
+ if v.NumMethod() == 0 {
+ // Decoding into nil interface? Switch to non-reflect code.
+ ai := d.arrayInterface()
+ v.Set(reflect.ValueOf(ai))
+ return nil
+ }
+ // Otherwise it's invalid.
+ fallthrough
+ default:
+ d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)})
+ d.skip()
+ return nil
+ case reflect.Array, reflect.Slice:
+ break
+ }
+
+ i := 0
+ for {
+ // Look ahead for ] - can only happen on first iteration.
+ d.scanWhile(scanSkipSpace)
+ if d.opcode == scanEndArray {
+ break
+ }
+
+ // Expand slice length, growing the slice if necessary.
+ if v.Kind() == reflect.Slice {
+ if i >= v.Cap() {
+ v.Grow(1)
+ }
+ if i >= v.Len() {
+ v.SetLen(i + 1)
+ }
+ }
+
+ if i < v.Len() {
+ // Decode into element.
+ if err := d.value(v.Index(i)); err != nil {
+ return err
+ }
+ } else {
+ // Ran out of fixed array: skip.
+ if err := d.value(reflect.Value{}); err != nil {
+ return err
+ }
+ }
+ i++
+
+ // Next token must be , or ].
+ if d.opcode == scanSkipSpace {
+ d.scanWhile(scanSkipSpace)
+ }
+ if d.opcode == scanEndArray {
+ break
+ }
+ if d.opcode != scanArrayValue {
+ panic(phasePanicMsg)
+ }
+ }
+
+ if i < v.Len() {
+ if v.Kind() == reflect.Array {
+ for ; i < v.Len(); i++ {
+ v.Index(i).SetZero() // zero remainder of array
+ }
+ } else {
+ v.SetLen(i) // truncate the slice
+ }
+ }
+ if i == 0 && v.Kind() == reflect.Slice {
+ v.Set(reflect.MakeSlice(v.Type(), 0, 0))
+ }
+ return nil
+}
+
+var nullLiteral = []byte("null")
+var textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]()
+
+// object consumes an object from d.data[d.off-1:], decoding into v.
+// The first byte ('{') of the object has been read already.
+func (d *decodeState) object(v reflect.Value) error {
+ // Check for unmarshaler.
+ u, ut, pv := indirect(v, false)
+ if u != nil {
+ start := d.readIndex()
+ d.skip()
+ return u.UnmarshalJSON(d.data[start:d.off])
+ }
+ if ut != nil {
+ d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)})
+ d.skip()
+ return nil
+ }
+ v = pv
+ t := v.Type()
+
+ // Decoding into nil interface? Switch to non-reflect code.
+ if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
+ oi := d.objectInterface()
+ v.Set(reflect.ValueOf(oi))
+ return nil
+ }
+
+ var fields structFields
+
+ // Check type of target:
+ // struct or
+ // map[T1]T2 where T1 is string, an integer type,
+ // or an encoding.TextUnmarshaler
+ switch v.Kind() {
+ case reflect.Map:
+ // Map key must either have string kind, have an integer kind,
+ // or be an encoding.TextUnmarshaler.
+ switch t.Key().Kind() {
+ case reflect.String,
+ reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
+ reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ default:
+ if !reflect.PointerTo(t.Key()).Implements(textUnmarshalerType) {
+ d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)})
+ d.skip()
+ return nil
+ }
+ }
+ if v.IsNil() {
+ v.Set(reflect.MakeMap(t))
+ }
+ case reflect.Struct:
+ fields = cachedTypeFields(t)
+ // ok
+ default:
+ d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)})
+ d.skip()
+ return nil
+ }
+
+ var mapElem reflect.Value
+ var origErrorContext errorContext
+ if d.errorContext != nil {
+ origErrorContext = *d.errorContext
+ }
+
+ for {
+ // Read opening " of string key or closing }.
+ d.scanWhile(scanSkipSpace)
+ if d.opcode == scanEndObject {
+ // closing } - can only happen on first iteration.
+ break
+ }
+ if d.opcode != scanBeginLiteral {
+ panic(phasePanicMsg)
+ }
+
+ // Read key.
+ start := d.readIndex()
+ d.rescanLiteral()
+ item := d.data[start:d.readIndex()]
+ key, ok := unquoteBytes(item)
+ if !ok {
+ panic(phasePanicMsg)
+ }
+
+ // Figure out field corresponding to key.
+ var subv reflect.Value
+ destring := false // whether the value is wrapped in a string to be decoded first
+
+ if v.Kind() == reflect.Map {
+ elemType := t.Elem()
+ if !mapElem.IsValid() {
+ mapElem = reflect.New(elemType).Elem()
+ } else {
+ mapElem.SetZero()
+ }
+ subv = mapElem
+ } else {
+ f := fields.byExactName[string(key)]
+ if f == nil {
+ f = fields.byFoldedName[string(foldName(key))]
+ }
+ if f != nil {
+ subv = v
+ destring = f.quoted
+ if d.errorContext == nil {
+ d.errorContext = new(errorContext)
+ }
+ for i, ind := range f.index {
+ if subv.Kind() == reflect.Pointer {
+ if subv.IsNil() {
+ // If a struct embeds a pointer to an unexported type,
+ // it is not possible to set a newly allocated value
+ // since the field is unexported.
+ //
+ // See https://golang.org/issue/21357
+ if !subv.CanSet() {
+ d.saveError(fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", subv.Type().Elem()))
+ // Invalidate subv to ensure d.value(subv) skips over
+ // the JSON value without assigning it to subv.
+ subv = reflect.Value{}
+ destring = false
+ break
+ }
+ subv.Set(reflect.New(subv.Type().Elem()))
+ }
+ subv = subv.Elem()
+ }
+ if i < len(f.index)-1 {
+ d.errorContext.FieldStack = append(
+ d.errorContext.FieldStack,
+ subv.Type().Field(ind).Name,
+ )
+ }
+ subv = subv.Field(ind)
+ }
+ d.errorContext.Struct = t
+ d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name)
+ } else if d.disallowUnknownFields {
+ d.saveError(fmt.Errorf("json: unknown field %q", key))
+ }
+ }
+
+ // Read : before value.
+ if d.opcode == scanSkipSpace {
+ d.scanWhile(scanSkipSpace)
+ }
+ if d.opcode != scanObjectKey {
+ panic(phasePanicMsg)
+ }
+ d.scanWhile(scanSkipSpace)
+
+ if destring {
+ switch qv := d.valueQuoted().(type) {
+ case nil:
+ if err := d.literalStore(nullLiteral, subv, false); err != nil {
+ return err
+ }
+ case string:
+ if err := d.literalStore([]byte(qv), subv, true); err != nil {
+ return err
+ }
+ default:
+ d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type()))
+ }
+ } else {
+ if err := d.value(subv); err != nil {
+ return err
+ }
+ }
+
+ // Write value back to map;
+ // if using struct, subv points into struct already.
+ if v.Kind() == reflect.Map {
+ kt := t.Key()
+ var kv reflect.Value
+ if reflect.PointerTo(kt).Implements(textUnmarshalerType) {
+ kv = reflect.New(kt)
+ if err := d.literalStore(item, kv, true); err != nil {
+ return err
+ }
+ kv = kv.Elem()
+ } else {
+ switch kt.Kind() {
+ case reflect.String:
+ kv = reflect.New(kt).Elem()
+ kv.SetString(string(key))
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ s := string(key)
+ n, err := strconv.ParseInt(s, 10, 64)
+ if err != nil || kt.OverflowInt(n) {
+ d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)})
+ break
+ }
+ kv = reflect.New(kt).Elem()
+ kv.SetInt(n)
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ s := string(key)
+ n, err := strconv.ParseUint(s, 10, 64)
+ if err != nil || kt.OverflowUint(n) {
+ d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)})
+ break
+ }
+ kv = reflect.New(kt).Elem()
+ kv.SetUint(n)
+ default:
+ panic("json: Unexpected key type") // should never occur
+ }
+ }
+ if kv.IsValid() {
+ v.SetMapIndex(kv, subv)
+ }
+ }
+
+ // Next token must be , or }.
+ if d.opcode == scanSkipSpace {
+ d.scanWhile(scanSkipSpace)
+ }
+ if d.errorContext != nil {
+ // Reset errorContext to its original state.
+ // Keep the same underlying array for FieldStack, to reuse the
+ // space and avoid unnecessary allocs.
+ d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)]
+ d.errorContext.Struct = origErrorContext.Struct
+ }
+ if d.opcode == scanEndObject {
+ break
+ }
+ if d.opcode != scanObjectValue {
+ panic(phasePanicMsg)
+ }
+ }
+ return nil
+}
+
+// convertNumber converts the number literal s to a float64 or a Number
+// depending on the setting of d.useNumber.
+func (d *decodeState) convertNumber(s string) (any, error) {
+ if d.useNumber {
+ return Number(s), nil
+ }
+ f, err := strconv.ParseFloat(s, 64)
+ if err != nil {
+ return nil, &UnmarshalTypeError{Value: "number " + s, Type: reflect.TypeFor[float64](), Offset: int64(d.off)}
+ }
+ return f, nil
+}
+
+var numberType = reflect.TypeFor[Number]()
+
+// literalStore decodes a literal stored in item into v.
+//
+// fromQuoted indicates whether this literal came from unwrapping a
+// string from the ",string" struct tag option. This is used only to
+// produce more helpful error messages.
+func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) error {
+ // Check for unmarshaler.
+ if len(item) == 0 {
+ // Empty string given.
+ d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
+ return nil
+ }
+ isNull := item[0] == 'n' // null
+ u, ut, pv := indirect(v, isNull)
+ if u != nil {
+ return u.UnmarshalJSON(item)
+ }
+ if ut != nil {
+ if item[0] != '"' {
+ if fromQuoted {
+ d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
+ return nil
+ }
+ val := "number"
+ switch item[0] {
+ case 'n':
+ val = "null"
+ case 't', 'f':
+ val = "bool"
+ }
+ d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
+ return nil
+ }
+ s, ok := unquoteBytes(item)
+ if !ok {
+ if fromQuoted {
+ return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
+ }
+ panic(phasePanicMsg)
+ }
+ return ut.UnmarshalText(s)
+ }
+
+ v = pv
+
+ switch c := item[0]; c {
+ case 'n': // null
+ // The main parser checks that only null can reach here,
+ // but if this was a quoted string input, it could be anything.
+ if fromQuoted && string(item) != "null" {
+ d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
+ break
+ }
+ switch v.Kind() {
+ case reflect.Interface, reflect.Pointer, reflect.Map, reflect.Slice:
+ v.SetZero()
+ // otherwise, ignore null for primitives/string
+ }
+ case 't', 'f': // true, false
+ value := item[0] == 't'
+ // The main parser checks that only true and false can reach here,
+ // but if this was a quoted string input, it could be anything.
+ if fromQuoted && string(item) != "true" && string(item) != "false" {
+ d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
+ break
+ }
+ switch v.Kind() {
+ default:
+ if fromQuoted {
+ d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
+ } else {
+ d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())})
+ }
+ case reflect.Bool:
+ v.SetBool(value)
+ case reflect.Interface:
+ if v.NumMethod() == 0 {
+ v.Set(reflect.ValueOf(value))
+ } else {
+ d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())})
+ }
+ }
+
+ case '"': // string
+ s, ok := unquoteBytes(item)
+ if !ok {
+ if fromQuoted {
+ return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
+ }
+ panic(phasePanicMsg)
+ }
+ switch v.Kind() {
+ default:
+ d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
+ case reflect.Slice:
+ if v.Type().Elem().Kind() != reflect.Uint8 {
+ d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
+ break
+ }
+ b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
+ n, err := base64.StdEncoding.Decode(b, s)
+ if err != nil {
+ d.saveError(err)
+ break
+ }
+ v.SetBytes(b[:n])
+ case reflect.String:
+ t := string(s)
+ if v.Type() == numberType && !isValidNumber(t) {
+ return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item)
+ }
+ v.SetString(t)
+ case reflect.Interface:
+ if v.NumMethod() == 0 {
+ v.Set(reflect.ValueOf(string(s)))
+ } else {
+ d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
+ }
+ }
+
+ default: // number
+ if c != '-' && (c < '0' || c > '9') {
+ if fromQuoted {
+ return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
+ }
+ panic(phasePanicMsg)
+ }
+ switch v.Kind() {
+ default:
+ if v.Kind() == reflect.String && v.Type() == numberType {
+ // s must be a valid number, because it's
+ // already been tokenized.
+ v.SetString(string(item))
+ break
+ }
+ if fromQuoted {
+ return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
+ }
+ d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())})
+ case reflect.Interface:
+ n, err := d.convertNumber(string(item))
+ if err != nil {
+ d.saveError(err)
+ break
+ }
+ if v.NumMethod() != 0 {
+ d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())})
+ break
+ }
+ v.Set(reflect.ValueOf(n))
+
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ n, err := strconv.ParseInt(string(item), 10, 64)
+ if err != nil || v.OverflowInt(n) {
+ d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())})
+ break
+ }
+ v.SetInt(n)
+
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ n, err := strconv.ParseUint(string(item), 10, 64)
+ if err != nil || v.OverflowUint(n) {
+ d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())})
+ break
+ }
+ v.SetUint(n)
+
+ case reflect.Float32, reflect.Float64:
+ n, err := strconv.ParseFloat(string(item), v.Type().Bits())
+ if err != nil || v.OverflowFloat(n) {
+ d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())})
+ break
+ }
+ v.SetFloat(n)
+ }
+ }
+ return nil
+}
+
+// The xxxInterface routines build up a value to be stored
+// in an empty interface. They are not strictly necessary,
+// but they avoid the weight of reflection in this common case.
+
+// valueInterface is like value but returns any.
+func (d *decodeState) valueInterface() (val any) {
+ switch d.opcode {
+ default:
+ panic(phasePanicMsg)
+ case scanBeginArray:
+ val = d.arrayInterface()
+ d.scanNext()
+ case scanBeginObject:
+ val = d.objectInterface()
+ d.scanNext()
+ case scanBeginLiteral:
+ val = d.literalInterface()
+ }
+ return
+}
+
+// arrayInterface is like array but returns []any.
+func (d *decodeState) arrayInterface() []any {
+ var v = make([]any, 0)
+ for {
+ // Look ahead for ] - can only happen on first iteration.
+ d.scanWhile(scanSkipSpace)
+ if d.opcode == scanEndArray {
+ break
+ }
+
+ v = append(v, d.valueInterface())
+
+ // Next token must be , or ].
+ if d.opcode == scanSkipSpace {
+ d.scanWhile(scanSkipSpace)
+ }
+ if d.opcode == scanEndArray {
+ break
+ }
+ if d.opcode != scanArrayValue {
+ panic(phasePanicMsg)
+ }
+ }
+ return v
+}
+
+// objectInterface is like object but returns map[string]any.
+func (d *decodeState) objectInterface() map[string]any {
+ m := make(map[string]any)
+ for {
+ // Read opening " of string key or closing }.
+ d.scanWhile(scanSkipSpace)
+ if d.opcode == scanEndObject {
+ // closing } - can only happen on first iteration.
+ break
+ }
+ if d.opcode != scanBeginLiteral {
+ panic(phasePanicMsg)
+ }
+
+ // Read string key.
+ start := d.readIndex()
+ d.rescanLiteral()
+ item := d.data[start:d.readIndex()]
+ key, ok := unquote(item)
+ if !ok {
+ panic(phasePanicMsg)
+ }
+
+ // Read : before value.
+ if d.opcode == scanSkipSpace {
+ d.scanWhile(scanSkipSpace)
+ }
+ if d.opcode != scanObjectKey {
+ panic(phasePanicMsg)
+ }
+ d.scanWhile(scanSkipSpace)
+
+ // Read value.
+ m[key] = d.valueInterface()
+
+ // Next token must be , or }.
+ if d.opcode == scanSkipSpace {
+ d.scanWhile(scanSkipSpace)
+ }
+ if d.opcode == scanEndObject {
+ break
+ }
+ if d.opcode != scanObjectValue {
+ panic(phasePanicMsg)
+ }
+ }
+ return m
+}
+
+// literalInterface consumes and returns a literal from d.data[d.off-1:] and
+// it reads the following byte ahead. The first byte of the literal has been
+// read already (that's how the caller knows it's a literal).
+func (d *decodeState) literalInterface() any {
+ // All bytes inside literal return scanContinue op code.
+ start := d.readIndex()
+ d.rescanLiteral()
+
+ item := d.data[start:d.readIndex()]
+
+ switch c := item[0]; c {
+ case 'n': // null
+ return nil
+
+ case 't', 'f': // true, false
+ return c == 't'
+
+ case '"': // string
+ s, ok := unquote(item)
+ if !ok {
+ panic(phasePanicMsg)
+ }
+ return s
+
+ default: // number
+ if c != '-' && (c < '0' || c > '9') {
+ panic(phasePanicMsg)
+ }
+ n, err := d.convertNumber(string(item))
+ if err != nil {
+ d.saveError(err)
+ }
+ return n
+ }
+}
+
+// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
+// or it returns -1.
+func getu4(s []byte) rune {
+ if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
+ return -1
+ }
+ var r rune
+ for _, c := range s[2:6] {
+ switch {
+ case '0' <= c && c <= '9':
+ c = c - '0'
+ case 'a' <= c && c <= 'f':
+ c = c - 'a' + 10
+ case 'A' <= c && c <= 'F':
+ c = c - 'A' + 10
+ default:
+ return -1
+ }
+ r = r*16 + rune(c)
+ }
+ return r
+}
+
+// unquote converts a quoted JSON string literal s into an actual string t.
+// The rules are different from Go's, so strconv.Unquote cannot be used.
+func unquote(s []byte) (t string, ok bool) {
+ s, ok = unquoteBytes(s)
+ t = string(s)
+ return
+}
+
+// unquoteBytes should be an internal detail,
+// but widely used packages access it using linkname.
+// Notable members of the hall of shame include:
+// - github.com/bytedance/sonic
+//
+// Do not remove or change the type signature.
+// See go.dev/issue/67401.
+//
+//go:linkname unquoteBytes
+func unquoteBytes(s []byte) (t []byte, ok bool) {
+ if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
+ return
+ }
+ s = s[1 : len(s)-1]
+
+ // Check for unusual characters. If there are none,
+ // then no unquoting is needed, so return a slice of the
+ // original bytes.
+ r := 0
+ for r < len(s) {
+ c := s[r]
+ if c == '\\' || c == '"' || c < ' ' {
+ break
+ }
+ if c < utf8.RuneSelf {
+ r++
+ continue
+ }
+ rr, size := utf8.DecodeRune(s[r:])
+ if rr == utf8.RuneError && size == 1 {
+ break
+ }
+ r += size
+ }
+ if r == len(s) {
+ return s, true
+ }
+
+ b := make([]byte, len(s)+2*utf8.UTFMax)
+ w := copy(b, s[0:r])
+ for r < len(s) {
+ // Out of room? Can only happen if s is full of
+ // malformed UTF-8 and we're replacing each
+ // byte with RuneError.
+ if w >= len(b)-2*utf8.UTFMax {
+ nb := make([]byte, (len(b)+utf8.UTFMax)*2)
+ copy(nb, b[0:w])
+ b = nb
+ }
+ switch c := s[r]; {
+ case c == '\\':
+ r++
+ if r >= len(s) {
+ return
+ }
+ switch s[r] {
+ default:
+ return
+ case '"', '\\', '/', '\'':
+ b[w] = s[r]
+ r++
+ w++
+ case 'b':
+ b[w] = '\b'
+ r++
+ w++
+ case 'f':
+ b[w] = '\f'
+ r++
+ w++
+ case 'n':
+ b[w] = '\n'
+ r++
+ w++
+ case 'r':
+ b[w] = '\r'
+ r++
+ w++
+ case 't':
+ b[w] = '\t'
+ r++
+ w++
+ case 'u':
+ r--
+ rr := getu4(s[r:])
+ if rr < 0 {
+ return
+ }
+ r += 6
+ if utf16.IsSurrogate(rr) {
+ rr1 := getu4(s[r:])
+ if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
+ // A valid pair; consume.
+ r += 6
+ w += utf8.EncodeRune(b[w:], dec)
+ break
+ }
+ // Invalid surrogate; fall back to replacement rune.
+ rr = unicode.ReplacementChar
+ }
+ w += utf8.EncodeRune(b[w:], rr)
+ }
+
+ // Quote, control characters are invalid.
+ case c == '"', c < ' ':
+ return
+
+ // ASCII
+ case c < utf8.RuneSelf:
+ b[w] = c
+ r++
+ w++
+
+ // Coerce to well-formed UTF-8.
+ default:
+ rr, size := utf8.DecodeRune(s[r:])
+ r += size
+ w += utf8.EncodeRune(b[w:], rr)
+ }
+ }
+ return b[0:w], true
+}
diff --git a/pkg/encoders/json/decode_test.go b/pkg/encoders/json/decode_test.go
new file mode 100644
index 0000000..0df31c8
--- /dev/null
+++ b/pkg/encoders/json/decode_test.go
@@ -0,0 +1,2830 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+import (
+ "bytes"
+ "encoding"
+ "errors"
+ "fmt"
+ "image"
+ "io"
+ "maps"
+ "math"
+ "math/big"
+ "net"
+ "reflect"
+ "slices"
+ "strconv"
+ "strings"
+ "testing"
+ "time"
+)
+
+type T struct {
+ X string
+ Y int
+ Z int `json:"-"`
+}
+
+type U struct {
+ Alphabet string `json:"alpha"`
+}
+
+type V struct {
+ F1 any
+ F2 int32
+ F3 Number
+ F4 *VOuter
+}
+
+type VOuter struct {
+ V V
+}
+
+type W struct {
+ S SS
+}
+
+type P struct {
+ PP PP
+}
+
+type PP struct {
+ T T
+ Ts []T
+}
+
+type SS string
+
+func (*SS) UnmarshalJSON(data []byte) error {
+ return &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[SS]()}
+}
+
+type TAlias T
+
+func (tt *TAlias) UnmarshalJSON(data []byte) error {
+ t := T{}
+ if err := Unmarshal(data, &t); err != nil {
+ return err
+ }
+ *tt = TAlias(t)
+ return nil
+}
+
+type TOuter struct {
+ T TAlias
+}
+
+// ifaceNumAsFloat64/ifaceNumAsNumber are used to test unmarshaling with and
+// without UseNumber
+var ifaceNumAsFloat64 = map[string]any{
+ "k1": float64(1),
+ "k2": "s",
+ "k3": []any{float64(1), float64(2.0), float64(3e-3)},
+ "k4": map[string]any{"kk1": "s", "kk2": float64(2)},
+}
+
+var ifaceNumAsNumber = map[string]any{
+ "k1": Number("1"),
+ "k2": "s",
+ "k3": []any{Number("1"), Number("2.0"), Number("3e-3")},
+ "k4": map[string]any{"kk1": "s", "kk2": Number("2")},
+}
+
+type tx struct {
+ x int
+}
+
+type u8 uint8
+
+// A type that can unmarshal itself.
+
+type unmarshaler struct {
+ T bool
+}
+
+func (u *unmarshaler) UnmarshalJSON(b []byte) error {
+ *u = unmarshaler{true} // All we need to see that UnmarshalJSON is called.
+ return nil
+}
+
+type ustruct struct {
+ M unmarshaler
+}
+
+type unmarshalerText struct {
+ A, B string
+}
+
+// needed for re-marshaling tests
+func (u unmarshalerText) MarshalText() ([]byte, error) {
+ return []byte(u.A + ":" + u.B), nil
+}
+
+func (u *unmarshalerText) UnmarshalText(b []byte) error {
+ pos := bytes.IndexByte(b, ':')
+ if pos == -1 {
+ return errors.New("missing separator")
+ }
+ u.A, u.B = string(b[:pos]), string(b[pos+1:])
+ return nil
+}
+
+var _ encoding.TextUnmarshaler = (*unmarshalerText)(nil)
+
+type ustructText struct {
+ M unmarshalerText
+}
+
+// u8marshal is an integer type that can marshal/unmarshal itself.
+type u8marshal uint8
+
+func (u8 u8marshal) MarshalText() ([]byte, error) {
+ return []byte(fmt.Sprintf("u%d", u8)), nil
+}
+
+var errMissingU8Prefix = errors.New("missing 'u' prefix")
+
+func (u8 *u8marshal) UnmarshalText(b []byte) error {
+ if !bytes.HasPrefix(b, []byte{'u'}) {
+ return errMissingU8Prefix
+ }
+ n, err := strconv.Atoi(string(b[1:]))
+ if err != nil {
+ return err
+ }
+ *u8 = u8marshal(n)
+ return nil
+}
+
+var _ encoding.TextUnmarshaler = (*u8marshal)(nil)
+
+var (
+ umtrue = unmarshaler{true}
+ umslice = []unmarshaler{{true}}
+ umstruct = ustruct{unmarshaler{true}}
+
+ umtrueXY = unmarshalerText{"x", "y"}
+ umsliceXY = []unmarshalerText{{"x", "y"}}
+ umstructXY = ustructText{unmarshalerText{"x", "y"}}
+
+ ummapXY = map[unmarshalerText]bool{{"x", "y"}: true}
+)
+
+// Test data structures for anonymous fields.
+
+type Point struct {
+ Z int
+}
+
+type Top struct {
+ Level0 int
+ Embed0
+ *Embed0a
+ *Embed0b `json:"e,omitempty"` // treated as named
+ Embed0c `json:"-"` // ignored
+ Loop
+ Embed0p // has Point with X, Y, used
+ Embed0q // has Point with Z, used
+ embed // contains exported field
+}
+
+type Embed0 struct {
+ Level1a int // overridden by Embed0a's Level1a with json tag
+ Level1b int // used because Embed0a's Level1b is renamed
+ Level1c int // used because Embed0a's Level1c is ignored
+ Level1d int // annihilated by Embed0a's Level1d
+ Level1e int `json:"x"` // annihilated by Embed0a.Level1e
+}
+
+type Embed0a struct {
+ Level1a int `json:"Level1a,omitempty"`
+ Level1b int `json:"LEVEL1B,omitempty"`
+ Level1c int `json:"-"`
+ Level1d int // annihilated by Embed0's Level1d
+ Level1f int `json:"x"` // annihilated by Embed0's Level1e
+}
+
+type Embed0b Embed0
+
+type Embed0c Embed0
+
+type Embed0p struct {
+ image.Point
+}
+
+type Embed0q struct {
+ Point
+}
+
+type embed struct {
+ Q int
+}
+
+type Loop struct {
+ Loop1 int `json:",omitempty"`
+ Loop2 int `json:",omitempty"`
+ *Loop
+}
+
+// From reflect test:
+// The X in S6 and S7 annihilate, but they also block the X in S8.S9.
+type S5 struct {
+ S6
+ S7
+ S8
+}
+
+type S6 struct {
+ X int
+}
+
+type S7 S6
+
+type S8 struct {
+ S9
+}
+
+type S9 struct {
+ X int
+ Y int
+}
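+
+// sketchEmbeddedFieldConflict is a hypothetical helper (not part of the
+// upstream test suite) spelling out the annihilation rule described above:
+// S6.X and S7.X conflict at the same depth, so neither is used, and they also
+// hide the deeper S8.S9.X, leaving Y as the only promoted field.
+func sketchEmbeddedFieldConflict() []byte {
+ b, _ := Marshal(S5{S6: S6{X: 1}, S7: S7{X: 2}, S8: S8{S9: S9{X: 3, Y: 4}}})
+ return b // expected to be {"Y":4}
+}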
+
+// From reflect test:
+// The X in S11.S6 and S12.S6 annihilate, but they also block the X in S13.S8.S9.
+type S10 struct {
+ S11
+ S12
+ S13
+}
+
+type S11 struct {
+ S6
+}
+
+type S12 struct {
+ S6
+}
+
+type S13 struct {
+ S8
+}
+
+type Ambig struct {
+ // Given "hello", the first match should win.
+ First int `json:"HELLO"`
+ Second int `json:"Hello"`
+}
+
+type XYZ struct {
+ X any
+ Y any
+ Z any
+}
+
+type unexportedWithMethods struct{}
+
+func (unexportedWithMethods) F() {}
+
+type byteWithMarshalJSON byte
+
+func (b byteWithMarshalJSON) MarshalJSON() ([]byte, error) {
+ return []byte(fmt.Sprintf(`"Z%.2x"`, byte(b))), nil
+}
+
+func (b *byteWithMarshalJSON) UnmarshalJSON(data []byte) error {
+ if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' {
+ return fmt.Errorf("bad quoted string")
+ }
+ i, err := strconv.ParseInt(string(data[2:4]), 16, 8)
+ if err != nil {
+ return fmt.Errorf("bad hex")
+ }
+ *b = byteWithMarshalJSON(i)
+ return nil
+}
+
+type byteWithPtrMarshalJSON byte
+
+func (b *byteWithPtrMarshalJSON) MarshalJSON() ([]byte, error) {
+ return byteWithMarshalJSON(*b).MarshalJSON()
+}
+
+func (b *byteWithPtrMarshalJSON) UnmarshalJSON(data []byte) error {
+ return (*byteWithMarshalJSON)(b).UnmarshalJSON(data)
+}
+
+type byteWithMarshalText byte
+
+func (b byteWithMarshalText) MarshalText() ([]byte, error) {
+ return []byte(fmt.Sprintf(`Z%.2x`, byte(b))), nil
+}
+
+func (b *byteWithMarshalText) UnmarshalText(data []byte) error {
+ if len(data) != 3 || data[0] != 'Z' {
+ return fmt.Errorf("bad quoted string")
+ }
+ i, err := strconv.ParseInt(string(data[1:3]), 16, 8)
+ if err != nil {
+ return fmt.Errorf("bad hex")
+ }
+ *b = byteWithMarshalText(i)
+ return nil
+}
+
+type byteWithPtrMarshalText byte
+
+func (b *byteWithPtrMarshalText) MarshalText() ([]byte, error) {
+ return byteWithMarshalText(*b).MarshalText()
+}
+
+func (b *byteWithPtrMarshalText) UnmarshalText(data []byte) error {
+ return (*byteWithMarshalText)(b).UnmarshalText(data)
+}
+
+type intWithMarshalJSON int
+
+func (b intWithMarshalJSON) MarshalJSON() ([]byte, error) {
+ return []byte(fmt.Sprintf(`"Z%.2x"`, int(b))), nil
+}
+
+func (b *intWithMarshalJSON) UnmarshalJSON(data []byte) error {
+ if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' {
+ return fmt.Errorf("bad quoted string")
+ }
+ i, err := strconv.ParseInt(string(data[2:4]), 16, 8)
+ if err != nil {
+ return fmt.Errorf("bad hex")
+ }
+ *b = intWithMarshalJSON(i)
+ return nil
+}
+
+type intWithPtrMarshalJSON int
+
+func (b *intWithPtrMarshalJSON) MarshalJSON() ([]byte, error) {
+ return intWithMarshalJSON(*b).MarshalJSON()
+}
+
+func (b *intWithPtrMarshalJSON) UnmarshalJSON(data []byte) error {
+ return (*intWithMarshalJSON)(b).UnmarshalJSON(data)
+}
+
+type intWithMarshalText int
+
+func (b intWithMarshalText) MarshalText() ([]byte, error) {
+ return []byte(fmt.Sprintf(`Z%.2x`, int(b))), nil
+}
+
+func (b *intWithMarshalText) UnmarshalText(data []byte) error {
+ if len(data) != 3 || data[0] != 'Z' {
+ return fmt.Errorf("bad quoted string")
+ }
+ i, err := strconv.ParseInt(string(data[1:3]), 16, 8)
+ if err != nil {
+ return fmt.Errorf("bad hex")
+ }
+ *b = intWithMarshalText(i)
+ return nil
+}
+
+type intWithPtrMarshalText int
+
+func (b *intWithPtrMarshalText) MarshalText() ([]byte, error) {
+ return intWithMarshalText(*b).MarshalText()
+}
+
+func (b *intWithPtrMarshalText) UnmarshalText(data []byte) error {
+ return (*intWithMarshalText)(b).UnmarshalText(data)
+}
+
+type mapStringToStringData struct {
+ Data map[string]string `json:"data"`
+}
+
+type B struct {
+ B bool `json:",string"`
+}
+
+type DoublePtr struct {
+ I **int
+ J **int
+}
+
+var unmarshalTests = []struct {
+ CaseName
+ in string
+ ptr any // new(type)
+ out any
+ err error
+ useNumber bool
+ golden bool
+ disallowUnknownFields bool
+}{
+ // basic types
+ {CaseName: Name(""), in: `true`, ptr: new(bool), out: true},
+ {CaseName: Name(""), in: `1`, ptr: new(int), out: 1},
+ {CaseName: Name(""), in: `1.2`, ptr: new(float64), out: 1.2},
+ {CaseName: Name(""), in: `-5`, ptr: new(int16), out: int16(-5)},
+ {CaseName: Name(""), in: `2`, ptr: new(Number), out: Number("2"), useNumber: true},
+ {CaseName: Name(""), in: `2`, ptr: new(Number), out: Number("2")},
+ {CaseName: Name(""), in: `2`, ptr: new(any), out: float64(2.0)},
+ {CaseName: Name(""), in: `2`, ptr: new(any), out: Number("2"), useNumber: true},
+ {CaseName: Name(""), in: `"a\u1234"`, ptr: new(string), out: "a\u1234"},
+ {CaseName: Name(""), in: `"http:\/\/"`, ptr: new(string), out: "http://"},
+ {CaseName: Name(""), in: `"g-clef: \uD834\uDD1E"`, ptr: new(string), out: "g-clef: \U0001D11E"},
+ {CaseName: Name(""), in: `"invalid: \uD834x\uDD1E"`, ptr: new(string), out: "invalid: \uFFFDx\uFFFD"},
+ {CaseName: Name(""), in: "null", ptr: new(any), out: nil},
+ {CaseName: Name(""), in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeFor[string](), 7, "T", "X"}},
+ {CaseName: Name(""), in: `{"X": 23}`, ptr: new(T), out: T{}, err: &UnmarshalTypeError{"number", reflect.TypeFor[string](), 8, "T", "X"}},
+ {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), out: tx{}},
+ {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), out: tx{}},
+ {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), err: fmt.Errorf("json: unknown field \"x\""), disallowUnknownFields: true},
+ {CaseName: Name(""), in: `{"S": 23}`, ptr: new(W), out: W{}, err: &UnmarshalTypeError{"number", reflect.TypeFor[SS](), 0, "W", "S"}},
+ {CaseName: Name(""), in: `{"T": {"X": 23}}`, ptr: new(TOuter), out: TOuter{}, err: &UnmarshalTypeError{"number", reflect.TypeFor[string](), 8, "TOuter", "T.X"}},
+ {CaseName: Name(""), in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}},
+ {CaseName: Name(""), in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true},
+ {CaseName: Name(""), in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(any), out: ifaceNumAsFloat64},
+ {CaseName: Name(""), in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(any), out: ifaceNumAsNumber, useNumber: true},
+
+ // raw values with whitespace
+ {CaseName: Name(""), in: "\n true ", ptr: new(bool), out: true},
+ {CaseName: Name(""), in: "\t 1 ", ptr: new(int), out: 1},
+ {CaseName: Name(""), in: "\r 1.2 ", ptr: new(float64), out: 1.2},
+ {CaseName: Name(""), in: "\t -5 \n", ptr: new(int16), out: int16(-5)},
+ {CaseName: Name(""), in: "\t \"a\\u1234\" \n", ptr: new(string), out: "a\u1234"},
+
+ // Z has a "-" tag.
+ {CaseName: Name(""), in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}},
+ {CaseName: Name(""), in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}, err: fmt.Errorf("json: unknown field \"Z\""), disallowUnknownFields: true},
+
+ {CaseName: Name(""), in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}},
+ {CaseName: Name(""), in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}, err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true},
+ {CaseName: Name(""), in: `{"alpha": "abc"}`, ptr: new(U), out: U{Alphabet: "abc"}},
+ {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), out: U{}},
+ {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true},
+
+ // syntax errors
+ {CaseName: Name(""), in: ``, ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 0}},
+ {CaseName: Name(""), in: " \n\r\t", ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 4}},
+ {CaseName: Name(""), in: `[2, 3`, ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 5}},
+ {CaseName: Name(""), in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", 17}},
+ {CaseName: Name(""), in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}},
+ {CaseName: Name(""), in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true},
+ {CaseName: Name(""), in: `{"F3": -}`, ptr: new(V), err: &SyntaxError{"invalid character '}' in numeric literal", 9}},
+
+ // raw value errors
+ {CaseName: Name(""), in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
+ {CaseName: Name(""), in: " 42 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 5}},
+ {CaseName: Name(""), in: "\x01 true", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
+ {CaseName: Name(""), in: " false \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 8}},
+ {CaseName: Name(""), in: "\x01 1.2", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
+ {CaseName: Name(""), in: " 3.4 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 6}},
+ {CaseName: Name(""), in: "\x01 \"string\"", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
+ {CaseName: Name(""), in: " \"string\" \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 11}},
+
+ // array tests
+ {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([3]int), out: [3]int{1, 2, 3}},
+ {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([1]int), out: [1]int{1}},
+ {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([5]int), out: [5]int{1, 2, 3, 0, 0}},
+ {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new(MustNotUnmarshalJSON), err: errors.New("MustNotUnmarshalJSON was used")},
+
+ // empty array to interface test
+ {CaseName: Name(""), in: `[]`, ptr: new([]any), out: []any{}},
+ {CaseName: Name(""), in: `null`, ptr: new([]any), out: []any(nil)},
+ {CaseName: Name(""), in: `{"T":[]}`, ptr: new(map[string]any), out: map[string]any{"T": []any{}}},
+ {CaseName: Name(""), in: `{"T":null}`, ptr: new(map[string]any), out: map[string]any{"T": any(nil)}},
+
+ // composite tests
+ {CaseName: Name(""), in: allValueIndent, ptr: new(All), out: allValue},
+ {CaseName: Name(""), in: allValueCompact, ptr: new(All), out: allValue},
+ {CaseName: Name(""), in: allValueIndent, ptr: new(*All), out: &allValue},
+ {CaseName: Name(""), in: allValueCompact, ptr: new(*All), out: &allValue},
+ {CaseName: Name(""), in: pallValueIndent, ptr: new(All), out: pallValue},
+ {CaseName: Name(""), in: pallValueCompact, ptr: new(All), out: pallValue},
+ {CaseName: Name(""), in: pallValueIndent, ptr: new(*All), out: &pallValue},
+ {CaseName: Name(""), in: pallValueCompact, ptr: new(*All), out: &pallValue},
+
+ // unmarshal interface test
+ {CaseName: Name(""), in: `{"T":false}`, ptr: new(unmarshaler), out: umtrue}, // use "false" so test will fail if custom unmarshaler is not called
+ {CaseName: Name(""), in: `{"T":false}`, ptr: new(*unmarshaler), out: &umtrue},
+ {CaseName: Name(""), in: `[{"T":false}]`, ptr: new([]unmarshaler), out: umslice},
+ {CaseName: Name(""), in: `[{"T":false}]`, ptr: new(*[]unmarshaler), out: &umslice},
+ {CaseName: Name(""), in: `{"M":{"T":"x:y"}}`, ptr: new(ustruct), out: umstruct},
+
+ // UnmarshalText interface test
+ {CaseName: Name(""), in: `"x:y"`, ptr: new(unmarshalerText), out: umtrueXY},
+ {CaseName: Name(""), in: `"x:y"`, ptr: new(*unmarshalerText), out: &umtrueXY},
+ {CaseName: Name(""), in: `["x:y"]`, ptr: new([]unmarshalerText), out: umsliceXY},
+ {CaseName: Name(""), in: `["x:y"]`, ptr: new(*[]unmarshalerText), out: &umsliceXY},
+ {CaseName: Name(""), in: `{"M":"x:y"}`, ptr: new(ustructText), out: umstructXY},
+
+ // integer-keyed map test
+ {
+ CaseName: Name(""),
+ in: `{"-1":"a","0":"b","1":"c"}`,
+ ptr: new(map[int]string),
+ out: map[int]string{-1: "a", 0: "b", 1: "c"},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"0":"a","10":"c","9":"b"}`,
+ ptr: new(map[u8]string),
+ out: map[u8]string{0: "a", 9: "b", 10: "c"},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"-9223372036854775808":"min","9223372036854775807":"max"}`,
+ ptr: new(map[int64]string),
+ out: map[int64]string{math.MinInt64: "min", math.MaxInt64: "max"},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"18446744073709551615":"max"}`,
+ ptr: new(map[uint64]string),
+ out: map[uint64]string{math.MaxUint64: "max"},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"0":false,"10":true}`,
+ ptr: new(map[uintptr]bool),
+ out: map[uintptr]bool{0: false, 10: true},
+ },
+
+ // Check that MarshalText and UnmarshalText take precedence
+ // over default integer handling in map keys.
+ {
+ CaseName: Name(""),
+ in: `{"u2":4}`,
+ ptr: new(map[u8marshal]int),
+ out: map[u8marshal]int{2: 4},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"2":4}`,
+ ptr: new(map[u8marshal]int),
+ out: map[u8marshal]int{},
+ err: errMissingU8Prefix,
+ },
+
+ // integer-keyed map errors
+ {
+ CaseName: Name(""),
+ in: `{"abc":"abc"}`,
+ ptr: new(map[int]string),
+ out: map[int]string{},
+ err: &UnmarshalTypeError{Value: "number abc", Type: reflect.TypeFor[int](), Offset: 2},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"256":"abc"}`,
+ ptr: new(map[uint8]string),
+ out: map[uint8]string{},
+ err: &UnmarshalTypeError{Value: "number 256", Type: reflect.TypeFor[uint8](), Offset: 2},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"128":"abc"}`,
+ ptr: new(map[int8]string),
+ out: map[int8]string{},
+ err: &UnmarshalTypeError{Value: "number 128", Type: reflect.TypeFor[int8](), Offset: 2},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"-1":"abc"}`,
+ ptr: new(map[uint8]string),
+ out: map[uint8]string{},
+ err: &UnmarshalTypeError{Value: "number -1", Type: reflect.TypeFor[uint8](), Offset: 2},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"F":{"a":2,"3":4}}`,
+ ptr: new(map[string]map[int]int),
+ out: map[string]map[int]int{"F": {3: 4}},
+ err: &UnmarshalTypeError{Value: "number a", Type: reflect.TypeFor[int](), Offset: 7},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"F":{"a":2,"3":4}}`,
+ ptr: new(map[string]map[uint]int),
+ out: map[string]map[uint]int{"F": {3: 4}},
+ err: &UnmarshalTypeError{Value: "number a", Type: reflect.TypeFor[uint](), Offset: 7},
+ },
+
+ // Map keys can be encoding.TextUnmarshalers.
+ {CaseName: Name(""), in: `{"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY},
+ // If multiple values exist for the same key, only the most recent value is used.
+ {CaseName: Name(""), in: `{"x:y":false,"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY},
+
+ {
+ CaseName: Name(""),
+ in: `{
+ "Level0": 1,
+ "Level1b": 2,
+ "Level1c": 3,
+ "x": 4,
+ "Level1a": 5,
+ "LEVEL1B": 6,
+ "e": {
+ "Level1a": 8,
+ "Level1b": 9,
+ "Level1c": 10,
+ "Level1d": 11,
+ "x": 12
+ },
+ "Loop1": 13,
+ "Loop2": 14,
+ "X": 15,
+ "Y": 16,
+ "Z": 17,
+ "Q": 18
+ }`,
+ ptr: new(Top),
+ out: Top{
+ Level0: 1,
+ Embed0: Embed0{
+ Level1b: 2,
+ Level1c: 3,
+ },
+ Embed0a: &Embed0a{
+ Level1a: 5,
+ Level1b: 6,
+ },
+ Embed0b: &Embed0b{
+ Level1a: 8,
+ Level1b: 9,
+ Level1c: 10,
+ Level1d: 11,
+ Level1e: 12,
+ },
+ Loop: Loop{
+ Loop1: 13,
+ Loop2: 14,
+ },
+ Embed0p: Embed0p{
+ Point: image.Point{X: 15, Y: 16},
+ },
+ Embed0q: Embed0q{
+ Point: Point{Z: 17},
+ },
+ embed: embed{
+ Q: 18,
+ },
+ },
+ },
+ {
+ CaseName: Name(""),
+ in: `{"hello": 1}`,
+ ptr: new(Ambig),
+ out: Ambig{First: 1},
+ },
+
+ {
+ CaseName: Name(""),
+ in: `{"X": 1,"Y":2}`,
+ ptr: new(S5),
+ out: S5{S8: S8{S9: S9{Y: 2}}},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"X": 1,"Y":2}`,
+ ptr: new(S5),
+ out: S5{S8: S8{S9{Y: 2}}},
+ err: fmt.Errorf("json: unknown field \"X\""),
+ disallowUnknownFields: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `{"X": 1,"Y":2}`,
+ ptr: new(S10),
+ out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"X": 1,"Y":2}`,
+ ptr: new(S10),
+ out: S10{S13: S13{S8{S9{Y: 2}}}},
+ err: fmt.Errorf("json: unknown field \"X\""),
+ disallowUnknownFields: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `{"I": 0, "I": null, "J": null}`,
+ ptr: new(DoublePtr),
+ out: DoublePtr{I: nil, J: nil},
+ },
+
+ // invalid UTF-8 is coerced to valid UTF-8.
+ {
+ CaseName: Name(""),
+ in: "\"hello\xffworld\"",
+ ptr: new(string),
+ out: "hello\ufffdworld",
+ },
+ {
+ CaseName: Name(""),
+ in: "\"hello\xc2\xc2world\"",
+ ptr: new(string),
+ out: "hello\ufffd\ufffdworld",
+ },
+ {
+ CaseName: Name(""),
+ in: "\"hello\xc2\xffworld\"",
+ ptr: new(string),
+ out: "hello\ufffd\ufffdworld",
+ },
+ {
+ CaseName: Name(""),
+ in: "\"hello\\ud800world\"",
+ ptr: new(string),
+ out: "hello\ufffdworld",
+ },
+ {
+ CaseName: Name(""),
+ in: "\"hello\\ud800\\ud800world\"",
+ ptr: new(string),
+ out: "hello\ufffd\ufffdworld",
+ },
+ {
+ CaseName: Name(""),
+ in: "\"hello\\ud800\\ud800world\"",
+ ptr: new(string),
+ out: "hello\ufffd\ufffdworld",
+ },
+ {
+ CaseName: Name(""),
+ in: "\"hello\xed\xa0\x80\xed\xb0\x80world\"",
+ ptr: new(string),
+ out: "hello\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdworld",
+ },
+
+ // Used to be issue 8305, but time.Time implements encoding.TextUnmarshaler so this works now.
+ {
+ CaseName: Name(""),
+ in: `{"2009-11-10T23:00:00Z": "hello world"}`,
+ ptr: new(map[time.Time]string),
+ out: map[time.Time]string{time.Date(2009, 11, 10, 23, 0, 0, 0, time.UTC): "hello world"},
+ },
+
+ // issue 8305
+ {
+ CaseName: Name(""),
+ in: `{"2009-11-10T23:00:00Z": "hello world"}`,
+ ptr: new(map[Point]string),
+ err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[map[Point]string](), Offset: 1},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"asdf": "hello world"}`,
+ ptr: new(map[unmarshaler]string),
+ err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[map[unmarshaler]string](), Offset: 1},
+ },
+
+ // related to issue 13783.
+ // Go 1.7 changed marshaling a slice of typed byte to use the methods on the byte type,
+ // similar to marshaling a slice of typed int.
+ // These tests check that, assuming the byte type also has valid decoding methods,
+ // either the old base64 string encoding or the new per-element encoding can be
+ // successfully unmarshaled. The custom unmarshalers were accessible in earlier
+ // versions of Go, even though the custom marshaler was not.
+ {
+ CaseName: Name(""),
+ in: `"AQID"`,
+ ptr: new([]byteWithMarshalJSON),
+ out: []byteWithMarshalJSON{1, 2, 3},
+ },
+ {
+ CaseName: Name(""),
+ in: `["Z01","Z02","Z03"]`,
+ ptr: new([]byteWithMarshalJSON),
+ out: []byteWithMarshalJSON{1, 2, 3},
+ golden: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `"AQID"`,
+ ptr: new([]byteWithMarshalText),
+ out: []byteWithMarshalText{1, 2, 3},
+ },
+ {
+ CaseName: Name(""),
+ in: `["Z01","Z02","Z03"]`,
+ ptr: new([]byteWithMarshalText),
+ out: []byteWithMarshalText{1, 2, 3},
+ golden: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `"AQID"`,
+ ptr: new([]byteWithPtrMarshalJSON),
+ out: []byteWithPtrMarshalJSON{1, 2, 3},
+ },
+ {
+ CaseName: Name(""),
+ in: `["Z01","Z02","Z03"]`,
+ ptr: new([]byteWithPtrMarshalJSON),
+ out: []byteWithPtrMarshalJSON{1, 2, 3},
+ golden: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `"AQID"`,
+ ptr: new([]byteWithPtrMarshalText),
+ out: []byteWithPtrMarshalText{1, 2, 3},
+ },
+ {
+ CaseName: Name(""),
+ in: `["Z01","Z02","Z03"]`,
+ ptr: new([]byteWithPtrMarshalText),
+ out: []byteWithPtrMarshalText{1, 2, 3},
+ golden: true,
+ },
+
+ // ints work with the marshaler but not the base64 []byte case
+ {
+ CaseName: Name(""),
+ in: `["Z01","Z02","Z03"]`,
+ ptr: new([]intWithMarshalJSON),
+ out: []intWithMarshalJSON{1, 2, 3},
+ golden: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `["Z01","Z02","Z03"]`,
+ ptr: new([]intWithMarshalText),
+ out: []intWithMarshalText{1, 2, 3},
+ golden: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `["Z01","Z02","Z03"]`,
+ ptr: new([]intWithPtrMarshalJSON),
+ out: []intWithPtrMarshalJSON{1, 2, 3},
+ golden: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `["Z01","Z02","Z03"]`,
+ ptr: new([]intWithPtrMarshalText),
+ out: []intWithPtrMarshalText{1, 2, 3},
+ golden: true,
+ },
+
+ {CaseName: Name(""), in: `0.000001`, ptr: new(float64), out: 0.000001, golden: true},
+ {CaseName: Name(""), in: `1e-7`, ptr: new(float64), out: 1e-7, golden: true},
+ {CaseName: Name(""), in: `100000000000000000000`, ptr: new(float64), out: 100000000000000000000.0, golden: true},
+ {CaseName: Name(""), in: `1e+21`, ptr: new(float64), out: 1e21, golden: true},
+ {CaseName: Name(""), in: `-0.000001`, ptr: new(float64), out: -0.000001, golden: true},
+ {CaseName: Name(""), in: `-1e-7`, ptr: new(float64), out: -1e-7, golden: true},
+ {CaseName: Name(""), in: `-100000000000000000000`, ptr: new(float64), out: -100000000000000000000.0, golden: true},
+ {CaseName: Name(""), in: `-1e+21`, ptr: new(float64), out: -1e21, golden: true},
+ {CaseName: Name(""), in: `999999999999999900000`, ptr: new(float64), out: 999999999999999900000.0, golden: true},
+ {CaseName: Name(""), in: `9007199254740992`, ptr: new(float64), out: 9007199254740992.0, golden: true},
+ {CaseName: Name(""), in: `9007199254740993`, ptr: new(float64), out: 9007199254740992.0, golden: false},
+
+ {
+ CaseName: Name(""),
+ in: `{"V": {"F2": "hello"}}`,
+ ptr: new(VOuter),
+ err: &UnmarshalTypeError{
+ Value: "string",
+ Struct: "V",
+ Field: "V.F2",
+ Type: reflect.TypeFor[int32](),
+ Offset: 20,
+ },
+ },
+ {
+ CaseName: Name(""),
+ in: `{"V": {"F4": {}, "F2": "hello"}}`,
+ ptr: new(VOuter),
+ out: VOuter{V: V{F4: &VOuter{}}},
+ err: &UnmarshalTypeError{
+ Value: "string",
+ Struct: "V",
+ Field: "V.F2",
+ Type: reflect.TypeFor[int32](),
+ Offset: 30,
+ },
+ },
+
+ {
+ CaseName: Name(""),
+ in: `{"Level1a": "hello"}`,
+ ptr: new(Top),
+ out: Top{Embed0a: &Embed0a{}},
+ err: &UnmarshalTypeError{
+ Value: "string",
+ Struct: "Top",
+ Field: "Embed0a.Level1a",
+ Type: reflect.TypeFor[int](),
+ Offset: 19,
+ },
+ },
+
+ // issue 15146.
+ // invalid inputs are exercised in TestErrorMessageFromMisusedString below.
+ {CaseName: Name(""), in: `{"B":"true"}`, ptr: new(B), out: B{true}, golden: true},
+ {CaseName: Name(""), in: `{"B":"false"}`, ptr: new(B), out: B{false}, golden: true},
+ {CaseName: Name(""), in: `{"B": "maybe"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "maybe" into bool`)},
+ {CaseName: Name(""), in: `{"B": "tru"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "tru" into bool`)},
+ {CaseName: Name(""), in: `{"B": "False"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "False" into bool`)},
+ {CaseName: Name(""), in: `{"B": "null"}`, ptr: new(B), out: B{false}},
+ {CaseName: Name(""), in: `{"B": "nul"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "nul" into bool`)},
+ {CaseName: Name(""), in: `{"B": [2, 3]}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal unquoted value into bool`)},
+
+ // additional tests for disallowUnknownFields
+ {
+ CaseName: Name(""),
+ in: `{
+ "Level0": 1,
+ "Level1b": 2,
+ "Level1c": 3,
+ "x": 4,
+ "Level1a": 5,
+ "LEVEL1B": 6,
+ "e": {
+ "Level1a": 8,
+ "Level1b": 9,
+ "Level1c": 10,
+ "Level1d": 11,
+ "x": 12
+ },
+ "Loop1": 13,
+ "Loop2": 14,
+ "X": 15,
+ "Y": 16,
+ "Z": 17,
+ "Q": 18,
+ "extra": true
+ }`,
+ ptr: new(Top),
+ out: Top{
+ Level0: 1,
+ Embed0: Embed0{
+ Level1b: 2,
+ Level1c: 3,
+ },
+ Embed0a: &Embed0a{Level1a: 5, Level1b: 6},
+ Embed0b: &Embed0b{Level1a: 8, Level1b: 9, Level1c: 10, Level1d: 11, Level1e: 12},
+ Loop: Loop{
+ Loop1: 13,
+ Loop2: 14,
+ Loop: nil,
+ },
+ Embed0p: Embed0p{
+ Point: image.Point{
+ X: 15,
+ Y: 16,
+ },
+ },
+ Embed0q: Embed0q{Point: Point{Z: 17}},
+ embed: embed{Q: 18},
+ },
+ err: fmt.Errorf("json: unknown field \"extra\""),
+ disallowUnknownFields: true,
+ },
+ {
+ CaseName: Name(""),
+ in: `{
+ "Level0": 1,
+ "Level1b": 2,
+ "Level1c": 3,
+ "x": 4,
+ "Level1a": 5,
+ "LEVEL1B": 6,
+ "e": {
+ "Level1a": 8,
+ "Level1b": 9,
+ "Level1c": 10,
+ "Level1d": 11,
+ "x": 12,
+ "extra": null
+ },
+ "Loop1": 13,
+ "Loop2": 14,
+ "X": 15,
+ "Y": 16,
+ "Z": 17,
+ "Q": 18
+ }`,
+ ptr: new(Top),
+ out: Top{
+ Level0: 1,
+ Embed0: Embed0{
+ Level1b: 2,
+ Level1c: 3,
+ },
+ Embed0a: &Embed0a{Level1a: 5, Level1b: 6},
+ Embed0b: &Embed0b{Level1a: 8, Level1b: 9, Level1c: 10, Level1d: 11, Level1e: 12},
+ Loop: Loop{
+ Loop1: 13,
+ Loop2: 14,
+ Loop: nil,
+ },
+ Embed0p: Embed0p{
+ Point: image.Point{
+ X: 15,
+ Y: 16,
+ },
+ },
+ Embed0q: Embed0q{Point: Point{Z: 17}},
+ embed: embed{Q: 18},
+ },
+ err: fmt.Errorf("json: unknown field \"extra\""),
+ disallowUnknownFields: true,
+ },
+ // issue 26444
+ // UnmarshalTypeError without field & struct values
+ {
+ CaseName: Name(""),
+ in: `{"data":{"test1": "bob", "test2": 123}}`,
+ ptr: new(mapStringToStringData),
+ out: mapStringToStringData{map[string]string{"test1": "bob", "test2": ""}},
+ err: &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[string](), Offset: 37, Struct: "mapStringToStringData", Field: "data"},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"data":{"test1": 123, "test2": "bob"}}`,
+ ptr: new(mapStringToStringData),
+ out: mapStringToStringData{Data: map[string]string{"test1": "", "test2": "bob"}},
+ err: &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[string](), Offset: 21, Struct: "mapStringToStringData", Field: "data"},
+ },
+
+ // trying to decode JSON arrays or objects via TextUnmarshaler
+ {
+ CaseName: Name(""),
+ in: `[1, 2, 3]`,
+ ptr: new(MustNotUnmarshalText),
+ err: &UnmarshalTypeError{Value: "array", Type: reflect.TypeFor[*MustNotUnmarshalText](), Offset: 1},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"foo": "bar"}`,
+ ptr: new(MustNotUnmarshalText),
+ err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[*MustNotUnmarshalText](), Offset: 1},
+ },
+ // #22369
+ {
+ CaseName: Name(""),
+ in: `{"PP": {"T": {"Y": "bad-type"}}}`,
+ ptr: new(P),
+ err: &UnmarshalTypeError{
+ Value: "string",
+ Struct: "T",
+ Field: "PP.T.Y",
+ Type: reflect.TypeFor[int](),
+ Offset: 29,
+ },
+ },
+ {
+ CaseName: Name(""),
+ in: `{"Ts": [{"Y": 1}, {"Y": 2}, {"Y": "bad-type"}]}`,
+ ptr: new(PP),
+ out: PP{Ts: []T{{Y: 1}, {Y: 2}, {Y: 0}}},
+ err: &UnmarshalTypeError{
+ Value: "string",
+ Struct: "T",
+ Field: "Ts.Y",
+ Type: reflect.TypeFor[int](),
+ Offset: 44,
+ },
+ },
+ // #14702
+ {
+ CaseName: Name(""),
+ in: `invalid`,
+ ptr: new(Number),
+ err: &SyntaxError{
+ msg: "invalid character 'i' looking for beginning of value",
+ Offset: 1,
+ },
+ },
+ {
+ CaseName: Name(""),
+ in: `"invalid"`,
+ ptr: new(Number),
+ err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`),
+ },
+ {
+ CaseName: Name(""),
+ in: `{"A":"invalid"}`,
+ ptr: new(struct{ A Number }),
+ err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`),
+ },
+ {
+ CaseName: Name(""),
+ in: `{"A":"invalid"}`,
+ ptr: new(struct {
+ A Number `json:",string"`
+ }),
+ err: fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into json.Number", `invalid`),
+ },
+ {
+ CaseName: Name(""),
+ in: `{"A":"invalid"}`,
+ ptr: new(map[string]Number),
+ out: map[string]Number{},
+ err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`),
+ },
+
+ {
+ CaseName: Name(""),
+ in: `5`,
+ ptr: new(Number),
+ out: Number("5"),
+ },
+ {
+ CaseName: Name(""),
+ in: `"5"`,
+ ptr: new(Number),
+ out: Number("5"),
+ },
+ {
+ CaseName: Name(""),
+ in: `{"N":5}`,
+ ptr: new(struct{ N Number }),
+ out: struct{ N Number }{"5"},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"N":"5"}`,
+ ptr: new(struct{ N Number }),
+ out: struct{ N Number }{"5"},
+ },
+ {
+ CaseName: Name(""),
+ in: `{"N":5}`,
+ ptr: new(struct {
+ N Number `json:",string"`
+ }),
+ err: fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into json.Number"),
+ },
+ {
+ CaseName: Name(""),
+ in: `{"N":"5"}`,
+ ptr: new(struct {
+ N Number `json:",string"`
+ }),
+ out: struct {
+ N Number `json:",string"`
+ }{"5"},
+ },
+
+ // Verify that syntactic errors are immediately fatal,
+ // while semantic errors are lazily reported
+ // (i.e., allow processing to continue);
+ // a standalone sketch of this distinction follows the table.
+ {
+ CaseName: Name(""),
+ in: `[1,2,true,4,5}`,
+ ptr: new([]int),
+ err: &SyntaxError{msg: "invalid character '}' after array element", Offset: 14},
+ },
+ {
+ CaseName: Name(""),
+ in: `[1,2,true,4,5]`,
+ ptr: new([]int),
+ out: []int{1, 2, 0, 4, 5},
+ err: &UnmarshalTypeError{Value: "bool", Type: reflect.TypeFor[int](), Offset: 9},
+ },
+
+ {
+ CaseName: Name("DashComma"),
+ in: `{"-":"hello"}`,
+ ptr: new(struct {
+ F string `json:"-,"`
+ }),
+ out: struct {
+ F string `json:"-,"`
+ }{"hello"},
+ },
+ {
+ CaseName: Name("DashCommaOmitEmpty"),
+ in: `{"-":"hello"}`,
+ ptr: new(struct {
+ F string `json:"-,omitempty"`
+ }),
+ out: struct {
+ F string `json:"-,omitempty"`
+ }{"hello"},
+ },
+}
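+
+// sketchSyntacticVsSemanticErrors is a hypothetical helper (not part of the
+// upstream test suite) restating the pair of []int entries near the end of
+// the table above: a syntax error aborts before anything is stored, while a
+// type mismatch is reported only after the rest of the value has been
+// decoded.
+func sketchSyntacticVsSemanticErrors() (fatal, lazy []int, fatalErr, lazyErr error) {
+ fatalErr = Unmarshal([]byte(`[1,2,true,4,5}`), &fatal) // *SyntaxError; fatal stays nil
+ lazyErr = Unmarshal([]byte(`[1,2,true,4,5]`), &lazy) // *UnmarshalTypeError; lazy == []int{1, 2, 0, 4, 5}
+ return fatal, lazy, fatalErr, lazyErr
+}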
+
+func TestMarshal(t *testing.T) {
+ b, err := Marshal(allValue)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ if string(b) != allValueCompact {
+ t.Errorf("Marshal:")
+ diff(t, b, []byte(allValueCompact))
+ return
+ }
+
+ b, err = Marshal(pallValue)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ if string(b) != pallValueCompact {
+ t.Errorf("Marshal:")
+ diff(t, b, []byte(pallValueCompact))
+ return
+ }
+}
+
+func TestMarshalInvalidUTF8(t *testing.T) {
+ tests := []struct {
+ CaseName
+ in string
+ want string
+ }{
+ {Name(""), "hello\xffworld", `"hello\ufffdworld"`},
+ {Name(""), "", `""`},
+ {Name(""), "\xff", `"\ufffd"`},
+ {Name(""), "\xff\xff", `"\ufffd\ufffd"`},
+ {Name(""), "a\xffb", `"a\ufffdb"`},
+ {Name(""), "\xe6\x97\xa5\xe6\x9c\xac\xff\xaa\x9e", `"日本\ufffd\ufffd\ufffd"`},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ got, err := Marshal(tt.in)
+ if string(got) != tt.want || err != nil {
+ t.Errorf("%s: Marshal(%q):\n\tgot: (%q, %v)\n\twant: (%q, nil)", tt.Where, tt.in, got, err, tt.want)
+ }
+ })
+ }
+}
+
+func TestMarshalNumberZeroVal(t *testing.T) {
+ var n Number
+ out, err := Marshal(n)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ got := string(out)
+ if got != "0" {
+ t.Fatalf("Marshal: got %s, want 0", got)
+ }
+}
+
+func TestMarshalEmbeds(t *testing.T) {
+ top := &Top{
+ Level0: 1,
+ Embed0: Embed0{
+ Level1b: 2,
+ Level1c: 3,
+ },
+ Embed0a: &Embed0a{
+ Level1a: 5,
+ Level1b: 6,
+ },
+ Embed0b: &Embed0b{
+ Level1a: 8,
+ Level1b: 9,
+ Level1c: 10,
+ Level1d: 11,
+ Level1e: 12,
+ },
+ Loop: Loop{
+ Loop1: 13,
+ Loop2: 14,
+ },
+ Embed0p: Embed0p{
+ Point: image.Point{X: 15, Y: 16},
+ },
+ Embed0q: Embed0q{
+ Point: Point{Z: 17},
+ },
+ embed: embed{
+ Q: 18,
+ },
+ }
+ got, err := Marshal(top)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ want := "{\"Level0\":1,\"Level1b\":2,\"Level1c\":3,\"Level1a\":5,\"LEVEL1B\":6,\"e\":{\"Level1a\":8,\"Level1b\":9,\"Level1c\":10,\"Level1d\":11,\"x\":12},\"Loop1\":13,\"Loop2\":14,\"X\":15,\"Y\":16,\"Z\":17,\"Q\":18}"
+ if string(got) != want {
+ t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want)
+ }
+}
+
+func equalError(a, b error) bool {
+ isJSONError := func(err error) bool {
+ switch err.(type) {
+ case
+ *InvalidUTF8Error,
+ *InvalidUnmarshalError,
+ *MarshalerError,
+ *SyntaxError,
+ *UnmarshalFieldError,
+ *UnmarshalTypeError,
+ *UnsupportedTypeError,
+ *UnsupportedValueError:
+ return true
+ }
+ return false
+ }
+
+ if a == nil || b == nil {
+ return a == nil && b == nil
+ }
+ if isJSONError(a) || isJSONError(b) {
+ return reflect.DeepEqual(a, b) // safe for locally defined error types
+ }
+ return a.Error() == b.Error()
+}
+
+func TestUnmarshal(t *testing.T) {
+ for _, tt := range unmarshalTests {
+ t.Run(tt.Name, func(t *testing.T) {
+ in := []byte(tt.in)
+ var scan scanner
+ if err := checkValid(in, &scan); err != nil {
+ if !equalError(err, tt.err) {
+ t.Fatalf("%s: checkValid error:\n\tgot %#v\n\twant %#v", tt.Where, err, tt.err)
+ }
+ }
+ if tt.ptr == nil {
+ return
+ }
+
+ typ := reflect.TypeOf(tt.ptr)
+ if typ.Kind() != reflect.Pointer {
+ t.Fatalf("%s: unmarshalTest.ptr %T is not a pointer type", tt.Where, tt.ptr)
+ }
+ typ = typ.Elem()
+
+ // v = new(right-type)
+ v := reflect.New(typ)
+
+ if !reflect.DeepEqual(tt.ptr, v.Interface()) {
+ // There's no reason for ptr to point to non-zero data,
+ // as we decode into new(right-type), so the data is
+ // discarded.
+ // This can easily mean tests that silently don't test
+ // what they should. To test decoding into existing
+ // data, see TestPrefilled.
+ t.Fatalf("%s: unmarshalTest.ptr %#v is not a pointer to a zero value", tt.Where, tt.ptr)
+ }
+
+ dec := NewDecoder(bytes.NewReader(in))
+ if tt.useNumber {
+ dec.UseNumber()
+ }
+ if tt.disallowUnknownFields {
+ dec.DisallowUnknownFields()
+ }
+ if tt.err != nil && strings.Contains(tt.err.Error(), "unexpected end of JSON input") {
+ // In streaming mode, we expect EOF or ErrUnexpectedEOF instead.
+ if strings.TrimSpace(tt.in) == "" {
+ tt.err = io.EOF
+ } else {
+ tt.err = io.ErrUnexpectedEOF
+ }
+ }
+ if err := dec.Decode(v.Interface()); !equalError(err, tt.err) {
+ t.Fatalf("%s: Decode error:\n\tgot: %v\n\twant: %v\n\n\tgot: %#v\n\twant: %#v", tt.Where, err, tt.err, err, tt.err)
+ } else if err != nil && tt.out == nil {
+ // Initialize tt.out during an error where there are no mutations,
+ // so the output is just the zero value of the input type.
+ tt.out = reflect.Zero(v.Elem().Type()).Interface()
+ }
+ if got := v.Elem().Interface(); !reflect.DeepEqual(got, tt.out) {
+ gotJSON, _ := Marshal(got)
+ wantJSON, _ := Marshal(tt.out)
+ t.Fatalf("%s: Decode:\n\tgot: %#+v\n\twant: %#+v\n\n\tgotJSON: %s\n\twantJSON: %s", tt.Where, got, tt.out, gotJSON, wantJSON)
+ }
+
+ // Check round trip also decodes correctly.
+ if tt.err == nil {
+ enc, err := Marshal(v.Interface())
+ if err != nil {
+ t.Fatalf("%s: Marshal error after roundtrip: %v", tt.Where, err)
+ }
+ if tt.golden && !bytes.Equal(enc, in) {
+ t.Errorf("%s: Marshal:\n\tgot: %s\n\twant: %s", tt.Where, enc, in)
+ }
+ vv := reflect.New(reflect.TypeOf(tt.ptr).Elem())
+ dec = NewDecoder(bytes.NewReader(enc))
+ if tt.useNumber {
+ dec.UseNumber()
+ }
+ if err := dec.Decode(vv.Interface()); err != nil {
+ t.Fatalf("%s: Decode(%#q) error after roundtrip: %v", tt.Where, enc, err)
+ }
+ if !reflect.DeepEqual(v.Elem().Interface(), vv.Elem().Interface()) {
+ t.Fatalf("%s: Decode:\n\tgot: %#+v\n\twant: %#+v\n\n\tgotJSON: %s\n\twantJSON: %s",
+ tt.Where, v.Elem().Interface(), vv.Elem().Interface(),
+ stripWhitespace(string(enc)), stripWhitespace(string(in)))
+ }
+ }
+ })
+ }
+}
+
+func TestUnmarshalMarshal(t *testing.T) {
+ initBig()
+ var v any
+ if err := Unmarshal(jsonBig, &v); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ b, err := Marshal(v)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ if !bytes.Equal(jsonBig, b) {
+ t.Errorf("Marshal:")
+ diff(t, b, jsonBig)
+ return
+ }
+}
+
+// Independent of Decode, basic coverage of the accessors in Number
+func TestNumberAccessors(t *testing.T) {
+ tests := []struct {
+ CaseName
+ in string
+ i int64
+ intErr string
+ f float64
+ floatErr string
+ }{
+ {CaseName: Name(""), in: "-1.23e1", intErr: "strconv.ParseInt: parsing \"-1.23e1\": invalid syntax", f: -1.23e1},
+ {CaseName: Name(""), in: "-12", i: -12, f: -12.0},
+ {CaseName: Name(""), in: "1e1000", intErr: "strconv.ParseInt: parsing \"1e1000\": invalid syntax", floatErr: "strconv.ParseFloat: parsing \"1e1000\": value out of range"},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ n := Number(tt.in)
+ if got := n.String(); got != tt.in {
+ t.Errorf("%s: Number(%q).String() = %s, want %s", tt.Where, tt.in, got, tt.in)
+ }
+ if i, err := n.Int64(); err == nil && tt.intErr == "" && i != tt.i {
+ t.Errorf("%s: Number(%q).Int64() = %d, want %d", tt.Where, tt.in, i, tt.i)
+ } else if (err == nil && tt.intErr != "") || (err != nil && err.Error() != tt.intErr) {
+ t.Errorf("%s: Number(%q).Int64() error:\n\tgot: %v\n\twant: %v", tt.Where, tt.in, err, tt.intErr)
+ }
+ if f, err := n.Float64(); err == nil && tt.floatErr == "" && f != tt.f {
+ t.Errorf("%s: Number(%q).Float64() = %g, want %g", tt.Where, tt.in, f, tt.f)
+ } else if (err == nil && tt.floatErr != "") || (err != nil && err.Error() != tt.floatErr) {
+ t.Errorf("%s: Number(%q).Float64() error:\n\tgot %v\n\twant: %v", tt.Where, tt.in, err, tt.floatErr)
+ }
+ })
+ }
+}
+
+func TestLargeByteSlice(t *testing.T) {
+ s0 := make([]byte, 2000)
+ for i := range s0 {
+ s0[i] = byte(i)
+ }
+ b, err := Marshal(s0)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ var s1 []byte
+ if err := Unmarshal(b, &s1); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if !bytes.Equal(s0, s1) {
+ t.Errorf("Marshal:")
+ diff(t, s0, s1)
+ }
+}
+
+type Xint struct {
+ X int
+}
+
+func TestUnmarshalInterface(t *testing.T) {
+ var xint Xint
+ var i any = &xint
+ if err := Unmarshal([]byte(`{"X":1}`), &i); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if xint.X != 1 {
+ t.Fatalf("xint.X = %d, want 1", xint.X)
+ }
+}
+
+func TestUnmarshalPtrPtr(t *testing.T) {
+ var xint Xint
+ pxint := &xint
+ if err := Unmarshal([]byte(`{"X":1}`), &pxint); err != nil {
+ t.Fatalf("Unmarshal: %v", err)
+ }
+ if xint.X != 1 {
+ t.Fatalf("xint.X = %d, want 1", xint.X)
+ }
+}
+
+func TestEscape(t *testing.T) {
+ const input = `"foobar"<html>` + " [\u2028 \u2029]"
+ // Assumes this package's Marshal leaves <, > and & unescaped (its stated
+ // difference from encoding/json); U+2028 and U+2029 are still escaped.
+ const want = `"\"foobar\"<html> [\u2028 \u2029]"`
+ got, err := Marshal(input)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ if string(got) != want {
+ t.Errorf("Marshal(%#q):\n\tgot: %s\n\twant: %s", input, got, want)
+ }
+}
+
+// If people misuse the ,string modifier, the error message should be
+// helpful, telling the user that they're doing it wrong.
+func TestErrorMessageFromMisusedString(t *testing.T) {
+ // WrongString is a struct that's misusing the ,string modifier.
+ type WrongString struct {
+ Message string `json:"result,string"`
+ }
+ tests := []struct {
+ CaseName
+ in, err string
+ }{
+ {Name(""), `{"result":"x"}`, `json: invalid use of ,string struct tag, trying to unmarshal "x" into string`},
+ {Name(""), `{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`},
+ {Name(""), `{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`},
+ {Name(""), `{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`},
+ {Name(""), `{"result":"\""}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"" into string`},
+ {Name(""), `{"result":"\"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"foo" into string`},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ r := strings.NewReader(tt.in)
+ var s WrongString
+ err := NewDecoder(r).Decode(&s)
+ got := fmt.Sprintf("%v", err)
+ if got != tt.err {
+ t.Errorf("%s: Decode error:\n\tgot: %s\n\twant: %s", tt.Where, got, tt.err)
+ }
+ })
+ }
+}
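+
+// sketchStringTagUsage is a hypothetical helper (not part of the upstream
+// test suite) showing the intended use of the ,string option that the test
+// above contrasts with: it belongs on numeric and boolean fields whose JSON
+// value arrives quoted, not on string fields.
+func sketchStringTagUsage() int64 {
+ var v struct {
+ Amount int64 `json:",string"`
+ }
+ _ = Unmarshal([]byte(`{"Amount":"1299"}`), &v) // v.Amount == 1299
+ return v.Amount
+}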
+
+type All struct {
+ Bool bool
+ Int int
+ Int8 int8
+ Int16 int16
+ Int32 int32
+ Int64 int64
+ Uint uint
+ Uint8 uint8
+ Uint16 uint16
+ Uint32 uint32
+ Uint64 uint64
+ Uintptr uintptr
+ Float32 float32
+ Float64 float64
+
+ Foo string `json:"bar"`
+ Foo2 string `json:"bar2,dummyopt"`
+
+ IntStr int64 `json:",string"`
+ UintptrStr uintptr `json:",string"`
+
+ PBool *bool
+ PInt *int
+ PInt8 *int8
+ PInt16 *int16
+ PInt32 *int32
+ PInt64 *int64
+ PUint *uint
+ PUint8 *uint8
+ PUint16 *uint16
+ PUint32 *uint32
+ PUint64 *uint64
+ PUintptr *uintptr
+ PFloat32 *float32
+ PFloat64 *float64
+
+ String string
+ PString *string
+
+ Map map[string]Small
+ MapP map[string]*Small
+ PMap *map[string]Small
+ PMapP *map[string]*Small
+
+ EmptyMap map[string]Small
+ NilMap map[string]Small
+
+ Slice []Small
+ SliceP []*Small
+ PSlice *[]Small
+ PSliceP *[]*Small
+
+ EmptySlice []Small
+ NilSlice []Small
+
+ StringSlice []string
+ ByteSlice []byte
+
+ Small Small
+ PSmall *Small
+ PPSmall **Small
+
+ Interface any
+ PInterface *any
+
+ unexported int
+}
+
+type Small struct {
+ Tag string
+}
+
+var allValue = All{
+ Bool: true,
+ Int: 2,
+ Int8: 3,
+ Int16: 4,
+ Int32: 5,
+ Int64: 6,
+ Uint: 7,
+ Uint8: 8,
+ Uint16: 9,
+ Uint32: 10,
+ Uint64: 11,
+ Uintptr: 12,
+ Float32: 14.1,
+ Float64: 15.1,
+ Foo: "foo",
+ Foo2: "foo2",
+ IntStr: 42,
+ UintptrStr: 44,
+ String: "16",
+ Map: map[string]Small{
+ "17": {Tag: "tag17"},
+ "18": {Tag: "tag18"},
+ },
+ MapP: map[string]*Small{
+ "19": {Tag: "tag19"},
+ "20": nil,
+ },
+ EmptyMap: map[string]Small{},
+ Slice: []Small{{Tag: "tag20"}, {Tag: "tag21"}},
+ SliceP: []*Small{{Tag: "tag22"}, nil, {Tag: "tag23"}},
+ EmptySlice: []Small{},
+ StringSlice: []string{"str24", "str25", "str26"},
+ ByteSlice: []byte{27, 28, 29},
+ Small: Small{Tag: "tag30"},
+ PSmall: &Small{Tag: "tag31"},
+ Interface: 5.2,
+}
+
+var pallValue = All{
+ PBool: &allValue.Bool,
+ PInt: &allValue.Int,
+ PInt8: &allValue.Int8,
+ PInt16: &allValue.Int16,
+ PInt32: &allValue.Int32,
+ PInt64: &allValue.Int64,
+ PUint: &allValue.Uint,
+ PUint8: &allValue.Uint8,
+ PUint16: &allValue.Uint16,
+ PUint32: &allValue.Uint32,
+ PUint64: &allValue.Uint64,
+ PUintptr: &allValue.Uintptr,
+ PFloat32: &allValue.Float32,
+ PFloat64: &allValue.Float64,
+ PString: &allValue.String,
+ PMap: &allValue.Map,
+ PMapP: &allValue.MapP,
+ PSlice: &allValue.Slice,
+ PSliceP: &allValue.SliceP,
+ PPSmall: &allValue.PSmall,
+ PInterface: &allValue.Interface,
+}
+
+var allValueIndent = `{
+ "Bool": true,
+ "Int": 2,
+ "Int8": 3,
+ "Int16": 4,
+ "Int32": 5,
+ "Int64": 6,
+ "Uint": 7,
+ "Uint8": 8,
+ "Uint16": 9,
+ "Uint32": 10,
+ "Uint64": 11,
+ "Uintptr": 12,
+ "Float32": 14.1,
+ "Float64": 15.1,
+ "bar": "foo",
+ "bar2": "foo2",
+ "IntStr": "42",
+ "UintptrStr": "44",
+ "PBool": null,
+ "PInt": null,
+ "PInt8": null,
+ "PInt16": null,
+ "PInt32": null,
+ "PInt64": null,
+ "PUint": null,
+ "PUint8": null,
+ "PUint16": null,
+ "PUint32": null,
+ "PUint64": null,
+ "PUintptr": null,
+ "PFloat32": null,
+ "PFloat64": null,
+ "String": "16",
+ "PString": null,
+ "Map": {
+ "17": {
+ "Tag": "tag17"
+ },
+ "18": {
+ "Tag": "tag18"
+ }
+ },
+ "MapP": {
+ "19": {
+ "Tag": "tag19"
+ },
+ "20": null
+ },
+ "PMap": null,
+ "PMapP": null,
+ "EmptyMap": {},
+ "NilMap": null,
+ "Slice": [
+ {
+ "Tag": "tag20"
+ },
+ {
+ "Tag": "tag21"
+ }
+ ],
+ "SliceP": [
+ {
+ "Tag": "tag22"
+ },
+ null,
+ {
+ "Tag": "tag23"
+ }
+ ],
+ "PSlice": null,
+ "PSliceP": null,
+ "EmptySlice": [],
+ "NilSlice": null,
+ "StringSlice": [
+ "str24",
+ "str25",
+ "str26"
+ ],
+ "ByteSlice": "Gxwd",
+ "Small": {
+ "Tag": "tag30"
+ },
+ "PSmall": {
+ "Tag": "tag31"
+ },
+ "PPSmall": null,
+ "Interface": 5.2,
+ "PInterface": null
+}`
+
+var allValueCompact = stripWhitespace(allValueIndent)
+
+var pallValueIndent = `{
+ "Bool": false,
+ "Int": 0,
+ "Int8": 0,
+ "Int16": 0,
+ "Int32": 0,
+ "Int64": 0,
+ "Uint": 0,
+ "Uint8": 0,
+ "Uint16": 0,
+ "Uint32": 0,
+ "Uint64": 0,
+ "Uintptr": 0,
+ "Float32": 0,
+ "Float64": 0,
+ "bar": "",
+ "bar2": "",
+ "IntStr": "0",
+ "UintptrStr": "0",
+ "PBool": true,
+ "PInt": 2,
+ "PInt8": 3,
+ "PInt16": 4,
+ "PInt32": 5,
+ "PInt64": 6,
+ "PUint": 7,
+ "PUint8": 8,
+ "PUint16": 9,
+ "PUint32": 10,
+ "PUint64": 11,
+ "PUintptr": 12,
+ "PFloat32": 14.1,
+ "PFloat64": 15.1,
+ "String": "",
+ "PString": "16",
+ "Map": null,
+ "MapP": null,
+ "PMap": {
+ "17": {
+ "Tag": "tag17"
+ },
+ "18": {
+ "Tag": "tag18"
+ }
+ },
+ "PMapP": {
+ "19": {
+ "Tag": "tag19"
+ },
+ "20": null
+ },
+ "EmptyMap": null,
+ "NilMap": null,
+ "Slice": null,
+ "SliceP": null,
+ "PSlice": [
+ {
+ "Tag": "tag20"
+ },
+ {
+ "Tag": "tag21"
+ }
+ ],
+ "PSliceP": [
+ {
+ "Tag": "tag22"
+ },
+ null,
+ {
+ "Tag": "tag23"
+ }
+ ],
+ "EmptySlice": null,
+ "NilSlice": null,
+ "StringSlice": null,
+ "ByteSlice": null,
+ "Small": {
+ "Tag": ""
+ },
+ "PSmall": null,
+ "PPSmall": {
+ "Tag": "tag31"
+ },
+ "Interface": null,
+ "PInterface": 5.2
+}`
+
+var pallValueCompact = stripWhitespace(pallValueIndent)
+
+func TestRefUnmarshal(t *testing.T) {
+ type S struct {
+ // Ref is defined in encode_test.go.
+ R0 Ref
+ R1 *Ref
+ R2 RefText
+ R3 *RefText
+ }
+ want := S{
+ R0: 12,
+ R1: new(Ref),
+ R2: 13,
+ R3: new(RefText),
+ }
+ *want.R1 = 12
+ *want.R3 = 13
+
+ var got S
+ if err := Unmarshal([]byte(`{"R0":"ref","R1":"ref","R2":"ref","R3":"ref"}`), &got); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if !reflect.DeepEqual(got, want) {
+ t.Errorf("Unmarsha:\n\tgot: %+v\n\twant: %+v", got, want)
+ }
+}
+
+// Test that decoding the empty string doesn't panic when ,string is specified.
+// Issue 3450
+func TestEmptyString(t *testing.T) {
+ type T2 struct {
+ Number1 int `json:",string"`
+ Number2 int `json:",string"`
+ }
+ data := `{"Number1":"1", "Number2":""}`
+ dec := NewDecoder(strings.NewReader(data))
+ var got T2
+ switch err := dec.Decode(&got); {
+ case err == nil:
+ t.Fatalf("Decode error: got nil, want non-nil")
+ case got.Number1 != 1:
+ t.Fatalf("Decode: got.Number1 = %d, want 1", got.Number1)
+ }
+}
+
+// Test that a null for ,string is not replaced with the previous quoted string (issue 7046).
+// It should also not be an error (issue 2540, issue 8587).
+func TestNullString(t *testing.T) {
+ type T struct {
+ A int `json:",string"`
+ B int `json:",string"`
+ C *int `json:",string"`
+ }
+ data := []byte(`{"A": "1", "B": null, "C": null}`)
+ var s T
+ s.B = 1
+ s.C = new(int)
+ *s.C = 2
+ switch err := Unmarshal(data, &s); {
+ case err != nil:
+ t.Fatalf("Unmarshal error: %v", err)
+ case s.B != 1:
+ t.Fatalf("Unmarshal: s.B = %d, want 1", s.B)
+ case s.C != nil:
+ t.Fatalf("Unmarshal: s.C = %d, want non-nil", s.C)
+ }
+}
+
+func addr[T any](v T) *T {
+ return &v
+}
+
+func TestInterfaceSet(t *testing.T) {
+ errUnmarshal := &UnmarshalTypeError{Value: "object", Offset: 6, Type: reflect.TypeFor[int](), Field: "X"}
+ tests := []struct {
+ CaseName
+ pre any
+ json string
+ post any
+ }{
+ {Name(""), "foo", `"bar"`, "bar"},
+ {Name(""), "foo", `2`, 2.0},
+ {Name(""), "foo", `true`, true},
+ {Name(""), "foo", `null`, nil},
+ {Name(""), map[string]any{}, `true`, true},
+ {Name(""), []string{}, `true`, true},
+
+ {Name(""), any(nil), `null`, any(nil)},
+ {Name(""), (*int)(nil), `null`, any(nil)},
+ {Name(""), (*int)(addr(0)), `null`, any(nil)},
+ {Name(""), (*int)(addr(1)), `null`, any(nil)},
+ {Name(""), (**int)(nil), `null`, any(nil)},
+ {Name(""), (**int)(addr[*int](nil)), `null`, (**int)(addr[*int](nil))},
+ {Name(""), (**int)(addr(addr(1))), `null`, (**int)(addr[*int](nil))},
+ {Name(""), (***int)(nil), `null`, any(nil)},
+ {Name(""), (***int)(addr[**int](nil)), `null`, (***int)(addr[**int](nil))},
+ {Name(""), (***int)(addr(addr[*int](nil))), `null`, (***int)(addr[**int](nil))},
+ {Name(""), (***int)(addr(addr(addr(1)))), `null`, (***int)(addr[**int](nil))},
+
+ {Name(""), any(nil), `2`, float64(2)},
+ {Name(""), (int)(1), `2`, float64(2)},
+ {Name(""), (*int)(nil), `2`, float64(2)},
+ {Name(""), (*int)(addr(0)), `2`, (*int)(addr(2))},
+ {Name(""), (*int)(addr(1)), `2`, (*int)(addr(2))},
+ {Name(""), (**int)(nil), `2`, float64(2)},
+ {Name(""), (**int)(addr[*int](nil)), `2`, (**int)(addr(addr(2)))},
+ {Name(""), (**int)(addr(addr(1))), `2`, (**int)(addr(addr(2)))},
+ {Name(""), (***int)(nil), `2`, float64(2)},
+ {Name(""), (***int)(addr[**int](nil)), `2`, (***int)(addr(addr(addr(2))))},
+ {Name(""), (***int)(addr(addr[*int](nil))), `2`, (***int)(addr(addr(addr(2))))},
+ {Name(""), (***int)(addr(addr(addr(1)))), `2`, (***int)(addr(addr(addr(2))))},
+
+ {Name(""), any(nil), `{}`, map[string]any{}},
+ {Name(""), (int)(1), `{}`, map[string]any{}},
+ {Name(""), (*int)(nil), `{}`, map[string]any{}},
+ {Name(""), (*int)(addr(0)), `{}`, errUnmarshal},
+ {Name(""), (*int)(addr(1)), `{}`, errUnmarshal},
+ {Name(""), (**int)(nil), `{}`, map[string]any{}},
+ {Name(""), (**int)(addr[*int](nil)), `{}`, errUnmarshal},
+ {Name(""), (**int)(addr(addr(1))), `{}`, errUnmarshal},
+ {Name(""), (***int)(nil), `{}`, map[string]any{}},
+ {Name(""), (***int)(addr[**int](nil)), `{}`, errUnmarshal},
+ {Name(""), (***int)(addr(addr[*int](nil))), `{}`, errUnmarshal},
+ {Name(""), (***int)(addr(addr(addr(1)))), `{}`, errUnmarshal},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ b := struct{ X any }{tt.pre}
+ blob := `{"X":` + tt.json + `}`
+ if err := Unmarshal([]byte(blob), &b); err != nil {
+ if wantErr, _ := tt.post.(error); equalError(err, wantErr) {
+ return
+ }
+ t.Fatalf("%s: Unmarshal(%#q) error: %v", tt.Where, blob, err)
+ }
+ if !reflect.DeepEqual(b.X, tt.post) {
+ t.Errorf("%s: Unmarshal(%#q):\n\tpre.X: %#v\n\tgot.X: %#v\n\twant.X: %#v", tt.Where, blob, tt.pre, b.X, tt.post)
+ }
+ })
+ }
+}
+
+type NullTest struct {
+ Bool bool
+ Int int
+ Int8 int8
+ Int16 int16
+ Int32 int32
+ Int64 int64
+ Uint uint
+ Uint8 uint8
+ Uint16 uint16
+ Uint32 uint32
+ Uint64 uint64
+ Float32 float32
+ Float64 float64
+ String string
+ PBool *bool
+ Map map[string]string
+ Slice []string
+ Interface any
+
+ PRaw *RawMessage
+ PTime *time.Time
+ PBigInt *big.Int
+ PText *MustNotUnmarshalText
+ PBuffer *bytes.Buffer // has methods, just not relevant ones
+ PStruct *struct{}
+
+ Raw RawMessage
+ Time time.Time
+ BigInt big.Int
+ Text MustNotUnmarshalText
+ Buffer bytes.Buffer
+ Struct struct{}
+}
+
+// JSON null values should be ignored for primitives and string values instead of resulting in an error.
+// Issue 2540
+func TestUnmarshalNulls(t *testing.T) {
+ // Unmarshal docs:
+ // The JSON null value unmarshals into an interface, map, pointer, or slice
+ // by setting that Go value to nil. Because null is often used in JSON to mean
+ // ``not present,'' unmarshaling a JSON null into any other Go type has no effect
+ // on the value and produces no error.
+
+ jsonData := []byte(`{
+ "Bool" : null,
+ "Int" : null,
+ "Int8" : null,
+ "Int16" : null,
+ "Int32" : null,
+ "Int64" : null,
+ "Uint" : null,
+ "Uint8" : null,
+ "Uint16" : null,
+ "Uint32" : null,
+ "Uint64" : null,
+ "Float32" : null,
+ "Float64" : null,
+ "String" : null,
+ "PBool": null,
+ "Map": null,
+ "Slice": null,
+ "Interface": null,
+ "PRaw": null,
+ "PTime": null,
+ "PBigInt": null,
+ "PText": null,
+ "PBuffer": null,
+ "PStruct": null,
+ "Raw": null,
+ "Time": null,
+ "BigInt": null,
+ "Text": null,
+ "Buffer": null,
+ "Struct": null
+ }`)
+ nulls := NullTest{
+ Bool: true,
+ Int: 2,
+ Int8: 3,
+ Int16: 4,
+ Int32: 5,
+ Int64: 6,
+ Uint: 7,
+ Uint8: 8,
+ Uint16: 9,
+ Uint32: 10,
+ Uint64: 11,
+ Float32: 12.1,
+ Float64: 13.1,
+ String: "14",
+ PBool: new(bool),
+ Map: map[string]string{},
+ Slice: []string{},
+ Interface: new(MustNotUnmarshalJSON),
+ PRaw: new(RawMessage),
+ PTime: new(time.Time),
+ PBigInt: new(big.Int),
+ PText: new(MustNotUnmarshalText),
+ PStruct: new(struct{}),
+ PBuffer: new(bytes.Buffer),
+ Raw: RawMessage("123"),
+ Time: time.Unix(123456789, 0),
+ BigInt: *big.NewInt(123),
+ }
+
+ before := nulls.Time.String()
+
+ err := Unmarshal(jsonData, &nulls)
+ if err != nil {
+ t.Errorf("Unmarshal of null values failed: %v", err)
+ }
+ if !nulls.Bool || nulls.Int != 2 || nulls.Int8 != 3 || nulls.Int16 != 4 || nulls.Int32 != 5 || nulls.Int64 != 6 ||
+ nulls.Uint != 7 || nulls.Uint8 != 8 || nulls.Uint16 != 9 || nulls.Uint32 != 10 || nulls.Uint64 != 11 ||
+ nulls.Float32 != 12.1 || nulls.Float64 != 13.1 || nulls.String != "14" {
+ t.Errorf("Unmarshal of null values affected primitives")
+ }
+
+ if nulls.PBool != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.PBool")
+ }
+ if nulls.Map != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.Map")
+ }
+ if nulls.Slice != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.Slice")
+ }
+ if nulls.Interface != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.Interface")
+ }
+ if nulls.PRaw != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.PRaw")
+ }
+ if nulls.PTime != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.PTime")
+ }
+ if nulls.PBigInt != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.PBigInt")
+ }
+ if nulls.PText != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.PText")
+ }
+ if nulls.PBuffer != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.PBuffer")
+ }
+ if nulls.PStruct != nil {
+ t.Errorf("Unmarshal of null did not clear nulls.PStruct")
+ }
+
+ if string(nulls.Raw) != "null" {
+ t.Errorf("Unmarshal of RawMessage null did not record null: %v", string(nulls.Raw))
+ }
+ if nulls.Time.String() != before {
+ t.Errorf("Unmarshal of time.Time null set time to %v", nulls.Time.String())
+ }
+ if nulls.BigInt.String() != "123" {
+ t.Errorf("Unmarshal of big.Int null set int to %v", nulls.BigInt.String())
+ }
+}
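+
+// sketchNullSemantics is a hypothetical helper (not part of the upstream test
+// suite) reducing the table above to its essence: null clears pointers, maps,
+// slices, and interfaces, and is a silent no-op for every other Go type.
+func sketchNullSemantics() (n int, p *int) {
+ n = 5
+ p = &n
+ _ = Unmarshal([]byte(`null`), &n) // no-op: n is still 5 and the error is nil
+ _ = Unmarshal([]byte(`null`), &p) // p is set to nil and the error is nil
+ return n, p
+}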
+
+type MustNotUnmarshalJSON struct{}
+
+func (x MustNotUnmarshalJSON) UnmarshalJSON(data []byte) error {
+ return errors.New("MustNotUnmarshalJSON was used")
+}
+
+type MustNotUnmarshalText struct{}
+
+func (x MustNotUnmarshalText) UnmarshalText(text []byte) error {
+ return errors.New("MustNotUnmarshalText was used")
+}
+
+func TestStringKind(t *testing.T) {
+ type stringKind string
+ want := map[stringKind]int{"foo": 42}
+ data, err := Marshal(want)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ var got map[stringKind]int
+ err = Unmarshal(data, &got)
+ if err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if !maps.Equal(got, want) {
+ t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want)
+ }
+}
+
+// Custom types with []byte as underlying type could not be marshaled
+// and then unmarshaled.
+// Issue 8962.
+func TestByteKind(t *testing.T) {
+ type byteKind []byte
+ want := byteKind("hello")
+ data, err := Marshal(want)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ var got byteKind
+ err = Unmarshal(data, &got)
+ if err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if !slices.Equal(got, want) {
+ t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want)
+ }
+}
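+
+// sketchNamedByteSlice is a hypothetical helper (not part of the upstream
+// test suite) making the round trip in TestByteKind concrete: a named type
+// whose underlying type is []byte marshals as a base64 string, just like
+// []byte itself.
+func sketchNamedByteSlice() []byte {
+ type byteKind []byte
+ b, _ := Marshal(byteKind("hello"))
+ return b // expected to be "aGVsbG8=" (quotes included)
+}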
+
+// The fix for issue 8962 introduced a regression.
+// Issue 12921.
+func TestSliceOfCustomByte(t *testing.T) {
+ type Uint8 uint8
+ want := []Uint8("hello")
+ data, err := Marshal(want)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ var got []Uint8
+ err = Unmarshal(data, &got)
+ if err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if !slices.Equal(got, want) {
+ t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want)
+ }
+}
+
+func TestUnmarshalTypeError(t *testing.T) {
+ tests := []struct {
+ CaseName
+ dest any
+ in string
+ }{
+ {Name(""), new(string), `{"user": "name"}`}, // issue 4628.
+ {Name(""), new(error), `{}`}, // issue 4222
+ {Name(""), new(error), `[]`},
+ {Name(""), new(error), `""`},
+ {Name(""), new(error), `123`},
+ {Name(""), new(error), `true`},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ err := Unmarshal([]byte(tt.in), tt.dest)
+ if _, ok := err.(*UnmarshalTypeError); !ok {
+ t.Errorf("%s: Unmarshal(%#q, %T):\n\tgot: %T\n\twant: %T",
+ tt.Where, tt.in, tt.dest, err, new(UnmarshalTypeError))
+ }
+ })
+ }
+}
+
+func TestUnmarshalSyntax(t *testing.T) {
+ var x any
+ tests := []struct {
+ CaseName
+ in string
+ }{
+ {Name(""), "tru"},
+ {Name(""), "fals"},
+ {Name(""), "nul"},
+ {Name(""), "123e"},
+ {Name(""), `"hello`},
+ {Name(""), `[1,2,3`},
+ {Name(""), `{"key":1`},
+ {Name(""), `{"key":1,`},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ err := Unmarshal([]byte(tt.in), &x)
+ if _, ok := err.(*SyntaxError); !ok {
+ t.Errorf("%s: Unmarshal(%#q, any):\n\tgot: %T\n\twant: %T",
+ tt.Where, tt.in, err, new(SyntaxError))
+ }
+ })
+ }
+}
+
+// Test handling of unexported fields that should be ignored.
+// Issue 4660
+type unexportedFields struct {
+ Name string
+ m map[string]any `json:"-"`
+ m2 map[string]any `json:"abcd"`
+
+ s []int `json:"-"`
+}
+
+func TestUnmarshalUnexported(t *testing.T) {
+ input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}, "s": [2, 3]}`
+ want := &unexportedFields{Name: "Bob"}
+
+ out := &unexportedFields{}
+ err := Unmarshal([]byte(input), out)
+ if err != nil {
+ t.Errorf("Unmarshal error: %v", err)
+ }
+ if !reflect.DeepEqual(out, want) {
+ t.Errorf("Unmarshal:\n\tgot: %+v\n\twant: %+v", out, want)
+ }
+}
+
+// Time3339 is a time.Time which encodes to and from JSON
+// as an RFC 3339 time in UTC.
+type Time3339 time.Time
+
+func (t *Time3339) UnmarshalJSON(b []byte) error {
+ if len(b) < 2 || b[0] != '"' || b[len(b)-1] != '"' {
+ return fmt.Errorf("types: failed to unmarshal non-string value %q as an RFC 3339 time", b)
+ }
+ tm, err := time.Parse(time.RFC3339, string(b[1:len(b)-1]))
+ if err != nil {
+ return err
+ }
+ *t = Time3339(tm)
+ return nil
+}
+
+func TestUnmarshalJSONLiteralError(t *testing.T) {
+ var t3 Time3339
+ switch err := Unmarshal([]byte(`"0000-00-00T00:00:00Z"`), &t3); {
+ case err == nil:
+ t.Fatalf("Unmarshal error: got nil, want non-nil")
+ case !strings.Contains(err.Error(), "range"):
+ t.Errorf("Unmarshal error:\n\tgot: %v\n\twant: out of range", err)
+ }
+}
+
+// Test that extra object elements in an array do not result in a
+// "data changing underfoot" error.
+// Issue 3717
+func TestSkipArrayObjects(t *testing.T) {
+ json := `[{}]`
+ var dest [0]any
+
+ err := Unmarshal([]byte(json), &dest)
+ if err != nil {
+ t.Errorf("Unmarshal error: %v", err)
+ }
+}
+
+// Test semantics of pre-filled data, such as struct fields, map elements,
+// slices, and arrays.
+// Issues 4900 and 8837, among others.
+func TestPrefilled(t *testing.T) {
+ // Values here change, cannot reuse table across runs.
+ tests := []struct {
+ CaseName
+ in string
+ ptr any
+ out any
+ }{{
+ CaseName: Name(""),
+ in: `{"X": 1, "Y": 2}`,
+ ptr: &XYZ{X: float32(3), Y: int16(4), Z: 1.5},
+ out: &XYZ{X: float64(1), Y: float64(2), Z: 1.5},
+ }, {
+ CaseName: Name(""),
+ in: `{"X": 1, "Y": 2}`,
+ ptr: &map[string]any{"X": float32(3), "Y": int16(4), "Z": 1.5},
+ out: &map[string]any{"X": float64(1), "Y": float64(2), "Z": 1.5},
+ }, {
+ CaseName: Name(""),
+ in: `[2]`,
+ ptr: &[]int{1},
+ out: &[]int{2},
+ }, {
+ CaseName: Name(""),
+ in: `[2, 3]`,
+ ptr: &[]int{1},
+ out: &[]int{2, 3},
+ }, {
+ CaseName: Name(""),
+ in: `[2, 3]`,
+ ptr: &[...]int{1},
+ out: &[...]int{2},
+ }, {
+ CaseName: Name(""),
+ in: `[3]`,
+ ptr: &[...]int{1, 2},
+ out: &[...]int{3, 0},
+ }}
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ ptrstr := fmt.Sprintf("%v", tt.ptr)
+ err := Unmarshal([]byte(tt.in), tt.ptr) // tt.ptr edited here
+ if err != nil {
+ t.Errorf("%s: Unmarshal error: %v", tt.Where, err)
+ }
+ if !reflect.DeepEqual(tt.ptr, tt.out) {
+ t.Errorf("%s: Unmarshal(%#q, %T):\n\tgot: %v\n\twant: %v", tt.Where, tt.in, ptrstr, tt.ptr, tt.out)
+ }
+ })
+ }
+}
+
+func TestInvalidUnmarshal(t *testing.T) {
+ tests := []struct {
+ CaseName
+ in string
+ v any
+ wantErr error
+ }{
+ {Name(""), `{"a":"1"}`, nil, &InvalidUnmarshalError{}},
+ {Name(""), `{"a":"1"}`, struct{}{}, &InvalidUnmarshalError{reflect.TypeFor[struct{}]()}},
+ {Name(""), `{"a":"1"}`, (*int)(nil), &InvalidUnmarshalError{reflect.TypeFor[*int]()}},
+ {Name(""), `123`, nil, &InvalidUnmarshalError{}},
+ {Name(""), `123`, struct{}{}, &InvalidUnmarshalError{reflect.TypeFor[struct{}]()}},
+ {Name(""), `123`, (*int)(nil), &InvalidUnmarshalError{reflect.TypeFor[*int]()}},
+ {Name(""), `123`, new(net.IP), &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[*net.IP](), Offset: 3}},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ switch gotErr := Unmarshal([]byte(tt.in), tt.v); {
+ case gotErr == nil:
+ t.Fatalf("%s: Unmarshal error: got nil, want non-nil", tt.Where)
+ case !reflect.DeepEqual(gotErr, tt.wantErr):
+ t.Errorf("%s: Unmarshal error:\n\tgot: %#v\n\twant: %#v", tt.Where, gotErr, tt.wantErr)
+ }
+ })
+ }
+}
+
+// Test that string option is ignored for invalid types.
+// Issue 9812.
+func TestInvalidStringOption(t *testing.T) {
+ num := 0
+ item := struct {
+ T time.Time `json:",string"`
+ M map[string]string `json:",string"`
+ S []string `json:",string"`
+ A [1]string `json:",string"`
+ I any `json:",string"`
+ P *int `json:",string"`
+ }{M: make(map[string]string), S: make([]string, 0), I: num, P: &num}
+
+ data, err := Marshal(item)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+
+ err = Unmarshal(data, &item)
+ if err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+}
+
+// Test unmarshal behavior with regards to embedded unexported structs.
+//
+// (Issue 21357) If the embedded struct is a pointer and is unallocated,
+// this returns an error because unmarshal cannot set the field.
+//
+// (Issue 24152) If the embedded struct is given an explicit name,
+// ensure that the normal unmarshal logic does not panic in reflect.
+//
+// (Issue 28145) If the embedded struct is given an explicit name and has
+// exported methods, don't cause a panic trying to get its value.
+func TestUnmarshalEmbeddedUnexported(t *testing.T) {
+ type (
+ embed1 struct{ Q int }
+ embed2 struct{ Q int }
+ embed3 struct {
+ Q int64 `json:",string"`
+ }
+ S1 struct {
+ *embed1
+ R int
+ }
+ S2 struct {
+ *embed1
+ Q int
+ }
+ S3 struct {
+ embed1
+ R int
+ }
+ S4 struct {
+ *embed1
+ embed2
+ }
+ S5 struct {
+ *embed3
+ R int
+ }
+ S6 struct {
+ embed1 `json:"embed1"`
+ }
+ S7 struct {
+ embed1 `json:"embed1"`
+ embed2
+ }
+ S8 struct {
+ embed1 `json:"embed1"`
+ embed2 `json:"embed2"`
+ Q int
+ }
+ S9 struct {
+ unexportedWithMethods `json:"embed"`
+ }
+ )
+
+ tests := []struct {
+ CaseName
+ in string
+ ptr any
+ out any
+ err error
+ }{{
+ // Error since we cannot set S1.embed1, but still able to set S1.R.
+ CaseName: Name(""),
+ in: `{"R":2,"Q":1}`,
+ ptr: new(S1),
+ out: &S1{R: 2},
+ err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed1"),
+ }, {
+ // The top level Q field takes precedence.
+ CaseName: Name(""),
+ in: `{"Q":1}`,
+ ptr: new(S2),
+ out: &S2{Q: 1},
+ }, {
+ // No issue with non-pointer variant.
+ CaseName: Name(""),
+ in: `{"R":2,"Q":1}`,
+ ptr: new(S3),
+ out: &S3{embed1: embed1{Q: 1}, R: 2},
+ }, {
+ // No error since both embedded structs have field Q, which annihilate each other.
+ // Thus, no attempt is made at setting S4.embed1.
+ CaseName: Name(""),
+ in: `{"R":2}`,
+ ptr: new(S4),
+ out: new(S4),
+ }, {
+ // Error since we cannot set S5.embed1, but still able to set S5.R.
+ CaseName: Name(""),
+ in: `{"R":2,"Q":1}`,
+ ptr: new(S5),
+ out: &S5{R: 2},
+ err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed3"),
+ }, {
+ // Issue 24152, ensure decodeState.indirect does not panic.
+ CaseName: Name(""),
+ in: `{"embed1": {"Q": 1}}`,
+ ptr: new(S6),
+ out: &S6{embed1{1}},
+ }, {
+ // Issue 24153, check that we can still set forwarded fields even in
+ // the presence of a name conflict.
+ //
+ // This relies on obscure behavior of reflect where it is possible
+ // to set a forwarded exported field on an unexported embedded struct
+ // even though there is a name conflict, even when it would have been
+ // impossible to do so according to Go visibility rules.
+ // Go forbids this because it is ambiguous whether S7.Q refers to
+ // S7.embed1.Q or S7.embed2.Q. Since embed1 and embed2 are unexported,
+ // it should be impossible for an external package to set either Q.
+ //
+ // It is probably okay for a future reflect change to break this.
+ CaseName: Name(""),
+ in: `{"embed1": {"Q": 1}, "Q": 2}`,
+ ptr: new(S7),
+ out: &S7{embed1{1}, embed2{2}},
+ }, {
+ // Issue 24153, similar to the S7 case.
+ CaseName: Name(""),
+ in: `{"embed1": {"Q": 1}, "embed2": {"Q": 2}, "Q": 3}`,
+ ptr: new(S8),
+ out: &S8{embed1{1}, embed2{2}, 3},
+ }, {
+ // Issue 28145, similar to the cases above.
+ CaseName: Name(""),
+ in: `{"embed": {}}`,
+ ptr: new(S9),
+ out: &S9{},
+ }}
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ err := Unmarshal([]byte(tt.in), tt.ptr)
+ if !equalError(err, tt.err) {
+ t.Errorf("%s: Unmarshal error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err)
+ }
+ if !reflect.DeepEqual(tt.ptr, tt.out) {
+ t.Errorf("%s: Unmarshal:\n\tgot: %#+v\n\twant: %#+v", tt.Where, tt.ptr, tt.out)
+ }
+ })
+ }
+}
+
+func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) {
+ tests := []struct {
+ CaseName
+ in string
+ err error
+ }{{
+ CaseName: Name(""),
+ in: `1 false null :`,
+ err: &SyntaxError{"invalid character ':' looking for beginning of value", 14},
+ }, {
+ CaseName: Name(""),
+ in: `1 [] [,]`,
+ err: &SyntaxError{"invalid character ',' looking for beginning of value", 7},
+ }, {
+ CaseName: Name(""),
+ in: `1 [] [true:]`,
+ err: &SyntaxError{"invalid character ':' after array element", 11},
+ }, {
+ CaseName: Name(""),
+ in: `1 {} {"x"=}`,
+ err: &SyntaxError{"invalid character '=' after object key", 14},
+ }, {
+ CaseName: Name(""),
+ in: `falsetruenul#`,
+ err: &SyntaxError{"invalid character '#' in literal null (expecting 'l')", 13},
+ }}
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ dec := NewDecoder(strings.NewReader(tt.in))
+ var err error
+ for err == nil {
+ var v any
+ err = dec.Decode(&v)
+ }
+ if !reflect.DeepEqual(err, tt.err) {
+ t.Errorf("%s: Decode error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err)
+ }
+ })
+ }
+}
+
+type unmarshalPanic struct{}
+
+func (unmarshalPanic) UnmarshalJSON([]byte) error { panic(0xdead) }
+
+func TestUnmarshalPanic(t *testing.T) {
+ defer func() {
+ if got := recover(); !reflect.DeepEqual(got, 0xdead) {
+ t.Errorf("panic() = (%T)(%v), want 0xdead", got, got)
+ }
+ }()
+ Unmarshal([]byte("{}"), &unmarshalPanic{})
+ t.Fatalf("Unmarshal should have panicked")
+}
+
+// The decoder used to hang if decoding into an interface pointing to its own address.
+// See golang.org/issues/31740.
+func TestUnmarshalRecursivePointer(t *testing.T) {
+ var v any
+ v = &v
+ data := []byte(`{"a": "b"}`)
+
+ if err := Unmarshal(data, v); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+}
+
+type textUnmarshalerString string
+
+func (m *textUnmarshalerString) UnmarshalText(text []byte) error {
+ *m = textUnmarshalerString(strings.ToLower(string(text)))
+ return nil
+}
+
+// Test unmarshal to a map, where the map key is a user defined type.
+// See golang.org/issues/34437.
+func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) {
+ var p map[textUnmarshalerString]string
+ if err := Unmarshal([]byte(`{"FOO": "1"}`), &p); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+
+ if _, ok := p["foo"]; !ok {
+ t.Errorf(`key "foo" missing in map: %v`, p)
+ }
+}
+
+func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) {
+ // See golang.org/issues/38105.
+ var p map[textUnmarshalerString]string
+ if err := Unmarshal([]byte(`{"开源":"12345开源"}`), &p); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if _, ok := p["开源"]; !ok {
+ t.Errorf(`key "开源" missing in map: %v`, p)
+ }
+
+ // See golang.org/issues/38126.
+ type T struct {
+ F1 string `json:"F1,string"`
+ }
+ wantT := T{"aaa\tbbb"}
+
+ b, err := Marshal(wantT)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ var gotT T
+ if err := Unmarshal(b, &gotT); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if gotT != wantT {
+ t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %q\n\twant: %q", gotT, wantT)
+ }
+
+ // See golang.org/issues/39555.
+ input := map[textUnmarshalerString]string{"FOO": "", `"`: ""}
+
+ encoded, err := Marshal(input)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ var got map[textUnmarshalerString]string
+ if err := Unmarshal(encoded, &got); err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ want := map[textUnmarshalerString]string{"foo": "", `"`: ""}
+ if !maps.Equal(got, want) {
+ t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %q\n\twant: %q", gotT, wantT)
+ }
+}
+
+func TestUnmarshalMaxDepth(t *testing.T) {
+ tests := []struct {
+ CaseName
+ data string
+ errMaxDepth bool
+ }{{
+ CaseName: Name("ArrayUnderMaxNestingDepth"),
+ data: `{"a":` + strings.Repeat(`[`, 10000-1) + strings.Repeat(`]`, 10000-1) + `}`,
+ errMaxDepth: false,
+ }, {
+ CaseName: Name("ArrayOverMaxNestingDepth"),
+ data: `{"a":` + strings.Repeat(`[`, 10000) + strings.Repeat(`]`, 10000) + `}`,
+ errMaxDepth: true,
+ }, {
+ CaseName: Name("ArrayOverStackDepth"),
+ data: `{"a":` + strings.Repeat(`[`, 3000000) + strings.Repeat(`]`, 3000000) + `}`,
+ errMaxDepth: true,
+ }, {
+ CaseName: Name("ObjectUnderMaxNestingDepth"),
+ data: `{"a":` + strings.Repeat(`{"a":`, 10000-1) + `0` + strings.Repeat(`}`, 10000-1) + `}`,
+ errMaxDepth: false,
+ }, {
+ CaseName: Name("ObjectOverMaxNestingDepth"),
+ data: `{"a":` + strings.Repeat(`{"a":`, 10000) + `0` + strings.Repeat(`}`, 10000) + `}`,
+ errMaxDepth: true,
+ }, {
+ CaseName: Name("ObjectOverStackDepth"),
+ data: `{"a":` + strings.Repeat(`{"a":`, 3000000) + `0` + strings.Repeat(`}`, 3000000) + `}`,
+ errMaxDepth: true,
+ }}
+
+ targets := []struct {
+ CaseName
+ newValue func() any
+ }{{
+ CaseName: Name("unstructured"),
+ newValue: func() any {
+ var v any
+ return &v
+ },
+ }, {
+ CaseName: Name("typed named field"),
+ newValue: func() any {
+ v := struct {
+ A any `json:"a"`
+ }{}
+ return &v
+ },
+ }, {
+ CaseName: Name("typed missing field"),
+ newValue: func() any {
+ v := struct {
+ B any `json:"b"`
+ }{}
+ return &v
+ },
+ }, {
+ CaseName: Name("custom unmarshaler"),
+ newValue: func() any {
+ v := unmarshaler{}
+ return &v
+ },
+ }}
+
+ for _, tt := range tests {
+ for _, target := range targets {
+ t.Run(target.Name+"-"+tt.Name, func(t *testing.T) {
+ err := Unmarshal([]byte(tt.data), target.newValue())
+ if !tt.errMaxDepth {
+ if err != nil {
+ t.Errorf("%s: %s: Unmarshal error: %v", tt.Where, target.Where, err)
+ }
+ } else {
+ if err == nil || !strings.Contains(err.Error(), "exceeded max depth") {
+ t.Errorf("%s: %s: Unmarshal error:\n\tgot: %v\n\twant: exceeded max depth", tt.Where, target.Where, err)
+ }
+ }
+ })
+ }
+ }
+}
diff --git a/pkg/encoders/json/encode.go b/pkg/encoders/json/encode.go
new file mode 100644
index 0000000..951fbc2
--- /dev/null
+++ b/pkg/encoders/json/encode.go
@@ -0,0 +1,1418 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.jsonv2
+
+// Package json implements encoding and decoding of JSON as defined in RFC 7159.
+// The mapping between JSON and Go values is described in the documentation for
+// the Marshal and Unmarshal functions.
+//
+// See "JSON and Go" for an introduction to this package:
+// https://golang.org/doc/articles/json_and_go.html
+//
+// # Security Considerations
+//
+// The JSON standard (RFC 7159) is lax in its definition of a number of parser
+// behaviors. As such, many JSON parsers behave differently in various
+// scenarios. These differences in parsers mean that systems that use multiple
+// independent JSON parser implementations may parse the same JSON object in
+// differing ways.
+//
+// Systems that rely on a JSON object being parsed consistently for security
+// purposes should be careful to understand the behaviors of this parser, as
+// well as how these behaviors may cause interoperability issues with other
+// parser implementations.
+//
+// Due to the Go Backwards Compatibility promise (https://go.dev/doc/go1compat)
+// there are a number of behaviors this package exhibits that may cause
+// interopability issues, but cannot be changed. In particular the following
+// parsing behaviors may cause issues:
+//
+// - If a JSON object contains duplicate keys, keys are processed in the order
+// they are observed, meaning later values will replace or be merged into
+// prior values, depending on the field type (in particular maps and structs
+// will have values merged, while other types have values replaced).
+// - When parsing a JSON object into a Go struct, keys are considered in a
+// case-insensitive fashion.
+// - When parsing a JSON object into a Go struct, unknown keys in the JSON
+// object are ignored (unless a [Decoder] is used and
+// [Decoder.DisallowUnknownFields] has been called).
+// - Invalid UTF-8 bytes in JSON strings are replaced by the Unicode
+// replacement character.
+// - Large JSON number integers will lose precision when unmarshaled into
+// floating-point types.
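+//
+// For instance, a minimal sketch of the duplicate-key and case-insensitive
+// behaviors listed above (the struct and input are purely illustrative):
+//
+// var v struct{ Name string }
+// err := Unmarshal([]byte(`{"name":"a","NAME":"b"}`), &v)
+// // err == nil and v.Name == "b": both keys match Name case-insensitively,
+// // and the later duplicate replaces the earlier value.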
+package json
+
+import (
+ "bytes"
+ "cmp"
+ "encoding"
+ "encoding/base64"
+ "fmt"
+ "math"
+ "reflect"
+ "slices"
+ "strconv"
+ "strings"
+ "sync"
+ "unicode"
+ "unicode/utf8"
+ _ "unsafe" // for linkname
+)
+
+// Marshal returns the JSON encoding of v.
+//
+// Marshal traverses the value v recursively.
+// If an encountered value implements [Marshaler]
+// and is not a nil pointer, Marshal calls [Marshaler.MarshalJSON]
+// to produce JSON. If no [Marshaler.MarshalJSON] method is present but the
+// value implements [encoding.TextMarshaler] instead, Marshal calls
+// [encoding.TextMarshaler.MarshalText] and encodes the result as a JSON string.
+// The nil pointer exception is not strictly necessary
+// but mimics a similar, necessary exception in the behavior of
+// [Unmarshaler.UnmarshalJSON].
+//
+// Otherwise, Marshal uses the following type-dependent default encodings:
+//
+// Boolean values encode as JSON booleans.
+//
+// Floating point, integer, and [Number] values encode as JSON numbers.
+// NaN and +/-Inf values will return an [UnsupportedValueError].
+//
+// String values encode as JSON strings coerced to valid UTF-8,
+// replacing invalid bytes with the Unicode replacement rune.
+// So that the JSON will be safe to embed inside HTML `,
+ }
+
+ b, err := json.Marshal(&page,
+ // Escape certain runes within a JSON string so that
+ // JSON will be safe to directly embed inside HTML.
+ jsontext.EscapeForHTML(true),
+ jsontext.EscapeForJS(true),
+ jsontext.Multiline(true)) // expand for readability
+ if err != nil {
+ log.Fatal(err)
+ }
+ fmt.Println(string(b))
+
+ // Output:
+ // {
+ // "Title": "Example Embedded Javascript",
+ // "Body": "\u003cscript\u003e console.log(\"Hello, world!\"); \u003c/script\u003e"
+ // }
+}
diff --git a/pkg/encoders/json/jsontext/export.go b/pkg/encoders/json/jsontext/export.go
new file mode 100644
index 0000000..0ecccad
--- /dev/null
+++ b/pkg/encoders/json/jsontext/export.go
@@ -0,0 +1,77 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "io"
+
+ "encoding/json/internal"
+)
+
+// Internal is for internal use only.
+// This is exempt from the Go compatibility agreement.
+var Internal exporter
+
+type exporter struct{}
+
+// Export exposes internal functionality from "jsontext" to "json".
+// This cannot be dynamically called by other packages since
+// they cannot obtain a reference to the internal.AllowInternalUse value.
+func (exporter) Export(p *internal.NotForPublicUse) export {
+ if p != &internal.AllowInternalUse {
+ panic("unauthorized call to Export")
+ }
+ return export{}
+}
+
+// The export type exposes functionality to packages with visibility to
+// the internal.AllowInternalUse variable. The "json" package uses this
+// to modify low-level state in the Encoder and Decoder types.
+// It mutates the state directly instead of calling ReadToken or WriteToken
+// since this is more performant. The public APIs need to track state to ensure
+// that users are constructing a valid JSON value, but the "json" implementation
+// guarantees that it emits valid JSON by the structure of the code itself.
+type export struct{}
+
+// Encoder returns a pointer to the underlying encoderState.
+func (export) Encoder(e *Encoder) *encoderState { return &e.s }
+
+// Decoder returns a pointer to the underlying decoderState.
+func (export) Decoder(d *Decoder) *decoderState { return &d.s }
+
+func (export) GetBufferedEncoder(o ...Options) *Encoder {
+ return getBufferedEncoder(o...)
+}
+func (export) PutBufferedEncoder(e *Encoder) {
+ putBufferedEncoder(e)
+}
+
+func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder {
+ return getStreamingEncoder(w, o...)
+}
+func (export) PutStreamingEncoder(e *Encoder) {
+ putStreamingEncoder(e)
+}
+
+func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder {
+ return getBufferedDecoder(b, o...)
+}
+func (export) PutBufferedDecoder(d *Decoder) {
+ putBufferedDecoder(d)
+}
+
+func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder {
+ return getStreamingDecoder(r, o...)
+}
+func (export) PutStreamingDecoder(d *Decoder) {
+ putStreamingDecoder(d)
+}
+
+func (export) IsIOError(err error) bool {
+ _, ok := err.(*ioError)
+ return ok
+}
diff --git a/pkg/encoders/json/jsontext/fuzz_test.go b/pkg/encoders/json/jsontext/fuzz_test.go
new file mode 100644
index 0000000..60d16b9
--- /dev/null
+++ b/pkg/encoders/json/jsontext/fuzz_test.go
@@ -0,0 +1,236 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "bytes"
+ "errors"
+ "io"
+ "math/rand"
+ "slices"
+ "testing"
+
+ "encoding/json/internal/jsontest"
+)
+
+func FuzzCoder(f *testing.F) {
+ // Add a number of inputs to the corpus including valid and invalid data.
+ for _, td := range coderTestdata {
+ f.Add(int64(0), []byte(td.in))
+ }
+ for _, td := range decoderErrorTestdata {
+ f.Add(int64(0), []byte(td.in))
+ }
+ for _, td := range encoderErrorTestdata {
+ f.Add(int64(0), []byte(td.wantOut))
+ }
+ for _, td := range jsontest.Data {
+ f.Add(int64(0), td.Data())
+ }
+
+ f.Fuzz(func(t *testing.T, seed int64, b []byte) {
+ var tokVals []tokOrVal
+ rn := rand.NewSource(seed)
+
+ // Read a sequence of tokens or values. Skip the test for any errors
+ // since we expect this with randomly generated fuzz inputs.
+ src := bytes.NewReader(b)
+ dec := NewDecoder(src)
+ for {
+ if rn.Int63()%8 > 0 {
+ tok, err := dec.ReadToken()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ t.Skipf("Decoder.ReadToken error: %v", err)
+ }
+ tokVals = append(tokVals, tok.Clone())
+ } else {
+ val, err := dec.ReadValue()
+ if err != nil {
+ expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']'
+ if expectError && errors.As(err, new(*SyntacticError)) {
+ continue
+ }
+ if err == io.EOF {
+ break
+ }
+ t.Skipf("Decoder.ReadValue error: %v", err)
+ }
+ tokVals = append(tokVals, append(zeroValue, val...))
+ }
+ }
+
+ // Write a sequence of tokens or values. Fail the test for any errors
+ // since the previous stage guarantees that the input is valid.
+ dst := new(bytes.Buffer)
+ enc := NewEncoder(dst)
+ for _, tokVal := range tokVals {
+ switch tokVal := tokVal.(type) {
+ case Token:
+ if err := enc.WriteToken(tokVal); err != nil {
+ t.Fatalf("Encoder.WriteToken error: %v", err)
+ }
+ case Value:
+ if err := enc.WriteValue(tokVal); err != nil {
+ t.Fatalf("Encoder.WriteValue error: %v", err)
+ }
+ }
+ }
+
+ // Encoded output and original input must decode to the same thing.
+ var got, want []Token
+ for dec := NewDecoder(bytes.NewReader(b)); dec.PeekKind() > 0; {
+ tok, err := dec.ReadToken()
+ if err != nil {
+ t.Fatalf("Decoder.ReadToken error: %v", err)
+ }
+ got = append(got, tok.Clone())
+ }
+ for dec := NewDecoder(dst); dec.PeekKind() > 0; {
+ tok, err := dec.ReadToken()
+ if err != nil {
+ t.Fatalf("Decoder.ReadToken error: %v", err)
+ }
+ want = append(want, tok.Clone())
+ }
+ if !equalTokens(got, want) {
+ t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want)
+ }
+ })
+}
+
+func FuzzResumableDecoder(f *testing.F) {
+ for _, td := range resumableDecoderTestdata {
+ f.Add(int64(0), []byte(td))
+ }
+
+ f.Fuzz(func(t *testing.T, seed int64, b []byte) {
+ rn := rand.NewSource(seed)
+
+ // Regardless of how many bytes the underlying io.Reader produces,
+ // the provided tokens, values, and errors should always be identical.
+ t.Run("ReadToken", func(t *testing.T) {
+ decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn})
+ decWant := NewDecoder(bytes.NewReader(b))
+ gotTok, gotErr := decGot.ReadToken()
+ wantTok, wantErr := decWant.ReadToken()
+ if gotTok.String() != wantTok.String() || !equalError(gotErr, wantErr) {
+ t.Errorf("Decoder.ReadToken = (%v, %v), want (%v, %v)", gotTok, gotErr, wantTok, wantErr)
+ }
+ })
+ t.Run("ReadValue", func(t *testing.T) {
+ decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn})
+ decWant := NewDecoder(bytes.NewReader(b))
+ gotVal, gotErr := decGot.ReadValue()
+ wantVal, wantErr := decWant.ReadValue()
+ if !slices.Equal(gotVal, wantVal) || !equalError(gotErr, wantErr) {
+ t.Errorf("Decoder.ReadValue = (%s, %v), want (%s, %v)", gotVal, gotErr, wantVal, wantErr)
+ }
+ })
+ })
+}
+
+func FuzzValueFormat(f *testing.F) {
+ for _, td := range valueTestdata {
+ f.Add(int64(0), []byte(td.in))
+ }
+
+ // isValid reports whether b is valid according to the specified options.
+ isValid := func(b []byte, opts ...Options) bool {
+ d := NewDecoder(bytes.NewReader(b), opts...)
+ _, errVal := d.ReadValue()
+ _, errEOF := d.ReadToken()
+ return errVal == nil && errEOF == io.EOF
+ }
+
+ // stripWhitespace removes all JSON whitespace characters from the input.
+ stripWhitespace := func(in []byte) (out []byte) {
+ out = make([]byte, 0, len(in))
+ for _, c := range in {
+ switch c {
+ case ' ', '\n', '\r', '\t':
+ default:
+ out = append(out, c)
+ }
+ }
+ return out
+ }
+
+ allOptions := []Options{
+ AllowDuplicateNames(true),
+ AllowInvalidUTF8(true),
+ EscapeForHTML(true),
+ EscapeForJS(true),
+ PreserveRawStrings(true),
+ CanonicalizeRawInts(true),
+ CanonicalizeRawFloats(true),
+ ReorderRawObjects(true),
+ SpaceAfterColon(true),
+ SpaceAfterComma(true),
+ Multiline(true),
+ WithIndent("\t"),
+ WithIndentPrefix(" "),
+ }
+
+ f.Fuzz(func(t *testing.T, seed int64, b []byte) {
+ validRFC7159 := isValid(b, AllowInvalidUTF8(true), AllowDuplicateNames(true))
+ validRFC8259 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(true))
+ validRFC7493 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(false))
+ switch {
+ case !validRFC7159 && validRFC8259:
+ t.Errorf("invalid input per RFC 7159 implies invalid per RFC 8259")
+ case !validRFC8259 && validRFC7493:
+ t.Errorf("invalid input per RFC 8259 implies invalid per RFC 7493")
+ }
+
+ gotValid := Value(b).IsValid()
+ wantValid := validRFC7493
+ if gotValid != wantValid {
+ t.Errorf("Value.IsValid = %v, want %v", gotValid, wantValid)
+ }
+
+ gotCompacted := Value(string(b))
+ gotCompactOk := gotCompacted.Compact() == nil
+ wantCompactOk := validRFC7159
+ if !bytes.Equal(stripWhitespace(gotCompacted), stripWhitespace(b)) {
+ t.Errorf("stripWhitespace(Value.Compact) = %s, want %s", stripWhitespace(gotCompacted), stripWhitespace(b))
+ }
+ if gotCompactOk != wantCompactOk {
+ t.Errorf("Value.Compact success mismatch: got %v, want %v", gotCompactOk, wantCompactOk)
+ }
+
+ gotIndented := Value(string(b))
+ gotIndentOk := gotIndented.Indent() == nil
+ wantIndentOk := validRFC7159
+ if !bytes.Equal(stripWhitespace(gotIndented), stripWhitespace(b)) {
+ t.Errorf("stripWhitespace(Value.Indent) = %s, want %s", stripWhitespace(gotIndented), stripWhitespace(b))
+ }
+ if gotIndentOk != wantIndentOk {
+ t.Errorf("Value.Indent success mismatch: got %v, want %v", gotIndentOk, wantIndentOk)
+ }
+
+ gotCanonicalized := Value(string(b))
+ gotCanonicalizeOk := gotCanonicalized.Canonicalize() == nil
+ wantCanonicalizeOk := validRFC7493
+ if gotCanonicalizeOk != wantCanonicalizeOk {
+ t.Errorf("Value.Canonicalize success mismatch: got %v, want %v", gotCanonicalizeOk, wantCanonicalizeOk)
+ }
+
+ // Random options should not result in a panic.
+ var opts []Options
+ rn := rand.New(rand.NewSource(seed))
+ for _, opt := range allOptions {
+ if rn.Intn(len(allOptions)/4) == 0 {
+ opts = append(opts, opt)
+ }
+ }
+ v := Value(b)
+ v.Format(opts...) // should not panic
+ })
+}
diff --git a/pkg/encoders/json/jsontext/options.go b/pkg/encoders/json/jsontext/options.go
new file mode 100644
index 0000000..7eb4f9b
--- /dev/null
+++ b/pkg/encoders/json/jsontext/options.go
@@ -0,0 +1,304 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "strings"
+
+ "encoding/json/internal/jsonflags"
+ "encoding/json/internal/jsonopts"
+ "encoding/json/internal/jsonwire"
+)
+
+// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder],
+// and [Decoder.Reset] with specific features.
+// Each function takes in a variadic list of options, where properties
+// set in latter options override the value of previously set properties.
+//
+// There is a single Options type, which is used with both encoding and decoding.
+// Some options affect both operations, while others only affect one operation:
+//
+// - [AllowDuplicateNames] affects encoding and decoding
+// - [AllowInvalidUTF8] affects encoding and decoding
+// - [EscapeForHTML] affects encoding only
+// - [EscapeForJS] affects encoding only
+// - [PreserveRawStrings] affects encoding only
+// - [CanonicalizeRawInts] affects encoding only
+// - [CanonicalizeRawFloats] affects encoding only
+// - [ReorderRawObjects] affects encoding only
+// - [SpaceAfterColon] affects encoding only
+// - [SpaceAfterComma] affects encoding only
+// - [Multiline] affects encoding only
+// - [WithIndent] affects encoding only
+// - [WithIndentPrefix] affects encoding only
+//
+// Options that do not affect a particular operation are ignored.
+//
+// The Options type is identical to [encoding/json.Options] and
+// [encoding/json/v2.Options]. Options from the other packages may
+// be passed to functionality in this package, but are ignored.
+// Options from this package may be used with the other packages.
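+//
+// For example, a later option overrides an earlier one
+// (the output writer here is illustrative):
+//
+// enc := NewEncoder(os.Stdout, Multiline(true), Multiline(false))
+// // The last Multiline option wins, so enc emits single-line output.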
+type Options = jsonopts.Options
+
+// AllowDuplicateNames specifies that JSON objects may contain
+// duplicate member names. Disabling the duplicate name check may provide
+// performance benefits, but breaks compliance with RFC 7493, section 2.3.
+// The input or output will still be compliant with RFC 8259,
+// which leaves the handling of duplicate names as unspecified behavior.
+//
+// This affects either encoding or decoding.
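+//
+// For example, with the default of false, reading an object that repeats
+// a member name reports an error (the input is illustrative):
+//
+// d := NewDecoder(strings.NewReader(`{"a":1,"a":2}`))
+// _, err := d.ReadValue() // err is non-nil and wraps ErrDuplicateName
+// // Passing AllowDuplicateNames(true) to NewDecoder accepts the same input.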
+func AllowDuplicateNames(v bool) Options {
+ if v {
+ return jsonflags.AllowDuplicateNames | 1
+ } else {
+ return jsonflags.AllowDuplicateNames | 0
+ }
+}
+
+// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8,
+// which will be mangled as the Unicode replacement character, U+FFFD.
+// This causes the encoder or decoder to break compliance with
+// RFC 7493, section 2.1, and RFC 8259, section 8.1.
+//
+// This affects either encoding or decoding.
+func AllowInvalidUTF8(v bool) Options {
+ if v {
+ return jsonflags.AllowInvalidUTF8 | 1
+ } else {
+ return jsonflags.AllowInvalidUTF8 | 0
+ }
+}
+
+// EscapeForHTML specifies that '<', '>', and '&' characters within JSON strings
+// should be escaped as a hexadecimal Unicode codepoint (e.g., \u003c) so that
+// the output is safe to embed within HTML.
+//
+// This only affects encoding and is ignored when decoding.
+func EscapeForHTML(v bool) Options {
+ if v {
+ return jsonflags.EscapeForHTML | 1
+ } else {
+ return jsonflags.EscapeForHTML | 0
+ }
+}
+
+// EscapeForJS specifies that U+2028 and U+2029 characters within JSON strings
+// should be escaped as a hexadecimal Unicode codepoint (e.g., \u2028) so that
+// the output is valid to embed within JavaScript. See RFC 8259, section 12.
+//
+// This only affects encoding and is ignored when decoding.
+func EscapeForJS(v bool) Options {
+ if v {
+ return jsonflags.EscapeForJS | 1
+ } else {
+ return jsonflags.EscapeForJS | 0
+ }
+}
+
+// PreserveRawStrings specifies that when encoding a raw JSON string in a
+// [Token] or [Value], pre-escaped sequences
+// in a JSON string are preserved to the output.
+// However, raw strings still respect [EscapeForHTML] and [EscapeForJS]
+// such that the relevant characters are escaped.
+// If [AllowInvalidUTF8] is enabled, bytes of invalid UTF-8
+// are preserved to the output.
+//
+// This only affects encoding and is ignored when decoding.
+func PreserveRawStrings(v bool) Options {
+ if v {
+ return jsonflags.PreserveRawStrings | 1
+ } else {
+ return jsonflags.PreserveRawStrings | 0
+ }
+}
+
+// CanonicalizeRawInts specifies that when encoding a raw JSON
+// integer number (i.e., a number without a fraction and exponent) in a
+// [Token] or [Value], the number is canonicalized
+// according to RFC 8785, section 3.2.2.3. As a special case,
+// the number -0 is canonicalized as 0.
+//
+// JSON numbers are treated as IEEE 754 double precision numbers.
+// Any numbers with precision beyond what is representable by that form
+// will lose their precision when canonicalized. For example,
+// integer values beyond ±2⁵³ will lose their precision.
+// For example, 1234567890123456789 is formatted as 1234567890123456800.
+//
+// This only affects encoding and is ignored when decoding.
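+//
+// A small sketch using the example number above:
+//
+// v := Value(`1234567890123456789`)
+// _ = v.Format(CanonicalizeRawInts(true))
+// // string(v) == "1234567890123456800"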
+func CanonicalizeRawInts(v bool) Options {
+ if v {
+ return jsonflags.CanonicalizeRawInts | 1
+ } else {
+ return jsonflags.CanonicalizeRawInts | 0
+ }
+}
+
+// CanonicalizeRawFloats specifies that when encoding a raw JSON
+// floating-point number (i.e., a number with a fraction or exponent) in a
+// [Token] or [Value], the number is canonicalized
+// according to RFC 8785, section 3.2.2.3. As a special case,
+// the number -0 is canonicalized as 0.
+//
+// JSON numbers are treated as IEEE 754 double precision numbers.
+// It is safe to canonicalize a serialized single precision number and
+// parse it back as a single precision number and expect the same value.
+// If a number exceeds ±1.7976931348623157e+308, which is the maximum
+// finite number, then it is saturated at that value and formatted as such.
+//
+// This only affects encoding and is ignored when decoding.
+func CanonicalizeRawFloats(v bool) Options {
+ if v {
+ return jsonflags.CanonicalizeRawFloats | 1
+ } else {
+ return jsonflags.CanonicalizeRawFloats | 0
+ }
+}
+
+// ReorderRawObjects specifies that when encoding a raw JSON object in a
+// [Value], the object members are reordered according to
+// RFC 8785, section 3.2.3.
+//
+// This only affects encoding and is ignored when decoding.
+func ReorderRawObjects(v bool) Options {
+ if v {
+ return jsonflags.ReorderRawObjects | 1
+ } else {
+ return jsonflags.ReorderRawObjects | 0
+ }
+}
+
+// SpaceAfterColon specifies that the JSON output should emit a space character
+// after each colon separator following a JSON object name.
+// If false, then no space character appears after the colon separator.
+//
+// This only affects encoding and is ignored when decoding.
+func SpaceAfterColon(v bool) Options {
+ if v {
+ return jsonflags.SpaceAfterColon | 1
+ } else {
+ return jsonflags.SpaceAfterColon | 0
+ }
+}
+
+// SpaceAfterComma specifies that the JSON output should emit a space character
+// after each comma separator following a JSON object value or array element.
+// If false, then no space character appears after the comma separator.
+//
+// This only affects encoding and is ignored when decoding.
+func SpaceAfterComma(v bool) Options {
+ if v {
+ return jsonflags.SpaceAfterComma | 1
+ } else {
+ return jsonflags.SpaceAfterComma | 0
+ }
+}
+
+// Multiline specifies that the JSON output should expand to multiple lines,
+// where every JSON object member or JSON array element appears on
+// a new, indented line according to the nesting depth.
+//
+// If [SpaceAfterColon] is not specified, then the default is true.
+// If [SpaceAfterComma] is not specified, then the default is false.
+// If [WithIndent] is not specified, then the default is "\t".
+//
+// If set to false, then the output is on a single line,
+// where the only whitespace emitted is determined by the current
+// values of [SpaceAfterColon] and [SpaceAfterComma].
+//
+// This only affects encoding and is ignored when decoding.
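+//
+// A rough sketch of the effect with the defaults above
+// (the input value is illustrative):
+//
+// v := Value(`{"fruit":["apple","pear"]}`)
+// _ = v.Format(Multiline(true))
+// // v now spans multiple lines: each member and element begins on its own
+// // tab-indented line, with a space after each colon.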
+func Multiline(v bool) Options {
+ if v {
+ return jsonflags.Multiline | 1
+ } else {
+ return jsonflags.Multiline | 0
+ }
+}
+
+// WithIndent specifies that the encoder should emit multiline output
+// where each element in a JSON object or array begins on a new, indented line
+// beginning with the indent prefix (see [WithIndentPrefix])
+// followed by one or more copies of indent according to the nesting depth.
+// The indent must only be composed of space or tab characters.
+//
+// If the intent is to emit indented output without a preference for
+// the particular indent string, then use [Multiline] instead.
+//
+// This only affects encoding and is ignored when decoding.
+// Use of this option implies [Multiline] being set to true.
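+//
+// For example (the output writer is illustrative):
+//
+// enc := NewEncoder(os.Stdout, WithIndent("  ")) // two-space indent
+// // WithIndent("x") panics since the indent may only contain spaces or tabs.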
+func WithIndent(indent string) Options {
+ // Fast-path: Return a constant for common indents, which avoids allocating.
+ // These are derived from analyzing the Go module proxy on 2023-07-01.
+ switch indent {
+ case "\t":
+ return jsonopts.Indent("\t") // ~14k usages
+ case " ":
+ return jsonopts.Indent(" ") // ~18k usages
+ case " ":
+ return jsonopts.Indent(" ") // ~1.7k usages
+ case " ":
+ return jsonopts.Indent(" ") // ~52k usages
+ case " ":
+ return jsonopts.Indent(" ") // ~12k usages
+ case "":
+ return jsonopts.Indent("") // ~1.5k usages
+ }
+
+ // Otherwise, allocate for this unique value.
+ if s := strings.Trim(indent, " \t"); len(s) > 0 {
+ panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent")
+ }
+ return jsonopts.Indent(indent)
+}
+
+// WithIndentPrefix specifies that the encoder should emit multiline output
+// where each element in a JSON object or array begins on a new, indented line
+// beginning with the indent prefix followed by one or more copies of indent
+// (see [WithIndent]) according to the nesting depth.
+// The prefix must only be composed of space or tab characters.
+//
+// This only affects encoding and is ignored when decoding.
+// Use of this option implies [Multiline] being set to true.
+func WithIndentPrefix(prefix string) Options {
+ if s := strings.Trim(prefix, " \t"); len(s) > 0 {
+ panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix")
+ }
+ return jsonopts.IndentPrefix(prefix)
+}
+
+/*
+// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit.
+// Remember to also update the "Security Considerations" section.
+
+// WithByteLimit sets a limit on the number of bytes of input or output bytes
+// that may be consumed or produced for each top-level JSON value.
+// If a [Decoder] or [Encoder] method call would need to consume/produce
+// more than a total of n bytes to make progress on the top-level JSON value,
+// then the call will report an error.
+// Whitespace before and within the top-level value is counted against the limit.
+// Whitespace after a top-level value is counted against the limit
+// for the next top-level value.
+//
+// A non-positive limit is equivalent to no limit at all.
+// If unspecified, the default limit is no limit at all.
+// This affects either encoding or decoding.
+func WithByteLimit(n int64) Options {
+ return jsonopts.ByteLimit(max(n, 0))
+}
+
+// WithDepthLimit sets a limit on the maximum depth of JSON nesting
+// that may be consumed or produced for each top-level JSON value.
+// If a [Decoder] or [Encoder] method call would need to consume or produce
+// a depth greater than n to make progress on the top-level JSON value,
+// then the call will report an error.
+//
+// A non-positive limit is equivalent to no limit at all.
+// If unspecified, the default limit is 10000.
+// This affects either encoding or decoding.
+func WithDepthLimit(n int) Options {
+ return jsonopts.DepthLimit(max(n, 0))
+}
+*/
diff --git a/pkg/encoders/json/jsontext/pools.go b/pkg/encoders/json/jsontext/pools.go
new file mode 100644
index 0000000..4f9e0ea
--- /dev/null
+++ b/pkg/encoders/json/jsontext/pools.go
@@ -0,0 +1,152 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "bytes"
+ "io"
+ "math/bits"
+ "sync"
+)
+
+// TODO(https://go.dev/issue/47657): Use sync.PoolOf.
+
+var (
+ // This owns the internal buffer since there is no io.Writer to output to.
+ // Since the buffer can get arbitrarily large in normal usage,
+ // there is statistical tracking logic to determine whether to recycle
+ // the internal buffer or not based on a history of utilization.
+ bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
+
+ // This owns the internal buffer, but it is only used to temporarily store
+ // buffered JSON before flushing it to the underlying io.Writer.
+ // In a sufficiently efficient streaming mode, we do not expect the buffer
+ // to grow arbitrarily large. Thus, we avoid recycling large buffers.
+ streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
+
+ // This does not own the internal buffer since
+ // it is taken directly from the provided bytes.Buffer.
+ bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
+)
+
+// bufferStatistics is statistics to track buffer utilization.
+// It is used to determine whether to recycle a buffer or not
+// to avoid https://go.dev/issue/23199.
+type bufferStatistics struct {
+ strikes int // number of times the buffer was under-utilized
+ prevLen int // length of previous buffer
+}
+
+func getBufferedEncoder(opts ...Options) *Encoder {
+ e := bufferedEncoderPool.Get().(*Encoder)
+ if e.s.Buf == nil {
+ // Round up to nearest 2ⁿ to make best use of malloc size classes.
+ // See runtime/sizeclasses.go on Go1.15.
+ // Logical OR with 63 to ensure 64 as the minimum buffer size.
+ n := 1 << bits.Len(uint(e.s.bufStats.prevLen|63))
+ e.s.Buf = make([]byte, 0, n)
+ }
+ e.s.reset(e.s.Buf[:0], nil, opts...)
+ return e
+}
+func putBufferedEncoder(e *Encoder) {
+ // Recycle large buffers only if sufficiently utilized.
+ // If a buffer is under-utilized enough times sequentially,
+ // then it is discarded, ensuring that a single large buffer
+ // won't be kept alive by a continuous stream of small usages.
+ //
+ // The worst case utilization is computed as:
+ // MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES)
+ //
+ // For the constants chosen below, this is (25%)/(1+4) ⇒ 5%.
+ // This may seem low, but it ensures a lower bound on
+ // the absolute worst-case utilization. Without this check,
+ // this would be theoretically 0%, which is infinitely worse.
+ //
+ // See https://go.dev/issue/27735.
+ switch {
+ case cap(e.s.Buf) <= 4<<10: // always recycle buffers smaller than 4KiB
+ e.s.bufStats.strikes = 0
+ case cap(e.s.Buf)/4 <= len(e.s.Buf): // at least 25% utilization
+ e.s.bufStats.strikes = 0
+ case e.s.bufStats.strikes < 4: // at most 4 strikes
+ e.s.bufStats.strikes++
+ default: // discard the buffer; too large and too often under-utilized
+ e.s.bufStats.strikes = 0
+ e.s.bufStats.prevLen = len(e.s.Buf) // heuristic for size to allocate next time
+ e.s.Buf = nil
+ }
+ bufferedEncoderPool.Put(e)
+}
+
+func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder {
+ if _, ok := w.(*bytes.Buffer); ok {
+ e := bytesBufferEncoderPool.Get().(*Encoder)
+ e.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer
+ return e
+ } else {
+ e := streamingEncoderPool.Get().(*Encoder)
+ e.s.reset(e.s.Buf[:0], w, opts...) // preserve existing buffer
+ return e
+ }
+}
+func putStreamingEncoder(e *Encoder) {
+ if _, ok := e.s.wr.(*bytes.Buffer); ok {
+ bytesBufferEncoderPool.Put(e)
+ } else {
+ if cap(e.s.Buf) > 64<<10 {
+ e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory
+ }
+ streamingEncoderPool.Put(e)
+ }
+}
+
+var (
+ // This does not own the internal buffer since it is externally provided.
+ bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
+
+ // This owns the internal buffer, but it is only used to temporarily store
+ // buffered JSON fetched from the underlying io.Reader.
+ // In a sufficiently efficient streaming mode, we do not expect the buffer
+ // to grow arbitrarily large. Thus, we avoid recycling large buffers.
+ streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
+
+ // This does not own the internal buffer since
+ // it is taken directly from the provided bytes.Buffer.
+ bytesBufferDecoderPool = bufferedDecoderPool
+)
+
+func getBufferedDecoder(b []byte, opts ...Options) *Decoder {
+ d := bufferedDecoderPool.Get().(*Decoder)
+ d.s.reset(b, nil, opts...)
+ return d
+}
+func putBufferedDecoder(d *Decoder) {
+ bufferedDecoderPool.Put(d)
+}
+
+func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder {
+ if _, ok := r.(*bytes.Buffer); ok {
+ d := bytesBufferDecoderPool.Get().(*Decoder)
+ d.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer
+ return d
+ } else {
+ d := streamingDecoderPool.Get().(*Decoder)
+ d.s.reset(d.s.buf[:0], r, opts...) // preserve existing buffer
+ return d
+ }
+}
+func putStreamingDecoder(d *Decoder) {
+ if _, ok := d.s.rd.(*bytes.Buffer); ok {
+ bytesBufferDecoderPool.Put(d)
+ } else {
+ if cap(d.s.buf) > 64<<10 {
+ d.s.buf = nil // avoid pinning arbitrarily large amounts of memory
+ }
+ streamingDecoderPool.Put(d)
+ }
+}
diff --git a/pkg/encoders/json/jsontext/quote.go b/pkg/encoders/json/jsontext/quote.go
new file mode 100644
index 0000000..5ecfdbc
--- /dev/null
+++ b/pkg/encoders/json/jsontext/quote.go
@@ -0,0 +1,41 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "encoding/json/internal/jsonflags"
+ "encoding/json/internal/jsonwire"
+)
+
+// AppendQuote appends a double-quoted JSON string literal representing src
+// to dst and returns the extended buffer.
+// It uses the minimal string representation per RFC 8785, section 3.2.2.2.
+// Invalid UTF-8 bytes are replaced with the Unicode replacement character
+// and an error is returned at the end indicating the presence of invalid UTF-8.
+// The dst must not overlap with the src.
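+//
+// For example (an illustrative call):
+//
+// b, err := AppendQuote(nil, `say "hi"`)
+// // string(b) == `"say \"hi\""` and err == nil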
+func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
+ dst, err := jsonwire.AppendQuote(dst, src, &jsonflags.Flags{})
+ if err != nil {
+ err = &SyntacticError{Err: err}
+ }
+ return dst, err
+}
+
+// AppendUnquote appends the decoded interpretation of src as a
+// double-quoted JSON string literal to dst and returns the extended buffer.
+// The input src must be a JSON string without any surrounding whitespace.
+// Invalid UTF-8 bytes are replaced with the Unicode replacement character
+// and an error is returned at the end indicating the presence of invalid UTF-8.
+// Any trailing bytes after the JSON string literal results in an error.
+// The dst must not overlap with the src.
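+//
+// For example (an illustrative call):
+//
+// b, err := AppendUnquote(nil, `"a\tb"`)
+// // string(b) == "a\tb" (containing a real tab) and err == nil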
+func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
+ dst, err := jsonwire.AppendUnquote(dst, src)
+ if err != nil {
+ err = &SyntacticError{Err: err}
+ }
+ return dst, err
+}
diff --git a/pkg/encoders/json/jsontext/state.go b/pkg/encoders/json/jsontext/state.go
new file mode 100644
index 0000000..d214fd5
--- /dev/null
+++ b/pkg/encoders/json/jsontext/state.go
@@ -0,0 +1,828 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "errors"
+ "iter"
+ "math"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+
+ "encoding/json/internal/jsonwire"
+)
+
+// ErrDuplicateName indicates that a JSON token could not be
+// encoded or decoded because it results in a duplicate JSON object name.
+// This error is directly wrapped within a [SyntacticError] when produced.
+//
+// The name of a duplicate JSON object member can be extracted as:
+//
+// err := ...
+// var serr jsontext.SyntacticError
+// if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName {
+// ptr := serr.JSONPointer // JSON pointer to duplicate name
+// name := ptr.LastToken() // duplicate name itself
+// ...
+// }
+//
+// This error is only returned if [AllowDuplicateNames] is false.
+var ErrDuplicateName = errors.New("duplicate object member name")
+
+// ErrNonStringName indicates that a JSON token could not be
+// encoded or decoded because it is not a string,
+// as required for JSON object names according to RFC 8259, section 4.
+// This error is directly wrapped within a [SyntacticError] when produced.
+var ErrNonStringName = errors.New("object member name must be a string")
+
+var (
+ errMissingValue = errors.New("missing value after object name")
+ errMismatchDelim = errors.New("mismatching structural token for object or array")
+ errMaxDepth = errors.New("exceeded max depth")
+
+ errInvalidNamespace = errors.New("object namespace is in an invalid state")
+)
+
+// Per RFC 8259, section 9, implementations may enforce a maximum depth.
+// Such a limit is necessary to prevent stack overflows.
+const maxNestingDepth = 10000
+
+type state struct {
+ // Tokens validates whether the next token kind is valid.
+ Tokens stateMachine
+
+ // Names is a stack of object names.
+ Names objectNameStack
+
+ // Namespaces is a stack of object namespaces.
+ // For performance reasons, Encoder or Decoder may not update this
+ // if Marshal or Unmarshal is able to track names in a more efficient way.
+ // See makeMapArshaler and makeStructArshaler.
+ // Not used if AllowDuplicateNames is true.
+ Namespaces objectNamespaceStack
+}
+
+// needObjectValue reports whether the next token should be an object value.
+// This method is used by [wrapSyntacticError].
+func (s *state) needObjectValue() bool {
+ return s.Tokens.Last.needObjectValue()
+}
+
+func (s *state) reset() {
+ s.Tokens.reset()
+ s.Names.reset()
+ s.Namespaces.reset()
+}
+
+// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value
+// relative to the root of the top-level JSON value.
+//
+// A Pointer is a slash-separated list of tokens, where each token is
+// either a JSON object name or an index to a JSON array element
+// encoded as a base-10 integer value.
+// It is impossible to distinguish between an array index and an object name
+// (that happens to be a base-10 encoded integer) without also knowing
+// the structure of the top-level JSON value that the pointer refers to.
+//
+// There is exactly one representation of a pointer to a particular value,
+// so comparability of Pointer values is equivalent to checking whether
+// they both point to the exact same value.
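+//
+// For example, a sketch of a pointer into {"foo":{"bar":[10,20]}}:
+//
+// p := Pointer("/foo/bar/1") // references the array element 20
+// p.IsValid()                // true
+// p.Parent()                 // "/foo/bar"
+// p.LastToken()              // "1"
+// p.Contains("/foo/bar/1")   // true
+// // p.Tokens() yields "foo", "bar", "1" in order.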
+type Pointer string
+
+// IsValid reports whether p is a valid JSON Pointer according to RFC 6901.
+// Note that the concatenation of two valid pointers produces a valid pointer.
+func (p Pointer) IsValid() bool {
+ for i, r := range p {
+ switch {
+ case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')):
+ return false // invalid escape
+ case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"):
+ return false // invalid UTF-8
+ }
+ }
+ return len(p) == 0 || p[0] == '/'
+}
+
+// Contains reports whether the JSON value that p points to
+// is equal to or contains the JSON value that pc points to.
+func (p Pointer) Contains(pc Pointer) bool {
+ // Invariant: len(p) <= len(pc) if p.Contains(pc)
+ suffix, ok := strings.CutPrefix(string(pc), string(p))
+ return ok && (suffix == "" || suffix[0] == '/')
+}
+
+// Parent strips off the last token and returns the remaining pointer.
+// The parent of an empty p is an empty string.
+func (p Pointer) Parent() Pointer {
+ return p[:max(strings.LastIndexByte(string(p), '/'), 0)]
+}
+
+// LastToken returns the last token in the pointer.
+// The last token of an empty p is an empty string.
+func (p Pointer) LastToken() string {
+ last := p[max(strings.LastIndexByte(string(p), '/'), 0):]
+ return unescapePointerToken(strings.TrimPrefix(string(last), "/"))
+}
+
+// AppendToken appends a token to the end of p and returns the full pointer.
+func (p Pointer) AppendToken(tok string) Pointer {
+ return Pointer(appendEscapePointerName([]byte(p+"/"), tok))
+}
+
+// TODO: Add Pointer.AppendTokens,
+// but should this take in a ...string or an iter.Seq[string]?
+
+// Tokens returns an iterator over the reference tokens in the JSON pointer,
+// starting from the first token until the last token (unless stopped early).
+func (p Pointer) Tokens() iter.Seq[string] {
+ return func(yield func(string) bool) {
+ for len(p) > 0 {
+ p = Pointer(strings.TrimPrefix(string(p), "/"))
+ i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p)))
+ if !yield(unescapePointerToken(string(p)[:i])) {
+ return
+ }
+ p = p[i:]
+ }
+ }
+}
+
+func unescapePointerToken(token string) string {
+ if strings.Contains(token, "~") {
+ // Per RFC 6901, section 3, unescape '~' and '/' characters.
+ token = strings.ReplaceAll(token, "~1", "/")
+ token = strings.ReplaceAll(token, "~0", "~")
+ }
+ return token
+}
+
+// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
+//
+// - If where is -1, then it points to the previously processed token.
+//
+// - If where is 0, then it points to the parent JSON object or array,
+// or an object member if in-between an object member key and value.
+// This is useful when the position is ambiguous whether
+// we are interested in the previous or next token, or
+// when we are uncertain whether the next token
+// continues or terminates the current object or array.
+//
+// - If where is +1, then it points to the next expected value,
+// assuming that it continues the current JSON object or array.
+// As a special case, if the next token is a JSON object name,
+// then it points to the parent JSON object.
+//
+// Invariant: Must call s.names.copyQuotedBuffer beforehand.
+func (s state) appendStackPointer(b []byte, where int) []byte {
+ var objectDepth int
+ for i := 1; i < s.Tokens.Depth(); i++ {
+ e := s.Tokens.index(i)
+ arrayDelta := -1 // by default point to previous array element
+ if isLast := i == s.Tokens.Depth()-1; isLast {
+ switch {
+ case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
+ return b
+ case where > 0 && e.isArray():
+ arrayDelta = 0 // point to next array element
+ }
+ }
+ switch {
+ case e.isObject():
+ b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
+ objectDepth++
+ case e.isArray():
+ b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
+ }
+ }
+ return b
+}
+
+func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
+ for _, r := range string(name) {
+ // Per RFC 6901, section 3, escape '~' and '/' characters.
+ switch r {
+ case '~':
+ b = append(b, "~0"...)
+ case '/':
+ b = append(b, "~1"...)
+ default:
+ b = utf8.AppendRune(b, r)
+ }
+ }
+ return b
+}
+
+// stateMachine is a push-down automaton that validates whether
+// a sequence of tokens is valid or not according to the JSON grammar.
+// It is useful for both encoding and decoding.
+//
+// It is a stack where each entry represents a nested JSON object or array.
+// The stack has a minimum depth of 1 where the first level is a
+// virtual JSON array to handle a stream of top-level JSON values.
+// The top-level virtual JSON array is special in that it doesn't require commas
+// between each JSON value.
+//
+// For performance, most methods are carefully written to be inlinable.
+// The zero value is a valid state machine ready for use.
+type stateMachine struct {
+ Stack []stateEntry
+ Last stateEntry
+}
+
+// reset resets the state machine.
+// The machine always starts with a minimum depth of 1.
+func (m *stateMachine) reset() {
+ m.Stack = m.Stack[:0]
+ if cap(m.Stack) > 1<<10 {
+ m.Stack = nil
+ }
+ m.Last = stateTypeArray
+}
+
+// Depth is the current nested depth of JSON objects and arrays.
+// It is one-indexed (i.e., top-level values have a depth of 1).
+func (m stateMachine) Depth() int {
+ return len(m.Stack) + 1
+}
+
+// index returns a reference to the ith entry.
+// It is only valid until the next push method call.
+func (m *stateMachine) index(i int) *stateEntry {
+ if i == len(m.Stack) {
+ return &m.Last
+ }
+ return &m.Stack[i]
+}
+
+// DepthLength reports the current nested depth and
+// the length of the last JSON object or array.
+func (m stateMachine) DepthLength() (int, int64) {
+ return m.Depth(), m.Last.Length()
+}
+
+// appendLiteral appends a JSON literal as the next token in the sequence.
+// If an error is returned, the state is not mutated.
+func (m *stateMachine) appendLiteral() error {
+ switch {
+ case m.Last.NeedObjectName():
+ return ErrNonStringName
+ case !m.Last.isValidNamespace():
+ return errInvalidNamespace
+ default:
+ m.Last.Increment()
+ return nil
+ }
+}
+
+// appendString appends a JSON string as the next token in the sequence.
+// If an error is returned, the state is not mutated.
+func (m *stateMachine) appendString() error {
+ switch {
+ case !m.Last.isValidNamespace():
+ return errInvalidNamespace
+ default:
+ m.Last.Increment()
+ return nil
+ }
+}
+
+// appendNumber appends a JSON number as the next token in the sequence.
+// If an error is returned, the state is not mutated.
+func (m *stateMachine) appendNumber() error {
+ return m.appendLiteral()
+}
+
+// pushObject appends a JSON begin object token as next in the sequence.
+// If an error is returned, the state is not mutated.
+func (m *stateMachine) pushObject() error {
+ switch {
+ case m.Last.NeedObjectName():
+ return ErrNonStringName
+ case !m.Last.isValidNamespace():
+ return errInvalidNamespace
+ case len(m.Stack) == maxNestingDepth:
+ return errMaxDepth
+ default:
+ m.Last.Increment()
+ m.Stack = append(m.Stack, m.Last)
+ m.Last = stateTypeObject
+ return nil
+ }
+}
+
+// popObject appends a JSON end object token as next in the sequence.
+// If an error is returned, the state is not mutated.
+func (m *stateMachine) popObject() error {
+ switch {
+ case !m.Last.isObject():
+ return errMismatchDelim
+ case m.Last.needObjectValue():
+ return errMissingValue
+ case !m.Last.isValidNamespace():
+ return errInvalidNamespace
+ default:
+ m.Last = m.Stack[len(m.Stack)-1]
+ m.Stack = m.Stack[:len(m.Stack)-1]
+ return nil
+ }
+}
+
+// pushArray appends a JSON begin array token as next in the sequence.
+// If an error is returned, the state is not mutated.
+func (m *stateMachine) pushArray() error {
+ switch {
+ case m.Last.NeedObjectName():
+ return ErrNonStringName
+ case !m.Last.isValidNamespace():
+ return errInvalidNamespace
+ case len(m.Stack) == maxNestingDepth:
+ return errMaxDepth
+ default:
+ m.Last.Increment()
+ m.Stack = append(m.Stack, m.Last)
+ m.Last = stateTypeArray
+ return nil
+ }
+}
+
+// popArray appends a JSON end array token as next in the sequence.
+// If an error is returned, the state is not mutated.
+func (m *stateMachine) popArray() error {
+ switch {
+ case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array
+ return errMismatchDelim
+ case !m.Last.isValidNamespace():
+ return errInvalidNamespace
+ default:
+ m.Last = m.Stack[len(m.Stack)-1]
+ m.Stack = m.Stack[:len(m.Stack)-1]
+ return nil
+ }
+}
+
+// NeedIndent reports whether indent whitespace should be injected.
+// A zero value means that no whitespace should be injected.
+// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody
+// should be appended to the output immediately before the next token.
+func (m stateMachine) NeedIndent(next Kind) (n int) {
+ willEnd := next == '}' || next == ']'
+ switch {
+ case m.Depth() == 1:
+ return 0 // top-level values are never indented
+ case m.Last.Length() == 0 && willEnd:
+ return 0 // an empty object or array is never indented
+ case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
+ return m.Depth()
+ case willEnd:
+ return m.Depth() - 1
+ default:
+ return 0
+ }
+}
+
+// MayAppendDelim appends a colon or comma that may precede the next token.
+func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
+ switch {
+ case m.Last.needImplicitColon():
+ return append(b, ':')
+ case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
+ return append(b, ',')
+ default:
+ return b
+ }
+}
+
+// needDelim reports whether a colon or comma token should be implicitly emitted
+// before the next token of the specified kind.
+// A zero value means no delimiter should be emitted.
+func (m stateMachine) needDelim(next Kind) (delim byte) {
+ switch {
+ case m.Last.needImplicitColon():
+ return ':'
+ case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
+ return ','
+ default:
+ return 0
+ }
+}
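+
+// A worked example (illustrative, mirroring the state machine tests): while
+// writing {"a":1,"b":2}, with m holding the current state,
+//
+// m.needDelim('0') == ':' // just after appending the name "a"
+// m.needDelim('"') == ',' // just after appending the value 1
+// m.needDelim('}') == 0 // a closing '}' is never preceded by a comma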
+
+// InvalidateDisabledNamespaces marks all disabled namespaces as invalid.
+//
+// For efficiency, Marshal and Unmarshal may disable namespaces since there are
+// more efficient ways to track duplicate names. However, if an error occurs,
+// the namespaces in Encoder or Decoder will be left in an inconsistent state.
+// Mark the namespaces as invalid so that future method calls on
+// Encoder or Decoder will return an error.
+func (m *stateMachine) InvalidateDisabledNamespaces() {
+ for i := range m.Depth() {
+ e := m.index(i)
+ if !e.isActiveNamespace() {
+ e.invalidateNamespace()
+ }
+ }
+}
+
+// stateEntry encodes several artifacts within a single unsigned integer:
+// - whether this represents a JSON object or array,
+// - whether this object should check for duplicate names, and
+// - how many elements are in this JSON object or array.
+type stateEntry uint64
+
+const (
+ // The type mask (1 bit) records whether this is a JSON object or array.
+ stateTypeMask stateEntry = 0x8000_0000_0000_0000
+ stateTypeObject stateEntry = 0x8000_0000_0000_0000
+ stateTypeArray stateEntry = 0x0000_0000_0000_0000
+
+ // The name check mask (2 bits) records whether to update
+ // the namespaces for the current JSON object and
+ // whether the namespace is valid.
+ stateNamespaceMask stateEntry = 0x6000_0000_0000_0000
+ stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
+ stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
+
+ // The count mask (61 bits) records the number of elements.
+ stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff
+ stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
+ stateCountOdd stateEntry = 0x0000_0000_0000_0001
+ stateCountEven stateEntry = 0x0000_0000_0000_0000
+)
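+
+// As a worked example of this packing (illustrative only): an object entry
+// that has seen two complete name/value pairs is stateTypeObject|4, so
+//
+// e := stateTypeObject | 4
+// e.Length() == 4 // two names plus two values
+// e.isObject() == true
+// e.NeedObjectName() == true // even count: a name must come next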
+
+// Length reports the number of elements in the JSON object or array.
+// Each name and value in an object entry is treated as a separate element.
+func (e stateEntry) Length() int64 {
+ return int64(e & stateCountMask)
+}
+
+// isObject reports whether this is a JSON object.
+func (e stateEntry) isObject() bool {
+ return e&stateTypeMask == stateTypeObject
+}
+
+// isArray reports whether this is a JSON array.
+func (e stateEntry) isArray() bool {
+ return e&stateTypeMask == stateTypeArray
+}
+
+// NeedObjectName reports whether the next token must be a JSON string,
+// which is necessary for JSON object names.
+func (e stateEntry) NeedObjectName() bool {
+ return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
+}
+
+// needImplicitColon reports whether a colon should occur next,
+// which always occurs after JSON object names.
+func (e stateEntry) needImplicitColon() bool {
+ return e.needObjectValue()
+}
+
+// needObjectValue reports whether the next token must be a JSON value,
+// which is necessary after every JSON object name.
+func (e stateEntry) needObjectValue() bool {
+ return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
+}
+
+// needImplicitComma reports whether a comma should occur next,
+// which always occurs after a value in a JSON object or array
+// before the next value (or name).
+func (e stateEntry) needImplicitComma(next Kind) bool {
+ return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
+}
+
+// Increment increments the number of elements for the current object or array.
+// This assumes that overflow won't practically be an issue since
+// 1<<bits.OnesCount(uint64(stateCountMask)) is sufficiently large.
+func (e *stateEntry) Increment() {
+ (*e)++
+}
+
+// decrement decrements the number of elements for the current object or array.
+// It is the caller's responsibility to ensure that e.Length() > 0.
+func (e *stateEntry) decrement() {
+ (*e)--
+}
+
+// DisableNamespace disables the JSON object namespace such that the
+// Encoder or Decoder no longer updates the namespace.
+func (e *stateEntry) DisableNamespace() {
+ *e |= stateDisableNamespace
+}
+
+// isActiveNamespace reports whether the JSON object namespace is actively
+// being updated and used for duplicate name checks.
+func (e stateEntry) isActiveNamespace() bool {
+ return e&(stateDisableNamespace) == 0
+}
+
+// invalidateNamespace marks the JSON object namespace as being invalid.
+func (e *stateEntry) invalidateNamespace() {
+ *e |= stateInvalidNamespace
+}
+
+// isValidNamespace reports whether the JSON object namespace is valid.
+func (e stateEntry) isValidNamespace() bool {
+ return e&(stateInvalidNamespace) == 0
+}
+
+// objectNameStack is a stack of names when descending into a JSON object.
+// In contrast to objectNamespaceStack, this only has to remember a single name
+// per JSON object.
+//
+// This data structure may contain offsets to encodeBuffer or decodeBuffer.
+// It violates clean abstraction of layers, but is significantly more efficient.
+// This ensures that popping and pushing in the common case is a trivial
+// push/pop of an offset integer.
+//
+// The zero value is an empty names stack ready for use.
+type objectNameStack struct {
+ // offsets is a stack of offsets for each name.
+ // A non-negative offset is the ending offset into the local names buffer.
+ // A negative offset is the bit-wise inverse of a starting offset into
+ // a remote buffer (e.g., encodeBuffer or decodeBuffer).
+ // A math.MinInt offset at the end implies that the last object is empty.
+ // Invariant: Positive offsets always occur before negative offsets.
+ offsets []int
+ // unquotedNames is a back-to-back concatenation of names.
+ unquotedNames []byte
+}
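+
+// A small sketch of the offset encoding (illustrative only): after descending
+// into {"ab":{"c":...}} and copying both names locally, the stack holds
+//
+// offsets = []int{2, 3} // "ab" ends at 2, "c" ends at 3
+// unquotedNames = []byte("abc")
+//
+// whereas a name still referenced in a remote buffer at starting offset i is
+// recorded as the negative value ^i until copyQuotedBuffer is called.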
+
+func (ns *objectNameStack) reset() {
+ ns.offsets = ns.offsets[:0]
+ ns.unquotedNames = ns.unquotedNames[:0]
+ if cap(ns.offsets) > 1<<6 {
+ ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
+ }
+ if cap(ns.unquotedNames) > 1<<10 {
+ ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
+ }
+}
+
+func (ns *objectNameStack) length() int {
+ return len(ns.offsets)
+}
+
+// getUnquoted retrieves the ith unquoted name in the stack.
+// It returns an empty string if the last object is empty.
+//
+// Invariant: Must call copyQuotedBuffer beforehand.
+func (ns *objectNameStack) getUnquoted(i int) []byte {
+ ns.ensureCopiedBuffer()
+ if i == 0 {
+ return ns.unquotedNames[:ns.offsets[0]]
+ } else {
+ return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
+ }
+}
+
+// invalidOffset indicates that the last JSON object currently has no name.
+const invalidOffset = math.MinInt
+
+// push descends into a nested JSON object.
+func (ns *objectNameStack) push() {
+ ns.offsets = append(ns.offsets, invalidOffset)
+}
+
+// ReplaceLastQuotedOffset replaces the last name with the starting offset
+// to the quoted name in some remote buffer. All offsets provided must be
+// relative to the same buffer until copyQuotedBuffer is called.
+func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
+ // Use bit-wise inversion instead of naive multiplication by -1 to avoid
+ // ambiguity regarding zero (which is a valid offset into the names field).
+ // Bit-wise inversion is mathematically equivalent to -i-1,
+ // such that 0 becomes -1, 1 becomes -2, and so forth.
+ // This ensures that remote offsets are always negative.
+ ns.offsets[len(ns.offsets)-1] = ^i
+}
+
+// replaceLastUnquotedName replaces the last name with the provided name.
+//
+// Invariant: Must call copyQuotedBuffer beforehand.
+func (ns *objectNameStack) replaceLastUnquotedName(s string) {
+ ns.ensureCopiedBuffer()
+ var startOffset int
+ if len(ns.offsets) > 1 {
+ startOffset = ns.offsets[len(ns.offsets)-2]
+ }
+ ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
+ ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
+}
+
+// clearLast removes any name in the last JSON object.
+// It is semantically equivalent to ns.push followed by ns.pop.
+func (ns *objectNameStack) clearLast() {
+ ns.offsets[len(ns.offsets)-1] = invalidOffset
+}
+
+// pop ascends out of a nested JSON object.
+func (ns *objectNameStack) pop() {
+ ns.offsets = ns.offsets[:len(ns.offsets)-1]
+}
+
+// copyQuotedBuffer copies names from the remote buffer into the local names
+// buffer so that there are no more offset references into the remote buffer.
+// This allows the remote buffer to change contents without affecting
+// the names that this data structure is trying to remember.
+func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
+ // Find the first negative offset.
+ var i int
+ for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
+ continue
+ }
+
+ // Copy each name from the remote buffer into the local buffer.
+ for i = i + 1; i < len(ns.offsets); i++ {
+ if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
+ if i == 0 {
+ ns.offsets[i] = 0
+ } else {
+ ns.offsets[i] = ns.offsets[i-1]
+ }
+ break // last JSON object had a push without any names
+ }
+
+ // As a form of Hyrum proofing, we write an invalid character into the
+ // buffer to make misuse of Decoder.ReadToken more obvious.
+ // We need to undo that mutation here.
+ quotedName := b[^ns.offsets[i]:]
+ if quotedName[0] == invalidateBufferByte {
+ quotedName[0] = '"'
+ }
+
+ // Append the unquoted name to the local buffer.
+ var startOffset int
+ if i > 0 {
+ startOffset = ns.offsets[i-1]
+ }
+ if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
+ ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
+ } else {
+ ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
+ }
+ ns.offsets[i] = len(ns.unquotedNames)
+ }
+}
+
+func (ns *objectNameStack) ensureCopiedBuffer() {
+ if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
+ panic("BUG: copyQuotedBuffer not called beforehand")
+ }
+}
+
+// objectNamespaceStack is a stack of object namespaces.
+// This data structure assists in detecting duplicate names.
+type objectNamespaceStack []objectNamespace
+
+// reset resets the object namespace stack.
+func (nss *objectNamespaceStack) reset() {
+ if cap(*nss) > 1<<10 {
+ *nss = nil
+ }
+ *nss = (*nss)[:0]
+}
+
+// push starts a new namespace for a nested JSON object.
+func (nss *objectNamespaceStack) push() {
+ if cap(*nss) > len(*nss) {
+ *nss = (*nss)[:len(*nss)+1]
+ nss.Last().reset()
+ } else {
+ *nss = append(*nss, objectNamespace{})
+ }
+}
+
+// Last returns a pointer to the last JSON object namespace.
+func (nss objectNamespaceStack) Last() *objectNamespace {
+ return &nss[len(nss)-1]
+}
+
+// pop terminates the namespace for a nested JSON object.
+func (nss *objectNamespaceStack) pop() {
+ *nss = (*nss)[:len(*nss)-1]
+}
+
+// objectNamespace is the namespace for a JSON object.
+// In contrast to objectNameStack, this needs to remember all names
+// per JSON object.
+//
+// The zero value is an empty namespace ready for use.
+type objectNamespace struct {
+ // It relies on a linear search over all the names before switching
+ // to use a Go map for direct lookup.
+
+ // endOffsets is a list of offsets to the end of each name in allUnquotedNames.
+ // The length of endOffsets is the number of names in the namespace.
+ endOffsets []uint
+ // allUnquotedNames is a back-to-back concatenation of every name in the namespace.
+ allUnquotedNames []byte
+ // mapNames is a Go map containing every name in the namespace.
+ // Only valid if non-nil.
+ mapNames map[string]struct{}
+}
+
+// reset resets the namespace to be empty.
+func (ns *objectNamespace) reset() {
+ ns.endOffsets = ns.endOffsets[:0]
+ ns.allUnquotedNames = ns.allUnquotedNames[:0]
+ ns.mapNames = nil
+ if cap(ns.endOffsets) > 1<<6 {
+ ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
+ }
+ if cap(ns.allUnquotedNames) > 1<<10 {
+ ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
+ }
+}
+
+// length reports the number of names in the namespace.
+func (ns *objectNamespace) length() int {
+ return len(ns.endOffsets)
+}
+
+// getUnquoted retrieves the ith unquoted name in the namespace.
+func (ns *objectNamespace) getUnquoted(i int) []byte {
+ if i == 0 {
+ return ns.allUnquotedNames[:ns.endOffsets[0]]
+ } else {
+ return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]]
+ }
+}
+
+// lastUnquoted retrieves the last name in the namespace.
+func (ns *objectNamespace) lastUnquoted() []byte {
+ return ns.getUnquoted(ns.length() - 1)
+}
+
+// insertQuoted inserts a name and reports whether it was inserted,
+// which only occurs if name is not already in the namespace.
+// The provided name must be a valid JSON string.
+func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
+ if isVerbatim {
+ name = name[len(`"`) : len(name)-len(`"`)]
+ }
+ return ns.insert(name, !isVerbatim)
+}
+func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
+ return ns.insert(name, false)
+}
+func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
+ var allNames []byte
+ if quoted {
+ allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
+ } else {
+ allNames = append(ns.allUnquotedNames, name...)
+ }
+ name = allNames[len(ns.allUnquotedNames):]
+
+ // Switch to a map if the buffer is too large for linear search.
+ // This does not add the current name to the map.
+ if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
+ ns.mapNames = make(map[string]struct{})
+ var startOffset uint
+ for _, endOffset := range ns.endOffsets {
+ name := ns.allUnquotedNames[startOffset:endOffset]
+ ns.mapNames[string(name)] = struct{}{} // allocates a new string
+ startOffset = endOffset
+ }
+ }
+
+ if ns.mapNames == nil {
+ // Perform linear search over the buffer to find matching names.
+ // It provides O(n) lookup, but does not require any allocations.
+ var startOffset uint
+ for _, endOffset := range ns.endOffsets {
+ if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
+ return false
+ }
+ startOffset = endOffset
+ }
+ } else {
+ // Use the map if it is populated.
+ // It provides O(1) lookup, but requires a string allocation per name.
+ if _, ok := ns.mapNames[string(name)]; ok {
+ return false
+ }
+ ns.mapNames[string(name)] = struct{}{} // allocates a new string
+ }
+
+ ns.allUnquotedNames = allNames
+ ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
+ return true
+}
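+
+// A usage sketch (illustrative only): inserting the same name twice reports a
+// duplicate, and once more than 64 names (or more than 1024 bytes of names)
+// have accumulated, lookups switch from the linear scan to the Go map.
+//
+// var ns objectNamespace
+// ns.InsertUnquoted([]byte("alpha")) == true
+// ns.InsertUnquoted([]byte("alpha")) == false // duplicate name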
+
+// removeLast removes the last name in the namespace.
+func (ns *objectNamespace) removeLast() {
+ if ns.mapNames != nil {
+ delete(ns.mapNames, string(ns.lastUnquoted()))
+ }
+ if ns.length()-1 == 0 {
+ ns.endOffsets = ns.endOffsets[:0]
+ ns.allUnquotedNames = ns.allUnquotedNames[:0]
+ } else {
+ ns.endOffsets = ns.endOffsets[:ns.length()-1]
+ ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
+ }
+}
diff --git a/pkg/encoders/json/jsontext/state_test.go b/pkg/encoders/json/jsontext/state_test.go
new file mode 100644
index 0000000..c227600
--- /dev/null
+++ b/pkg/encoders/json/jsontext/state_test.go
@@ -0,0 +1,396 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "fmt"
+ "slices"
+ "strings"
+ "testing"
+ "unicode/utf8"
+
+ "encoding/json/internal/jsonwire"
+)
+
+func TestPointer(t *testing.T) {
+ tests := []struct {
+ in Pointer
+ wantParent Pointer
+ wantLast string
+ wantTokens []string
+ wantValid bool
+ }{
+ {"", "", "", nil, true},
+ {"a", "", "a", []string{"a"}, false},
+ {"~", "", "~", []string{"~"}, false},
+ {"/a", "", "a", []string{"a"}, true},
+ {"/foo/bar", "/foo", "bar", []string{"foo", "bar"}, true},
+ {"///", "//", "", []string{"", "", ""}, true},
+ {"/~0~1", "", "~/", []string{"~/"}, true},
+ {"/\xde\xad\xbe\xef", "", "\xde\xad\xbe\xef", []string{"\xde\xad\xbe\xef"}, false},
+ }
+ for _, tt := range tests {
+ if got := tt.in.Parent(); got != tt.wantParent {
+ t.Errorf("Pointer(%q).Parent = %q, want %q", tt.in, got, tt.wantParent)
+ }
+ if got := tt.in.LastToken(); got != tt.wantLast {
+ t.Errorf("Pointer(%q).Last = %q, want %q", tt.in, got, tt.wantLast)
+ }
+ if strings.HasPrefix(string(tt.in), "/") {
+ wantRoundtrip := tt.in
+ if !utf8.ValidString(string(wantRoundtrip)) {
+ // Replace bytes of invalid UTF-8 with Unicode replacement character.
+ wantRoundtrip = Pointer([]rune(wantRoundtrip))
+ }
+ if got := tt.in.Parent().AppendToken(tt.in.LastToken()); got != wantRoundtrip {
+ t.Errorf("Pointer(%q).Parent().AppendToken(LastToken()) = %q, want %q", tt.in, got, tt.in)
+ }
+ in := tt.in
+ for {
+ if (in + "x").Contains(tt.in) {
+ t.Errorf("Pointer(%q).Contains(%q) = true, want false", in+"x", tt.in)
+ }
+ if !in.Contains(tt.in) {
+ t.Errorf("Pointer(%q).Contains(%q) = false, want true", in, tt.in)
+ }
+ if in == in.Parent() {
+ break
+ }
+ in = in.Parent()
+ }
+ }
+ if got := slices.Collect(tt.in.Tokens()); !slices.Equal(got, tt.wantTokens) {
+ t.Errorf("Pointer(%q).Tokens = %q, want %q", tt.in, got, tt.wantTokens)
+ }
+ if got := tt.in.IsValid(); got != tt.wantValid {
+ t.Errorf("Pointer(%q).IsValid = %v, want %v", tt.in, got, tt.wantValid)
+ }
+ }
+}
+
+func TestStateMachine(t *testing.T) {
+ // To test a state machine, we pass an ordered sequence of operations and
+ // check whether the current state is as expected.
+ // The operation type is a union type of various possible operations,
+ // which either call mutating methods on the state machine or
+ // call accessor methods on state machine and verify the results.
+ type operation any
+ type (
+ // stackLengths checks the results of stateEntry.length accessors.
+ stackLengths []int64
+
+ // appendTokens is a sequence of token kinds to append where
+ // none of them are expected to fail.
+ //
+ // For example: `[n"0]` is equivalent to the following sequence:
+ //
+ // pushArray()
+ // appendLiteral()
+ // appendString()
+ // appendNumber()
+ // popArray()
+ //
+ appendTokens string
+
+ // appendToken is a single token kind to append with the expected error.
+ appendToken struct {
+ kind Kind
+ want error
+ }
+
+ // needDelim checks the result of the needDelim accessor.
+ needDelim struct {
+ next Kind
+ want byte
+ }
+ )
+
+ // Each entry is a sequence of tokens to pass to the state machine.
+ tests := []struct {
+ label string
+ ops []operation
+ }{{
+ "TopLevelValues",
+ []operation{
+ stackLengths{0},
+ needDelim{'n', 0},
+ appendTokens(`nft`),
+ stackLengths{3},
+ needDelim{'"', 0},
+ appendTokens(`"0[]{}`),
+ stackLengths{7},
+ },
+ }, {
+ "ArrayValues",
+ []operation{
+ stackLengths{0},
+ needDelim{'[', 0},
+ appendTokens(`[`),
+ stackLengths{1, 0},
+ needDelim{'n', 0},
+ appendTokens(`nft`),
+ stackLengths{1, 3},
+ needDelim{'"', ','},
+ appendTokens(`"0[]{}`),
+ stackLengths{1, 7},
+ needDelim{']', 0},
+ appendTokens(`]`),
+ stackLengths{1},
+ },
+ }, {
+ "ObjectValues",
+ []operation{
+ stackLengths{0},
+ needDelim{'{', 0},
+ appendTokens(`{`),
+ stackLengths{1, 0},
+ needDelim{'"', 0},
+ appendTokens(`"`),
+ stackLengths{1, 1},
+ needDelim{'n', ':'},
+ appendTokens(`n`),
+ stackLengths{1, 2},
+ needDelim{'"', ','},
+ appendTokens(`"f"t`),
+ stackLengths{1, 6},
+ appendTokens(`"""0"[]"{}`),
+ stackLengths{1, 14},
+ needDelim{'}', 0},
+ appendTokens(`}`),
+ stackLengths{1},
+ },
+ }, {
+ "ObjectCardinality",
+ []operation{
+ appendTokens(`{`),
+
+ // Appending any kind other than string for object name is an error.
+ appendToken{'n', ErrNonStringName},
+ appendToken{'f', ErrNonStringName},
+ appendToken{'t', ErrNonStringName},
+ appendToken{'0', ErrNonStringName},
+ appendToken{'{', ErrNonStringName},
+ appendToken{'[', ErrNonStringName},
+ appendTokens(`"`),
+
+ // Appending '}' without first appending any value is an error.
+ appendToken{'}', errMissingValue},
+ appendTokens(`"`),
+
+ appendTokens(`}`),
+ },
+ }, {
+ "MismatchingDelims",
+ []operation{
+ appendToken{'}', errMismatchDelim}, // appending '}' without preceding '{'
+ appendTokens(`[[{`),
+ appendToken{']', errMismatchDelim}, // appending ']' that mismatches preceding '{'
+ appendTokens(`}]`),
+ appendToken{'}', errMismatchDelim}, // appending '}' that mismatches preceding '['
+ appendTokens(`]`),
+ appendToken{']', errMismatchDelim}, // appending ']' without preceding '['
+ },
+ }}
+
+ for _, tt := range tests {
+ t.Run(tt.label, func(t *testing.T) {
+ // Flatten appendTokens to sequence of appendToken entries.
+ var ops []operation
+ for _, op := range tt.ops {
+ if toks, ok := op.(appendTokens); ok {
+ for _, k := range []byte(toks) {
+ ops = append(ops, appendToken{Kind(k), nil})
+ }
+ continue
+ }
+ ops = append(ops, op)
+ }
+
+ // Append each token to the state machine and check the output.
+ var state stateMachine
+ state.reset()
+ var sequence []Kind
+ for _, op := range ops {
+ switch op := op.(type) {
+ case stackLengths:
+ var got []int64
+ for i := range state.Depth() {
+ e := state.index(i)
+ got = append(got, e.Length())
+ }
+ want := []int64(op)
+ if !slices.Equal(got, want) {
+ t.Fatalf("%s: stack lengths mismatch:\ngot %v\nwant %v", sequence, got, want)
+ }
+ case appendToken:
+ got := state.append(op.kind)
+ if !equalError(got, op.want) {
+ t.Fatalf("%s: append('%c') = %v, want %v", sequence, op.kind, got, op.want)
+ }
+ if got == nil {
+ sequence = append(sequence, op.kind)
+ }
+ case needDelim:
+ if got := state.needDelim(op.next); got != op.want {
+ t.Fatalf("%s: needDelim('%c') = '%c', want '%c'", sequence, op.next, got, op.want)
+ }
+ default:
+ panic(fmt.Sprintf("unknown operation: %T", op))
+ }
+ }
+ })
+ }
+}
+
+// append is a thin wrapper over the other append, pop, or push methods
+// based on the token kind.
+func (s *stateMachine) append(k Kind) error {
+ switch k {
+ case 'n', 'f', 't':
+ return s.appendLiteral()
+ case '"':
+ return s.appendString()
+ case '0':
+ return s.appendNumber()
+ case '{':
+ return s.pushObject()
+ case '}':
+ return s.popObject()
+ case '[':
+ return s.pushArray()
+ case ']':
+ return s.popArray()
+ default:
+ panic(fmt.Sprintf("invalid token kind: '%c'", k))
+ }
+}
+
+func TestObjectNamespace(t *testing.T) {
+ type operation any
+ type (
+ insert struct {
+ name string
+ wantInserted bool
+ }
+ removeLast struct{}
+ )
+
+ // Sequence of insert operations to perform (order matters).
+ ops := []operation{
+ insert{`""`, true},
+ removeLast{},
+ insert{`""`, true},
+ insert{`""`, false},
+
+ // Test insertion of the same name with different formatting.
+ insert{`"alpha"`, true},
+ insert{`"ALPHA"`, true}, // case-sensitive matching
+ insert{`"alpha"`, false},
+ insert{`"\u0061\u006c\u0070\u0068\u0061"`, false}, // unescapes to "alpha"
+ removeLast{}, // removes "ALPHA"
+ insert{`"alpha"`, false},
+ removeLast{}, // removes "alpha"
+ insert{`"alpha"`, true},
+ removeLast{},
+
+ // Bulk insert simple names.
+ insert{`"alpha"`, true},
+ insert{`"bravo"`, true},
+ insert{`"charlie"`, true},
+ insert{`"delta"`, true},
+ insert{`"echo"`, true},
+ insert{`"foxtrot"`, true},
+ insert{`"golf"`, true},
+ insert{`"hotel"`, true},
+ insert{`"india"`, true},
+ insert{`"juliet"`, true},
+ insert{`"kilo"`, true},
+ insert{`"lima"`, true},
+ insert{`"mike"`, true},
+ insert{`"november"`, true},
+ insert{`"oscar"`, true},
+ insert{`"papa"`, true},
+ insert{`"quebec"`, true},
+ insert{`"romeo"`, true},
+ insert{`"sierra"`, true},
+ insert{`"tango"`, true},
+ insert{`"uniform"`, true},
+ insert{`"victor"`, true},
+ insert{`"whiskey"`, true},
+ insert{`"xray"`, true},
+ insert{`"yankee"`, true},
+ insert{`"zulu"`, true},
+
+ // Test insertion of invalid UTF-8.
+ insert{`"` + "\ufffd" + `"`, true},
+ insert{`"` + "\ufffd" + `"`, false},
+ insert{`"\ufffd"`, false}, // unescapes to Unicode replacement character
+ insert{`"\uFFFD"`, false}, // unescapes to Unicode replacement character
+ insert{`"` + "\xff" + `"`, false}, // mangles as Unicode replacement character
+ removeLast{},
+ insert{`"` + "\ufffd" + `"`, true},
+
+ // Test insertion of unicode characters.
+ insert{`"☺☻☹"`, true},
+ insert{`"☺☻☹"`, false},
+ removeLast{},
+ insert{`"☺☻☹"`, true},
+ }
+
+ // Execute the sequence of operations twice:
+ // 1) on a fresh namespace and 2) on a namespace that has been reset.
+ var ns objectNamespace
+ wantNames := []string{}
+ for _, reset := range []bool{false, true} {
+ if reset {
+ ns.reset()
+ wantNames = nil
+ }
+
+ // Execute the operations and ensure the state is consistent.
+ for i, op := range ops {
+ switch op := op.(type) {
+ case insert:
+ gotInserted := ns.insertQuoted([]byte(op.name), false)
+ if gotInserted != op.wantInserted {
+ t.Fatalf("%d: objectNamespace{%v}.insert(%v) = %v, want %v", i, strings.Join(wantNames, " "), op.name, gotInserted, op.wantInserted)
+ }
+ if gotInserted {
+ b, _ := jsonwire.AppendUnquote(nil, []byte(op.name))
+ wantNames = append(wantNames, string(b))
+ }
+ case removeLast:
+ ns.removeLast()
+ wantNames = wantNames[:len(wantNames)-1]
+ default:
+ panic(fmt.Sprintf("unknown operation: %T", op))
+ }
+
+ // Check that the namespace is consistent.
+ gotNames := []string{}
+ for i := range ns.length() {
+ gotNames = append(gotNames, string(ns.getUnquoted(i)))
+ }
+ if !slices.Equal(gotNames, wantNames) {
+ t.Fatalf("%d: objectNamespace = {%v}, want {%v}", i, strings.Join(gotNames, " "), strings.Join(wantNames, " "))
+ }
+ }
+
+ // Verify that we have not switched to using a Go map.
+ if ns.mapNames != nil {
+ t.Errorf("objectNamespace.mapNames = non-nil, want nil")
+ }
+
+ // Insert a large number of names.
+ for i := range 64 {
+ ns.InsertUnquoted([]byte(fmt.Sprintf(`name%d`, i)))
+ }
+
+ // Verify that we did switch to using a Go map.
+ if ns.mapNames == nil {
+ t.Errorf("objectNamespace.mapNames = nil, want non-nil")
+ }
+ }
+}
diff --git a/pkg/encoders/json/jsontext/token.go b/pkg/encoders/json/jsontext/token.go
new file mode 100644
index 0000000..e78c3f8
--- /dev/null
+++ b/pkg/encoders/json/jsontext/token.go
@@ -0,0 +1,527 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "bytes"
+ "errors"
+ "math"
+ "strconv"
+
+ "encoding/json/internal/jsonflags"
+ "encoding/json/internal/jsonwire"
+)
+
+// NOTE: Token is analogous to v1 json.Token.
+
+const (
+ maxInt64 = math.MaxInt64
+ minInt64 = math.MinInt64
+ maxUint64 = math.MaxUint64
+ minUint64 = 0 // for consistency and readability purposes
+
+ invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call"
+)
+
+var errInvalidToken = errors.New("invalid jsontext.Token")
+
+// Token represents a lexical JSON token, which may be one of the following:
+// - a JSON literal (i.e., null, true, or false)
+// - a JSON string (e.g., "hello, world!")
+// - a JSON number (e.g., 123.456)
+// - a begin or end delimiter for a JSON object (i.e., { or } )
+// - a begin or end delimiter for a JSON array (i.e., [ or ] )
+//
+// A Token cannot represent entire array or object values, while a [Value] can.
+// There is no Token to represent commas and colons since
+// these structural tokens can be inferred from the surrounding context.
+type Token struct {
+ nonComparable
+
+ // Tokens can exist in either a "raw" or an "exact" form.
+ // Tokens produced by the Decoder are in the "raw" form.
+ // Tokens returned by constructors are usually in the "exact" form.
+ // The Encoder accepts Tokens in either the "raw" or "exact" form.
+ //
+ // The following chart shows the possible values for each Token type:
+ // ╔═════════════════╦════════════╤════════════╤════════════╗
+ // ║ Token type ║ raw field │ str field │ num field ║
+ // ╠═════════════════╬════════════╪════════════╪════════════╣
+ // ║ null (raw) ║ "null" │ "" │ 0 ║
+ // ║ false (raw) ║ "false" │ "" │ 0 ║
+ // ║ true (raw) ║ "true" │ "" │ 0 ║
+ // ║ string (raw) ║ non-empty │ "" │ offset ║
+ // ║ string (string) ║ nil │ non-empty │ 0 ║
+ // ║ number (raw) ║ non-empty │ "" │ offset ║
+ // ║ number (float) ║ nil │ "f" │ non-zero ║
+ // ║ number (int64) ║ nil │ "i" │ non-zero ║
+ // ║ number (uint64) ║ nil │ "u" │ non-zero ║
+ // ║ object (delim) ║ "{" or "}" │ "" │ 0 ║
+ // ║ array (delim) ║ "[" or "]" │ "" │ 0 ║
+ // ╚═════════════════╩════════════╧════════════╧════════════╝
+ //
+ // Notes:
+ // - For tokens stored in "raw" form, the num field contains the
+ // absolute offset determined by raw.previousOffsetStart().
+ // The buffer itself is stored in raw.previousBuffer().
+ // - JSON literals and structural characters are always in the "raw" form.
+ // - JSON strings and numbers can be in either "raw" or "exact" forms.
+ // - The exact zero value of JSON strings and numbers in the "exact" forms
+ // have ambiguous representation. Thus, they are always represented
+ // in the "raw" form.
+
+ // raw contains a reference to the raw decode buffer.
+ // If non-nil, then its value takes precedence over str and num.
+ // It is only valid if num == raw.previousOffsetStart().
+ raw *decodeBuffer
+
+ // str is the unescaped JSON string if num is zero.
+ // Otherwise, it is "f", "i", or "u" if num should be interpreted
+ // as a float64, int64, or uint64, respectively.
+ str string
+
+ // num is a float64, int64, or uint64 stored as a uint64 value.
+ // It is non-zero for any JSON number in the "exact" form.
+ num uint64
+}
+
+// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues?
+
+var (
+ Null Token = rawToken("null")
+ False Token = rawToken("false")
+ True Token = rawToken("true")
+
+ BeginObject Token = rawToken("{")
+ EndObject Token = rawToken("}")
+ BeginArray Token = rawToken("[")
+ EndArray Token = rawToken("]")
+
+ zeroString Token = rawToken(`""`)
+ zeroNumber Token = rawToken(`0`)
+
+ nanString Token = String("NaN")
+ pinfString Token = String("Infinity")
+ ninfString Token = String("-Infinity")
+)
+
+func rawToken(s string) Token {
+ return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}}
+}
+
+// Bool constructs a Token representing a JSON boolean.
+func Bool(b bool) Token {
+ if b {
+ return True
+ }
+ return False
+}
+
+// String constructs a Token representing a JSON string.
+// The provided string should contain valid UTF-8, otherwise invalid characters
+// may be mangled as the Unicode replacement character.
+func String(s string) Token {
+ if len(s) == 0 {
+ return zeroString
+ }
+ return Token{str: s}
+}
+
+// Float constructs a Token representing a JSON number.
+// The values NaN, +Inf, and -Inf will be represented
+// as a JSON string with the values "NaN", "Infinity", and "-Infinity".
+func Float(n float64) Token {
+ switch {
+ case math.Float64bits(n) == 0:
+ return zeroNumber
+ case math.IsNaN(n):
+ return nanString
+ case math.IsInf(n, +1):
+ return pinfString
+ case math.IsInf(n, -1):
+ return ninfString
+ }
+ return Token{str: "f", num: math.Float64bits(n)}
+}
+
+// Int constructs a Token representing a JSON number from an int64.
+func Int(n int64) Token {
+ if n == 0 {
+ return zeroNumber
+ }
+ return Token{str: "i", num: uint64(n)}
+}
+
+// Uint constructs a Token representing a JSON number from a uint64.
+func Uint(n uint64) Token {
+ if n == 0 {
+ return zeroNumber
+ }
+ return Token{str: "u", num: uint64(n)}
+}
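+
+// A usage sketch for the constructors above (illustrative only):
+//
+// Bool(true).Bool() == true
+// String("hello").Kind() == '"'
+// Int(-3).Int() == -3
+// Float(math.NaN()).Kind() == '"' // NaN is carried as the JSON string "NaN"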
+
+// Clone makes a copy of the Token such that its value remains valid
+// even after a subsequent [Decoder.Read] call.
+func (t Token) Clone() Token {
+ // TODO: Allow caller to avoid any allocations?
+ if raw := t.raw; raw != nil {
+ // Avoid copying globals.
+ if t.raw.prevStart == 0 {
+ switch t.raw {
+ case Null.raw:
+ return Null
+ case False.raw:
+ return False
+ case True.raw:
+ return True
+ case BeginObject.raw:
+ return BeginObject
+ case EndObject.raw:
+ return EndObject
+ case BeginArray.raw:
+ return BeginArray
+ case EndArray.raw:
+ return EndArray
+ }
+ }
+
+ if uint64(raw.previousOffsetStart()) != t.num {
+ panic(invalidTokenPanic)
+ }
+ buf := bytes.Clone(raw.previousBuffer())
+ return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}}
+ }
+ return t
+}
+
+// Bool returns the value for a JSON boolean.
+// It panics if the token kind is not a JSON boolean.
+func (t Token) Bool() bool {
+ switch t.raw {
+ case True.raw:
+ return true
+ case False.raw:
+ return false
+ default:
+ panic("invalid JSON token kind: " + t.Kind().String())
+ }
+}
+
+// appendString appends a JSON string to dst and returns it.
+// It panics if t is not a JSON string.
+func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
+ if raw := t.raw; raw != nil {
+ // Handle raw string value.
+ buf := raw.previousBuffer()
+ if Kind(buf[0]) == '"' {
+ if jsonwire.ConsumeSimpleString(buf) == len(buf) {
+ return append(dst, buf...), nil
+ }
+ dst, _, err := jsonwire.ReformatString(dst, buf, flags)
+ return dst, err
+ }
+ } else if len(t.str) != 0 && t.num == 0 {
+ // Handle exact string value.
+ return jsonwire.AppendQuote(dst, t.str, flags)
+ }
+
+ panic("invalid JSON token kind: " + t.Kind().String())
+}
+
+// String returns the unescaped string value for a JSON string.
+// For other JSON kinds, this returns the raw JSON representation.
+func (t Token) String() string {
+ // This is inlinable to take advantage of "function outlining".
+ // This avoids an allocation for the string(b) conversion
+ // if the caller does not use the string in an escaping manner.
+ // See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
+ s, b := t.string()
+ if len(b) > 0 {
+ return string(b)
+ }
+ return s
+}
+func (t Token) string() (string, []byte) {
+ if raw := t.raw; raw != nil {
+ if uint64(raw.previousOffsetStart()) != t.num {
+ panic(invalidTokenPanic)
+ }
+ buf := raw.previousBuffer()
+ if buf[0] == '"' {
+ // TODO: Preserve ValueFlags in Token?
+ isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf)
+ return "", jsonwire.UnquoteMayCopy(buf, isVerbatim)
+ }
+ // Handle tokens that are not JSON strings for fmt.Stringer.
+ return "", buf
+ }
+ if len(t.str) != 0 && t.num == 0 {
+ return t.str, nil
+ }
+ // Handle tokens that are not JSON strings for fmt.Stringer.
+ if t.num > 0 {
+ switch t.str[0] {
+ case 'f':
+ return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil
+ case 'i':
+ return strconv.FormatInt(int64(t.num), 10), nil
+ case 'u':
+ return strconv.FormatUint(uint64(t.num), 10), nil
+ }
+ }
+ return "", nil
+}
+
+// appendNumber appends a JSON number to dst and returns it.
+// It panics if t is not a JSON number.
+func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
+ if raw := t.raw; raw != nil {
+ // Handle raw number value.
+ buf := raw.previousBuffer()
+ if Kind(buf[0]).normalize() == '0' {
+ dst, _, err := jsonwire.ReformatNumber(dst, buf, flags)
+ return dst, err
+ }
+ } else if t.num != 0 {
+ // Handle exact number value.
+ switch t.str[0] {
+ case 'f':
+ return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil
+ case 'i':
+ return strconv.AppendInt(dst, int64(t.num), 10), nil
+ case 'u':
+ return strconv.AppendUint(dst, uint64(t.num), 10), nil
+ }
+ }
+
+ panic("invalid JSON token kind: " + t.Kind().String())
+}
+
+// Float returns the floating-point value for a JSON number.
+// It returns a NaN, +Inf, or -Inf value for any JSON string
+// with the values "NaN", "Infinity", or "-Infinity".
+// It panics for all other cases.
+func (t Token) Float() float64 {
+ if raw := t.raw; raw != nil {
+ // Handle raw number value.
+ if uint64(raw.previousOffsetStart()) != t.num {
+ panic(invalidTokenPanic)
+ }
+ buf := raw.previousBuffer()
+ if Kind(buf[0]).normalize() == '0' {
+ fv, _ := jsonwire.ParseFloat(buf, 64)
+ return fv
+ }
+ } else if t.num != 0 {
+ // Handle exact number value.
+ switch t.str[0] {
+ case 'f':
+ return math.Float64frombits(t.num)
+ case 'i':
+ return float64(int64(t.num))
+ case 'u':
+ return float64(uint64(t.num))
+ }
+ }
+
+ // Handle string values with "NaN", "Infinity", or "-Infinity".
+ if t.Kind() == '"' {
+ switch t.String() {
+ case "NaN":
+ return math.NaN()
+ case "Infinity":
+ return math.Inf(+1)
+ case "-Infinity":
+ return math.Inf(-1)
+ }
+ }
+
+ panic("invalid JSON token kind: " + t.Kind().String())
+}
+
+// Int returns the signed integer value for a JSON number.
+// The fractional component of any number is ignored (truncation toward zero).
+// Any number beyond the representation of an int64 will be saturated
+// to the closest representable value.
+// It panics if the token kind is not a JSON number.
+func (t Token) Int() int64 {
+ if raw := t.raw; raw != nil {
+ // Handle raw integer value.
+ if uint64(raw.previousOffsetStart()) != t.num {
+ panic(invalidTokenPanic)
+ }
+ neg := false
+ buf := raw.previousBuffer()
+ if len(buf) > 0 && buf[0] == '-' {
+ neg, buf = true, buf[1:]
+ }
+ if numAbs, ok := jsonwire.ParseUint(buf); ok {
+ if neg {
+ if numAbs > -minInt64 {
+ return minInt64
+ }
+ return -1 * int64(numAbs)
+ } else {
+ if numAbs > +maxInt64 {
+ return maxInt64
+ }
+ return +1 * int64(numAbs)
+ }
+ }
+ } else if t.num != 0 {
+ // Handle exact integer value.
+ switch t.str[0] {
+ case 'i':
+ return int64(t.num)
+ case 'u':
+ if t.num > maxInt64 {
+ return maxInt64
+ }
+ return int64(t.num)
+ }
+ }
+
+ // Handle JSON number that is a floating-point value.
+ if t.Kind() == '0' {
+ switch fv := t.Float(); {
+ case fv >= maxInt64:
+ return maxInt64
+ case fv <= minInt64:
+ return minInt64
+ default:
+ return int64(fv) // truncation toward zero
+ }
+ }
+
+ panic("invalid JSON token kind: " + t.Kind().String())
+}
+
+// Uint returns the unsigned integer value for a JSON number.
+// The fractional component of any number is ignored (truncation toward zero).
+// Any number beyond the representation of a uint64 will be saturated
+// to the closest representable value.
+// It panics if the token kind is not a JSON number.
+func (t Token) Uint() uint64 {
+ // NOTE: This accessor returns 0 for any negative JSON number,
+ // which might be surprising, but is at least consistent with the behavior
+ // of saturating out-of-bounds numbers to the closest representable number.
+
+ if raw := t.raw; raw != nil {
+ // Handle raw integer value.
+ if uint64(raw.previousOffsetStart()) != t.num {
+ panic(invalidTokenPanic)
+ }
+ neg := false
+ buf := raw.previousBuffer()
+ if len(buf) > 0 && buf[0] == '-' {
+ neg, buf = true, buf[1:]
+ }
+ if num, ok := jsonwire.ParseUint(buf); ok {
+ if neg {
+ return minUint64
+ }
+ return num
+ }
+ } else if t.num != 0 {
+ // Handle exact integer value.
+ switch t.str[0] {
+ case 'u':
+ return t.num
+ case 'i':
+ if int64(t.num) < minUint64 {
+ return minUint64
+ }
+ return uint64(int64(t.num))
+ }
+ }
+
+ // Handle JSON number that is a floating-point value.
+ if t.Kind() == '0' {
+ switch fv := t.Float(); {
+ case fv >= maxUint64:
+ return maxUint64
+ case fv <= minUint64:
+ return minUint64
+ default:
+ return uint64(fv) // truncation toward zero
+ }
+ }
+
+ panic("invalid JSON token kind: " + t.Kind().String())
+}
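+
+// Saturation and truncation in practice (illustrative, mirroring the token
+// tests):
+//
+// rawToken(`1e1000`).Int() == maxInt64 // too large, saturates
+// rawToken(`1.1`).Int() == 1 // fraction truncated toward zero
+// rawToken(`-1.1`).Uint() == 0 // negative, saturates to zero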
+
+// Kind returns the token kind.
+func (t Token) Kind() Kind {
+ switch {
+ case t.raw != nil:
+ raw := t.raw
+ if uint64(raw.previousOffsetStart()) != t.num {
+ panic(invalidTokenPanic)
+ }
+ return Kind(t.raw.buf[raw.prevStart]).normalize()
+ case t.num != 0:
+ return '0'
+ case len(t.str) != 0:
+ return '"'
+ default:
+ return invalidKind
+ }
+}
+
+// Kind represents each possible JSON token kind with a single byte,
+// which is conveniently the first byte of that kind's grammar
+// with the restriction that numbers always be represented with '0':
+//
+// - 'n': null
+// - 'f': false
+// - 't': true
+// - '"': string
+// - '0': number
+// - '{': object begin
+// - '}': object end
+// - '[': array begin
+// - ']': array end
+//
+// An invalid kind is usually represented using 0,
+// but may be non-zero due to invalid JSON data.
+type Kind byte
+
+const invalidKind Kind = 0
+
+// String prints the kind in a humanly readable fashion.
+func (k Kind) String() string {
+ switch k {
+ case 'n':
+ return "null"
+ case 'f':
+ return "false"
+ case 't':
+ return "true"
+ case '"':
+ return "string"
+ case '0':
+ return "number"
+ case '{':
+ return "{"
+ case '}':
+ return "}"
+ case '[':
+ return "["
+ case ']':
+ return "]"
+ default:
+ return ""
+ }
+}
+
+// normalize coalesces all possible starting characters of a number as just '0'.
+func (k Kind) normalize() Kind {
+ if k == '-' || ('0' <= k && k <= '9') {
+ return '0'
+ }
+ return k
+}
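+
+// For example (illustrative): every possible leading digit or sign of a number
+// coalesces to '0', while all other kinds map to themselves.
+//
+// Kind('7').normalize() == '0'
+// Kind('-').normalize() == '0'
+// Kind('"').normalize() == '"'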
diff --git a/pkg/encoders/json/jsontext/token_test.go b/pkg/encoders/json/jsontext/token_test.go
new file mode 100644
index 0000000..ebe324e
--- /dev/null
+++ b/pkg/encoders/json/jsontext/token_test.go
@@ -0,0 +1,168 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "math"
+ "reflect"
+ "testing"
+)
+
+func TestTokenStringAllocations(t *testing.T) {
+ if testing.CoverMode() != "" {
+ t.Skip("coverage mode breaks the compiler optimization this depends on")
+ }
+
+ tok := rawToken(`"hello"`)
+ var m map[string]bool
+ got := int(testing.AllocsPerRun(10, func() {
+ // This function uses tok.String() in a non-escaping manner
+ // (i.e., looking it up in a Go map). It should not allocate.
+ if m[tok.String()] {
+ panic("never executed")
+ }
+ }))
+ if got > 0 {
+ t.Errorf("Token.String allocated %d times, want 0", got)
+ }
+}
+
+func TestTokenAccessors(t *testing.T) {
+ type token struct {
+ Bool bool
+ String string
+ Float float64
+ Int int64
+ Uint uint64
+ Kind Kind
+ }
+
+ tests := []struct {
+ in Token
+ want token
+ }{
+ {Token{}, token{String: ""}},
+ {Null, token{String: "null", Kind: 'n'}},
+ {False, token{Bool: false, String: "false", Kind: 'f'}},
+ {True, token{Bool: true, String: "true", Kind: 't'}},
+ {Bool(false), token{Bool: false, String: "false", Kind: 'f'}},
+ {Bool(true), token{Bool: true, String: "true", Kind: 't'}},
+ {BeginObject, token{String: "{", Kind: '{'}},
+ {EndObject, token{String: "}", Kind: '}'}},
+ {BeginArray, token{String: "[", Kind: '['}},
+ {EndArray, token{String: "]", Kind: ']'}},
+ {String(""), token{String: "", Kind: '"'}},
+ {String("hello, world!"), token{String: "hello, world!", Kind: '"'}},
+ {rawToken(`"hello, world!"`), token{String: "hello, world!", Kind: '"'}},
+ {Float(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}},
+ {Float(math.Copysign(0, -1)), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}},
+ {Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '"'}},
+ {Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '"'}},
+ {Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '"'}},
+ {Int(minInt64), token{String: "-9223372036854775808", Float: minInt64, Int: minInt64, Uint: minUint64, Kind: '0'}},
+ {Int(minInt64 + 1), token{String: "-9223372036854775807", Float: minInt64 + 1, Int: minInt64 + 1, Uint: minUint64, Kind: '0'}},
+ {Int(-1), token{String: "-1", Float: -1, Int: -1, Uint: minUint64, Kind: '0'}},
+ {Int(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}},
+ {Int(+1), token{String: "1", Float: +1, Int: +1, Uint: +1, Kind: '0'}},
+ {Int(maxInt64 - 1), token{String: "9223372036854775806", Float: maxInt64 - 1, Int: maxInt64 - 1, Uint: maxInt64 - 1, Kind: '0'}},
+ {Int(maxInt64), token{String: "9223372036854775807", Float: maxInt64, Int: maxInt64, Uint: maxInt64, Kind: '0'}},
+ {Uint(minUint64), token{String: "0", Kind: '0'}},
+ {Uint(minUint64 + 1), token{String: "1", Float: minUint64 + 1, Int: minUint64 + 1, Uint: minUint64 + 1, Kind: '0'}},
+ {Uint(maxUint64 - 1), token{String: "18446744073709551614", Float: maxUint64 - 1, Int: maxInt64, Uint: maxUint64 - 1, Kind: '0'}},
+ {Uint(maxUint64), token{String: "18446744073709551615", Float: maxUint64, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
+ {rawToken(`-0`), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}},
+ {rawToken(`1e1000`), token{String: "1e1000", Float: math.MaxFloat64, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
+ {rawToken(`-1e1000`), token{String: "-1e1000", Float: -math.MaxFloat64, Int: minInt64, Uint: minUint64, Kind: '0'}},
+ {rawToken(`0.1`), token{String: "0.1", Float: 0.1, Int: 0, Uint: 0, Kind: '0'}},
+ {rawToken(`0.5`), token{String: "0.5", Float: 0.5, Int: 0, Uint: 0, Kind: '0'}},
+ {rawToken(`0.9`), token{String: "0.9", Float: 0.9, Int: 0, Uint: 0, Kind: '0'}},
+ {rawToken(`1.1`), token{String: "1.1", Float: 1.1, Int: 1, Uint: 1, Kind: '0'}},
+ {rawToken(`-0.1`), token{String: "-0.1", Float: -0.1, Int: 0, Uint: 0, Kind: '0'}},
+ {rawToken(`-0.5`), token{String: "-0.5", Float: -0.5, Int: 0, Uint: 0, Kind: '0'}},
+ {rawToken(`-0.9`), token{String: "-0.9", Float: -0.9, Int: 0, Uint: 0, Kind: '0'}},
+ {rawToken(`-1.1`), token{String: "-1.1", Float: -1.1, Int: -1, Uint: 0, Kind: '0'}},
+ {rawToken(`99999999999999999999`), token{String: "99999999999999999999", Float: 1e20 - 1, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
+ {rawToken(`-99999999999999999999`), token{String: "-99999999999999999999", Float: -1e20 - 1, Int: minInt64, Uint: minUint64, Kind: '0'}},
+ }
+
+ for _, tt := range tests {
+ t.Run("", func(t *testing.T) {
+ got := token{
+ Bool: func() bool {
+ defer func() { recover() }()
+ return tt.in.Bool()
+ }(),
+ String: tt.in.String(),
+ Float: func() float64 {
+ defer func() { recover() }()
+ return tt.in.Float()
+ }(),
+ Int: func() int64 {
+ defer func() { recover() }()
+ return tt.in.Int()
+ }(),
+ Uint: func() uint64 {
+ defer func() { recover() }()
+ return tt.in.Uint()
+ }(),
+ Kind: tt.in.Kind(),
+ }
+
+ if got.Bool != tt.want.Bool {
+ t.Errorf("Token(%s).Bool() = %v, want %v", tt.in, got.Bool, tt.want.Bool)
+ }
+ if got.String != tt.want.String {
+ t.Errorf("Token(%s).String() = %v, want %v", tt.in, got.String, tt.want.String)
+ }
+ if math.Float64bits(got.Float) != math.Float64bits(tt.want.Float) {
+ t.Errorf("Token(%s).Float() = %v, want %v", tt.in, got.Float, tt.want.Float)
+ }
+ if got.Int != tt.want.Int {
+ t.Errorf("Token(%s).Int() = %v, want %v", tt.in, got.Int, tt.want.Int)
+ }
+ if got.Uint != tt.want.Uint {
+ t.Errorf("Token(%s).Uint() = %v, want %v", tt.in, got.Uint, tt.want.Uint)
+ }
+ if got.Kind != tt.want.Kind {
+ t.Errorf("Token(%s).Kind() = %v, want %v", tt.in, got.Kind, tt.want.Kind)
+ }
+ })
+ }
+}
+
+func TestTokenClone(t *testing.T) {
+ tests := []struct {
+ in Token
+ wantExactRaw bool
+ }{
+ {Token{}, true},
+ {Null, true},
+ {False, true},
+ {True, true},
+ {BeginObject, true},
+ {EndObject, true},
+ {BeginArray, true},
+ {EndArray, true},
+ {String("hello, world!"), true},
+ {rawToken(`"hello, world!"`), false},
+ {Float(3.14159), true},
+ {rawToken(`3.14159`), false},
+ }
+
+ for _, tt := range tests {
+ t.Run("", func(t *testing.T) {
+ got := tt.in.Clone()
+ if !reflect.DeepEqual(got, tt.in) {
+ t.Errorf("Token(%s) == Token(%s).Clone() = false, want true", tt.in, tt.in)
+ }
+ gotExactRaw := got.raw == tt.in.raw
+ if gotExactRaw != tt.wantExactRaw {
+ t.Errorf("Token(%s).raw == Token(%s).Clone().raw = %v, want %v", tt.in, tt.in, gotExactRaw, tt.wantExactRaw)
+ }
+ })
+ }
+}
diff --git a/pkg/encoders/json/jsontext/value.go b/pkg/encoders/json/jsontext/value.go
new file mode 100644
index 0000000..a4b06b2
--- /dev/null
+++ b/pkg/encoders/json/jsontext/value.go
@@ -0,0 +1,395 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "bytes"
+ "errors"
+ "io"
+ "slices"
+ "sync"
+
+ "encoding/json/internal/jsonflags"
+ "encoding/json/internal/jsonwire"
+)
+
+// NOTE: Value is analogous to v1 json.RawMessage.
+
+// AppendFormat formats the JSON value in src and appends it to dst
+// according to the specified options.
+// See [Value.Format] for more details about the formatting behavior.
+//
+// The dst and src may overlap.
+// If an error is reported, then the entirety of src is appended to dst.
+func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) {
+ e := getBufferedEncoder(opts...)
+ defer putBufferedEncoder(e)
+ e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
+ if err := e.s.WriteValue(src); err != nil {
+ return append(dst, src...), err
+ }
+ return append(dst, e.s.Buf...), nil
+}
+
+// Value represents a single raw JSON value, which may be one of the following:
+// - a JSON literal (i.e., null, true, or false)
+// - a JSON string (e.g., "hello, world!")
+// - a JSON number (e.g., 123.456)
+// - an entire JSON object (e.g., {"fizz":"buzz"} )
+// - an entire JSON array (e.g., [1,2,3] )
+//
+// Value can represent entire array or object values, while [Token] cannot.
+// Value may contain leading and/or trailing whitespace.
+type Value []byte
+
+// Clone returns a copy of v.
+func (v Value) Clone() Value {
+ return bytes.Clone(v)
+}
+
+// String returns the string formatting of v.
+func (v Value) String() string {
+ if v == nil {
+ return "null"
+ }
+ return string(v)
+}
+
+// IsValid reports whether the raw JSON value is syntactically valid
+// according to the specified options.
+//
+// By default (if no options are specified), it validates according to RFC 7493.
+// It verifies whether the input is properly encoded as UTF-8,
+// that escape sequences within strings decode to valid Unicode codepoints, and
+// that all names in each object are unique.
+// It does not verify whether numbers are representable within the limits
+// of any common numeric type (e.g., float64, int64, or uint64).
+//
+// Relevant options include:
+// - [AllowDuplicateNames]
+// - [AllowInvalidUTF8]
+//
+// All other options are ignored.
+func (v Value) IsValid(opts ...Options) bool {
+ // TODO: Document support for [WithByteLimit] and [WithDepthLimit].
+ d := getBufferedDecoder(v, opts...)
+ defer putBufferedDecoder(d)
+ _, errVal := d.ReadValue()
+ _, errEOF := d.ReadToken()
+ return errVal == nil && errEOF == io.EOF
+}
+
+// Format formats the raw JSON value in place.
+//
+// By default (if no options are specified), it validates according to RFC 7493
+// and produces the minimal JSON representation, where
+// all whitespace is elided and JSON strings use the shortest encoding.
+//
+// Relevant options include:
+// - [AllowDuplicateNames]
+// - [AllowInvalidUTF8]
+// - [EscapeForHTML]
+// - [EscapeForJS]
+// - [PreserveRawStrings]
+// - [CanonicalizeRawInts]
+// - [CanonicalizeRawFloats]
+// - [ReorderRawObjects]
+// - [SpaceAfterColon]
+// - [SpaceAfterComma]
+// - [Multiline]
+// - [WithIndent]
+// - [WithIndentPrefix]
+//
+// All other options are ignored.
+//
+// It is guaranteed to succeed if the value is valid according to the same options.
+// If the value is already formatted, then the buffer is not mutated.
+func (v *Value) Format(opts ...Options) error {
+ // TODO: Document support for [WithByteLimit] and [WithDepthLimit].
+ return v.format(opts, nil)
+}
+
+// format accepts two []Options to avoid the allocation appending them together.
+// It is equivalent to v.Format(append(opts1, opts2...)...).
+func (v *Value) format(opts1, opts2 []Options) error {
+ e := getBufferedEncoder(opts1...)
+ defer putBufferedEncoder(e)
+ e.s.Join(opts2...)
+ e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
+ if err := e.s.WriteValue(*v); err != nil {
+ return err
+ }
+ if !bytes.Equal(*v, e.s.Buf) {
+ *v = append((*v)[:0], e.s.Buf...)
+ }
+ return nil
+}
+
+// Compact removes all whitespace from the raw JSON value.
+//
+// It does not reformat JSON strings or numbers to use any other representation.
+// To maximize the set of JSON values that can be formatted,
+// this permits values with duplicate names and invalid UTF-8.
+//
+// Compact is equivalent to calling [Value.Format] with the following options:
+// - [AllowDuplicateNames](true)
+// - [AllowInvalidUTF8](true)
+// - [PreserveRawStrings](true)
+//
+// Any options specified by the caller are applied after the initial set
+// and may deliberately override prior options.
+func (v *Value) Compact(opts ...Options) error {
+ return v.format([]Options{
+ AllowDuplicateNames(true),
+ AllowInvalidUTF8(true),
+ PreserveRawStrings(true),
+ }, opts)
+}
+
+// Indent reformats the whitespace in the raw JSON value so that each element
+// in a JSON object or array begins on an indented line according to the nesting.
+//
+// It does not reformat JSON strings or numbers to use any other representation.
+// To maximize the set of JSON values that can be formatted,
+// this permits values with duplicate names and invalid UTF-8.
+//
+// Indent is equivalent to calling [Value.Format] with the following options:
+// - [AllowDuplicateNames](true)
+// - [AllowInvalidUTF8](true)
+// - [PreserveRawStrings](true)
+// - [Multiline](true)
+//
+// Any options specified by the caller are applied after the initial set
+// and may deliberately override prior options.
+func (v *Value) Indent(opts ...Options) error {
+ return v.format([]Options{
+ AllowDuplicateNames(true),
+ AllowInvalidUTF8(true),
+ PreserveRawStrings(true),
+ Multiline(true),
+ }, opts)
+}
+
+// Canonicalize canonicalizes the raw JSON value according to the
+// JSON Canonicalization Scheme (JCS) as defined by RFC 8785
+// where it produces a stable representation of a JSON value.
+//
+// JSON strings are formatted to use their minimal representation,
+// JSON numbers are formatted as double precision numbers according
+// to some stable serialization algorithm.
+// JSON object members are sorted in ascending order by name.
+// All whitespace is removed.
+//
+// The output stability is dependent on the stability of the application data
+// (see RFC 8785, Appendix E). It cannot produce stable output from
+// fundamentally unstable input. For example, if the JSON value
+// contains ephemeral data (e.g., a frequently changing timestamp),
+// then the value is still unstable regardless of whether this is called.
+//
+// Canonicalize is equivalent to calling [Value.Format] with the following options:
+// - [CanonicalizeRawInts](true)
+// - [CanonicalizeRawFloats](true)
+// - [ReorderRawObjects](true)
+//
+// Any options specified by the caller are applied after the initial set
+// and may deliberately override prior options.
+//
+// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
+// Any numbers with precision beyond what is representable by that form
+// will lose their precision when canonicalized. For example, integer values
+// beyond ±2⁵³ will lose their precision. To preserve the original representation
+// of JSON integers, additionally set [CanonicalizeRawInts] to false:
+//
+// v.Canonicalize(jsontext.CanonicalizeRawInts(false))
+func (v *Value) Canonicalize(opts ...Options) error {
+ return v.format([]Options{
+ CanonicalizeRawInts(true),
+ CanonicalizeRawFloats(true),
+ ReorderRawObjects(true),
+ }, opts)
+}
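+
+// sketchCanonicalize is an editor's illustrative sketch, not part of the
+// upstream file: it applies Canonicalize to a hypothetical raw value so the
+// effects described above are visible in one place.
+func sketchCanonicalize() (Value, error) {
+ v := Value(`{"b": 20e-1, "a": 1, "s": "\u20AC"}`)
+ // Afterwards v reads roughly `{"a":1,"b":2,"s":"€"}`: members sorted by
+ // name, numbers serialized per RFC 8785, strings minimally encoded.
+ err := v.Canonicalize()
+ return v, err
+}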
+
+// MarshalJSON returns v as the JSON encoding of v.
+// It returns the stored value as the raw JSON output without any validation.
+// If v is nil, then this returns a JSON null.
+func (v Value) MarshalJSON() ([]byte, error) {
+ // NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
+ if v == nil {
+ return []byte("null"), nil
+ }
+ return v, nil
+}
+
+// UnmarshalJSON sets v as the JSON encoding of b.
+// It stores a copy of the provided raw JSON input without any validation.
+func (v *Value) UnmarshalJSON(b []byte) error {
+ // NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
+ if v == nil {
+ return errors.New("jsontext.Value: UnmarshalJSON on nil pointer")
+ }
+ *v = append((*v)[:0], b...)
+ return nil
+}
+
+// Kind returns the starting token kind.
+// For a valid value, this will never include '}' or ']'.
+func (v Value) Kind() Kind {
+ if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 {
+ return Kind(v[0]).normalize()
+ }
+ return invalidKind
+}
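+
+// sketchKindSwitch is an editor's illustrative sketch, not part of the
+// upstream file: it classifies a raw value by the starting token kind that
+// Kind reports, using this package's kind byte convention (object '{',
+// array '[', string '"', number '0', true 't', false 'f', null 'n').
+func sketchKindSwitch(v Value) string {
+ switch v.Kind() {
+ case '{':
+ return "object"
+ case '[':
+ return "array"
+ case '"':
+ return "string"
+ case '0':
+ return "number"
+ case 't', 'f':
+ return "boolean"
+ case 'n':
+ return "null"
+ default:
+ return "invalid or empty"
+ }
+}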
+
+const commaAndWhitespace = ", \n\r\t"
+
+type objectMember struct {
+ // name is the unquoted name.
+ name []byte // e.g., "name"
+ // buffer is the entirety of the raw JSON object member
+ // starting from right after the previous member (or opening '{')
+ // until right after the member value.
+ buffer []byte // e.g., `, \n\r\t"name": "value"`
+}
+
+func (x objectMember) Compare(y objectMember) int {
+ if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 {
+ return c
+ }
+ // With [AllowDuplicateNames] or [AllowInvalidUTF8],
+ // names could be identical, so also sort using the member value.
+ return jsonwire.CompareUTF16(
+ bytes.TrimLeft(x.buffer, commaAndWhitespace),
+ bytes.TrimLeft(y.buffer, commaAndWhitespace))
+}
+
+var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }}
+
+func getObjectMembers() *[]objectMember {
+ ns := objectMemberPool.Get().(*[]objectMember)
+ *ns = (*ns)[:0]
+ return ns
+}
+func putObjectMembers(ns *[]objectMember) {
+ if cap(*ns) < 1<<10 {
+ clear(*ns) // avoid pinning name and buffer
+ objectMemberPool.Put(ns)
+ }
+}
+
+// mustReorderObjects reorders in-place all object members in a JSON value.
+// The value must be valid; otherwise it panics.
+func mustReorderObjects(b []byte) {
+ // Obtain a buffered encoder just to use its internal buffer as
+ // a scratch buffer for reordering object members.
+ e2 := getBufferedEncoder()
+ defer putBufferedEncoder(e2)
+
+ // Disable unnecessary checks to syntactically parse the JSON value.
+ d := getBufferedDecoder(b)
+ defer putBufferedDecoder(d)
+ d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
+ mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3
+}
+
+// mustReorderObjectsFromDecoder recursively reorders all object members in place
+// according to the ordering specified in RFC 8785, section 3.2.3.
+//
+// Pre-conditions:
+// - The value is valid (i.e., no decoder errors should ever occur).
+// - Initial call is provided a Decoder reading from the start of v.
+//
+// Post-conditions:
+// - Exactly one JSON value is read from the Decoder.
+// - All fully-parsed JSON objects are reordered by directly moving
+// the members in the value buffer.
+//
+// The runtime is approximately O(n·log(n)) + O(m·log(m)),
+// where n is len(v) and m is the total number of object members.
+func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
+ switch tok, err := d.ReadToken(); tok.Kind() {
+ case '{':
+ // Iterate and collect the name and offsets for every object member.
+ members := getObjectMembers()
+ defer putObjectMembers(members)
+ var prevMember objectMember
+ isSorted := true
+
+ beforeBody := d.InputOffset() // offset after '{'
+ for d.PeekKind() != '}' {
+ beforeName := d.InputOffset()
+ var flags jsonwire.ValueFlags
+ name, _ := d.s.ReadValue(&flags)
+ name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
+ mustReorderObjectsFromDecoder(d, scratch)
+ afterValue := d.InputOffset()
+
+ currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
+ if isSorted && len(*members) > 0 {
+ isSorted = objectMember.Compare(prevMember, currMember) < 0
+ }
+ *members = append(*members, currMember)
+ prevMember = currMember
+ }
+ afterBody := d.InputOffset() // offset before '}'
+ d.ReadToken()
+
+ // Sort the members; return early if it's already sorted.
+ if isSorted {
+ return
+ }
+ firstBufferBeforeSorting := (*members)[0].buffer
+ slices.SortFunc(*members, objectMember.Compare)
+ firstBufferAfterSorting := (*members)[0].buffer
+
+ // Append the reordered members to a new buffer,
+ // then copy the reordered members back over the original members.
+ // Avoid swapping in place since each member may be a different size
+ // where moving a member over a smaller member may corrupt the data
+ // for subsequent members before they have been moved.
+ //
+ // The following invariant must hold:
+ // sum([m.after-m.before for m in members]) == afterBody-beforeBody
+ commaAndWhitespacePrefix := func(b []byte) []byte {
+ return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
+ }
+ sorted := (*scratch)[:0]
+ for i, member := range *members {
+ switch {
+ case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
+ // First member after sorting is not the first member before sorting,
+ // so use the prefix of the first member before sorting.
+ sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
+ sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
+ case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
+ // Later member after sorting is the first member before sorting,
+ // so use the prefix of the first member after sorting.
+ sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
+ sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
+ default:
+ sorted = append(sorted, member.buffer...)
+ }
+ }
+ if int(afterBody-beforeBody) != len(sorted) {
+ panic("BUG: length invariant violated")
+ }
+ copy(d.s.buf[beforeBody:afterBody], sorted)
+
+ // Update scratch buffer to the largest amount ever used.
+ if len(sorted) > len(*scratch) {
+ *scratch = sorted
+ }
+ case '[':
+ for d.PeekKind() != ']' {
+ mustReorderObjectsFromDecoder(d, scratch)
+ }
+ d.ReadToken()
+ default:
+ if err != nil {
+ panic("BUG: " + err.Error())
+ }
+ }
+}
diff --git a/pkg/encoders/json/jsontext/value_test.go b/pkg/encoders/json/jsontext/value_test.go
new file mode 100644
index 0000000..184a27d
--- /dev/null
+++ b/pkg/encoders/json/jsontext/value_test.go
@@ -0,0 +1,200 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "io"
+ "strings"
+ "testing"
+
+ "encoding/json/internal/jsontest"
+ "encoding/json/internal/jsonwire"
+)
+
+type valueTestdataEntry struct {
+ name jsontest.CaseName
+ in string
+ wantValid bool
+ wantCompacted string
+ wantCompactErr error // implies wantCompacted is in
+ wantIndented string // wantCompacted if empty; uses "\t" for indent prefix and " " for indent
+ wantIndentErr error // implies wantCompacted is in
+ wantCanonicalized string // wantCompacted if empty
+ wantCanonicalizeErr error // implies wantCompacted is in
+}
+
+var valueTestdata = append(func() (out []valueTestdataEntry) {
+ // Initialize valueTestdata from coderTestdata.
+ for _, td := range coderTestdata {
+ // NOTE: The Compact method preserves the raw formatting of strings,
+ // while the Encoder (by default) does not.
+ if td.name.Name == "ComplicatedString" {
+ td.outCompacted = strings.TrimSpace(td.in)
+ }
+ out = append(out, valueTestdataEntry{
+ name: td.name,
+ in: td.in,
+ wantValid: true,
+ wantCompacted: td.outCompacted,
+ wantIndented: td.outIndented,
+ wantCanonicalized: td.outCanonicalized,
+ })
+ }
+ return out
+}(), []valueTestdataEntry{{
+ name: jsontest.Name("RFC8785/Primitives"),
+ in: `{
+ "numbers": [333333333.33333329, 1E30, 4.50,
+ 2e-3, 0.000000000000000000000000001, -0],
+ "string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
+ "literals": [null, true, false]
+ }`,
+ wantValid: true,
+ wantCompacted: `{"numbers":[333333333.33333329,1E30,4.50,2e-3,0.000000000000000000000000001,-0],"string":"\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/","literals":[null,true,false]}`,
+ wantIndented: `{
+ "numbers": [
+ 333333333.33333329,
+ 1E30,
+ 4.50,
+ 2e-3,
+ 0.000000000000000000000000001,
+ -0
+ ],
+ "string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
+ "literals": [
+ null,
+ true,
+ false
+ ]
+ }`,
+ wantCanonicalized: `{"literals":[null,true,false],"numbers":[333333333.3333333,1e+30,4.5,0.002,1e-27,0],"string":"€$\u000f\nA'B\"\\\\\"/"}`,
+}, {
+ name: jsontest.Name("RFC8785/ObjectOrdering"),
+ in: `{
+ "\u20ac": "Euro Sign",
+ "\r": "Carriage Return",
+ "\ufb33": "Hebrew Letter Dalet With Dagesh",
+ "1": "One",
+ "\ud83d\ude00": "Emoji: Grinning Face",
+ "\u0080": "Control",
+ "\u00f6": "Latin Small Letter O With Diaeresis"
+ }`,
+ wantValid: true,
+ wantCompacted: `{"\u20ac":"Euro Sign","\r":"Carriage Return","\ufb33":"Hebrew Letter Dalet With Dagesh","1":"One","\ud83d\ude00":"Emoji: Grinning Face","\u0080":"Control","\u00f6":"Latin Small Letter O With Diaeresis"}`,
+ wantIndented: `{
+ "\u20ac": "Euro Sign",
+ "\r": "Carriage Return",
+ "\ufb33": "Hebrew Letter Dalet With Dagesh",
+ "1": "One",
+ "\ud83d\ude00": "Emoji: Grinning Face",
+ "\u0080": "Control",
+ "\u00f6": "Latin Small Letter O With Diaeresis"
+ }`,
+ wantCanonicalized: `{"\r":"Carriage Return","1":"One","":"Control","ö":"Latin Small Letter O With Diaeresis","€":"Euro Sign","😀":"Emoji: Grinning Face","דּ":"Hebrew Letter Dalet With Dagesh"}`,
+}, {
+ name: jsontest.Name("LargeIntegers"),
+ in: ` [ -9223372036854775808 , 9223372036854775807 ] `,
+ wantValid: true,
+ wantCompacted: `[-9223372036854775808,9223372036854775807]`,
+ wantIndented: `[
+ -9223372036854775808,
+ 9223372036854775807
+ ]`,
+ wantCanonicalized: `[-9223372036854776000,9223372036854776000]`, // NOTE: Loss of precision due to numbers being treated as floats.
+}, {
+ name: jsontest.Name("InvalidUTF8"),
+ in: ` "living` + "\xde\xad\xbe\xef" + `\ufffd�" `,
+ wantValid: false, // uses RFC 7493 as the definition, which requires valid UTF-8
+ wantCompacted: `"living` + "\xde\xad\xbe\xef" + `\ufffd�"`,
+ wantCanonicalizeErr: E(jsonwire.ErrInvalidUTF8).withPos(` "living`+"\xde\xad", ""),
+}, {
+ name: jsontest.Name("InvalidUTF8/SurrogateHalf"),
+ in: `"\ud800"`,
+ wantValid: false, // uses RFC 7493 as the definition, which requires valid UTF-8
+ wantCompacted: `"\ud800"`,
+ wantCanonicalizeErr: newInvalidEscapeSequenceError(`\ud800"`).withPos(`"`, ""),
+}, {
+ name: jsontest.Name("UppercaseEscaped"),
+ in: `"\u000B"`,
+ wantValid: true,
+ wantCompacted: `"\u000B"`,
+ wantCanonicalized: `"\u000b"`,
+}, {
+ name: jsontest.Name("DuplicateNames"),
+ in: ` { "0" : 0 , "1" : 1 , "0" : 0 }`,
+ wantValid: false, // uses RFC 7493 as the definition, which requires object member names to be unique
+ wantCompacted: `{"0":0,"1":1,"0":0}`,
+ wantIndented: `{
+ "0": 0,
+ "1": 1,
+ "0": 0
+ }`,
+ wantCanonicalizeErr: E(ErrDuplicateName).withPos(` { "0" : 0 , "1" : 1 , `, "/0"),
+}, {
+ name: jsontest.Name("Whitespace"),
+ in: " \n\r\t",
+ wantValid: false,
+ wantCompacted: " \n\r\t",
+ wantCompactErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
+ wantIndentErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
+ wantCanonicalizeErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
+}}...)
+
+func TestValueMethods(t *testing.T) {
+ for _, td := range valueTestdata {
+ t.Run(td.name.Name, func(t *testing.T) {
+ if td.wantIndented == "" {
+ td.wantIndented = td.wantCompacted
+ }
+ if td.wantCanonicalized == "" {
+ td.wantCanonicalized = td.wantCompacted
+ }
+ if td.wantCompactErr != nil {
+ td.wantCompacted = td.in
+ }
+ if td.wantIndentErr != nil {
+ td.wantIndented = td.in
+ }
+ if td.wantCanonicalizeErr != nil {
+ td.wantCanonicalized = td.in
+ }
+
+ v := Value(td.in)
+ gotValid := v.IsValid()
+ if gotValid != td.wantValid {
+ t.Errorf("%s: Value.IsValid = %v, want %v", td.name.Where, gotValid, td.wantValid)
+ }
+
+ gotCompacted := Value(td.in)
+ gotCompactErr := gotCompacted.Compact()
+ if string(gotCompacted) != td.wantCompacted {
+ t.Errorf("%s: Value.Compact = %s, want %s", td.name.Where, gotCompacted, td.wantCompacted)
+ }
+ if !equalError(gotCompactErr, td.wantCompactErr) {
+ t.Errorf("%s: Value.Compact error mismatch:\ngot %v\nwant %v", td.name.Where, gotCompactErr, td.wantCompactErr)
+ }
+
+ gotIndented := Value(td.in)
+ gotIndentErr := gotIndented.Indent(WithIndentPrefix("\t"), WithIndent(" "))
+ if string(gotIndented) != td.wantIndented {
+ t.Errorf("%s: Value.Indent = %s, want %s", td.name.Where, gotIndented, td.wantIndented)
+ }
+ if !equalError(gotIndentErr, td.wantIndentErr) {
+ t.Errorf("%s: Value.Indent error mismatch:\ngot %v\nwant %v", td.name.Where, gotIndentErr, td.wantIndentErr)
+ }
+
+ gotCanonicalized := Value(td.in)
+ gotCanonicalizeErr := gotCanonicalized.Canonicalize()
+ if string(gotCanonicalized) != td.wantCanonicalized {
+ t.Errorf("%s: Value.Canonicalize = %s, want %s", td.name.Where, gotCanonicalized, td.wantCanonicalized)
+ }
+ if !equalError(gotCanonicalizeErr, td.wantCanonicalizeErr) {
+ t.Errorf("%s: Value.Canonicalize error mismatch:\ngot %v\nwant %v", td.name.Where, gotCanonicalizeErr, td.wantCanonicalizeErr)
+ }
+ })
+ }
+}
diff --git a/pkg/encoders/json/number_test.go b/pkg/encoders/json/number_test.go
new file mode 100644
index 0000000..69eccaa
--- /dev/null
+++ b/pkg/encoders/json/number_test.go
@@ -0,0 +1,120 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+import (
+ "regexp"
+ "testing"
+)
+
+func TestNumberIsValid(t *testing.T) {
+ // From: https://stackoverflow.com/a/13340826
+ var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`)
+
+ validTests := []string{
+ "0",
+ "-0",
+ "1",
+ "-1",
+ "0.1",
+ "-0.1",
+ "1234",
+ "-1234",
+ "12.34",
+ "-12.34",
+ "12E0",
+ "12E1",
+ "12e34",
+ "12E-0",
+ "12e+1",
+ "12e-34",
+ "-12E0",
+ "-12E1",
+ "-12e34",
+ "-12E-0",
+ "-12e+1",
+ "-12e-34",
+ "1.2E0",
+ "1.2E1",
+ "1.2e34",
+ "1.2E-0",
+ "1.2e+1",
+ "1.2e-34",
+ "-1.2E0",
+ "-1.2E1",
+ "-1.2e34",
+ "-1.2E-0",
+ "-1.2e+1",
+ "-1.2e-34",
+ "0E0",
+ "0E1",
+ "0e34",
+ "0E-0",
+ "0e+1",
+ "0e-34",
+ "-0E0",
+ "-0E1",
+ "-0e34",
+ "-0E-0",
+ "-0e+1",
+ "-0e-34",
+ }
+
+ for _, test := range validTests {
+ if !isValidNumber(test) {
+ t.Errorf("%s should be valid", test)
+ }
+
+ var f float64
+ if err := Unmarshal([]byte(test), &f); err != nil {
+ t.Errorf("%s should be valid but Unmarshal failed: %v", test, err)
+ }
+
+ if !jsonNumberRegexp.MatchString(test) {
+ t.Errorf("%s should be valid but regexp does not match", test)
+ }
+ }
+
+ invalidTests := []string{
+ "",
+ "invalid",
+ "1.0.1",
+ "1..1",
+ "-1-2",
+ "012a42",
+ "01.2",
+ "012",
+ "12E12.12",
+ "1e2e3",
+ "1e+-2",
+ "1e--23",
+ "1e",
+ "e1",
+ "1e+",
+ "1ea",
+ "1a",
+ "1.a",
+ "1.",
+ "01",
+ "1.e1",
+ }
+
+ for _, test := range invalidTests {
+ if isValidNumber(test) {
+ t.Errorf("%s should be invalid", test)
+ }
+
+ var f float64
+ if err := Unmarshal([]byte(test), &f); err == nil {
+ t.Errorf("%s should be invalid but unmarshal wrote %v", test, f)
+ }
+
+ if jsonNumberRegexp.MatchString(test) {
+ t.Errorf("%s should be invalid but matches regexp", test)
+ }
+ }
+}
diff --git a/pkg/encoders/json/scanner.go b/pkg/encoders/json/scanner.go
new file mode 100644
index 0000000..f408618
--- /dev/null
+++ b/pkg/encoders/json/scanner.go
@@ -0,0 +1,612 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+// JSON value parser state machine.
+// Just about at the limit of what is reasonable to write by hand.
+// Some parts are a bit tedious, but overall it nicely factors out the
+// otherwise common code from the multiple scanning functions
+// in this package (Compact, Indent, checkValid, etc).
+//
+// This file starts with two simple examples using the scanner
+// before diving into the scanner itself.
+
+import (
+ "strconv"
+ "sync"
+)
+
+// Valid reports whether data is a valid JSON encoding.
+func Valid(data []byte) bool {
+ scan := newScanner()
+ defer freeScanner(scan)
+ return checkValid(data, scan) == nil
+}
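+
+// sketchValid is an editor's illustrative sketch, not part of the upstream
+// file: Valid is the simplest entry point into the scanner state machine
+// defined below. The inputs are hypothetical.
+func sketchValid() (bool, bool) {
+ wellFormed := Valid([]byte(`{"k":[1,2,3]}`)) // true: syntactically valid JSON
+ malformed := Valid([]byte(`{"k":1,}`))       // false: trailing comma
+ return wellFormed, malformed
+}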
+
+// checkValid verifies that data is valid JSON-encoded data.
+// scan is passed in for use by checkValid to avoid an allocation.
+// checkValid returns nil or a SyntaxError.
+func checkValid(data []byte, scan *scanner) error {
+ scan.reset()
+ for _, c := range data {
+ scan.bytes++
+ if scan.step(scan, c) == scanError {
+ return scan.err
+ }
+ }
+ if scan.eof() == scanError {
+ return scan.err
+ }
+ return nil
+}
+
+// A SyntaxError is a description of a JSON syntax error.
+// [Unmarshal] will return a SyntaxError if the JSON can't be parsed.
+type SyntaxError struct {
+ msg string // description of error
+ Offset int64 // error occurred after reading Offset bytes
+}
+
+func (e *SyntaxError) Error() string { return e.msg }
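+
+// sketchSyntaxOffset is an editor's illustrative sketch, not part of the
+// upstream file: it shows how a caller can surface the byte offset carried
+// by a SyntaxError returned from Unmarshal.
+func sketchSyntaxOffset(data []byte) (int64, error) {
+ var v any
+ err := Unmarshal(data, &v)
+ if serr, ok := err.(*SyntaxError); ok {
+ return serr.Offset, err // number of bytes read before the error
+ }
+ return 0, err
+}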
+
+// A scanner is a JSON scanning state machine.
+// Callers call scan.reset and then pass bytes in one at a time
+// by calling scan.step(&scan, c) for each byte.
+// The return value, referred to as an opcode, tells the
+// caller about significant parsing events like beginning
+// and ending literals, objects, and arrays, so that the
+// caller can follow along if it wishes.
+// The return value scanEnd indicates that a single top-level
+// JSON value has been completed, *before* the byte that
+// just got passed in. (The indication must be delayed in order
+// to recognize the end of numbers: is 123 a whole value or
+// the beginning of 12345e+6?).
+type scanner struct {
+ // The step is a func to be called to execute the next transition.
+ // Also tried using an integer constant and a single func
+ // with a switch, but using the func directly was 10% faster
+ // on a 64-bit Mac Mini, and it's nicer to read.
+ step func(*scanner, byte) int
+
+ // Reached end of top-level value.
+ endTop bool
+
+ // Stack of what we're in the middle of - array values, object keys, object values.
+ parseState []int
+
+ // Error that happened, if any.
+ err error
+
+ // total bytes consumed, updated by decoder.Decode (and deliberately
+ // not set to zero by scan.reset)
+ bytes int64
+}
+
+var scannerPool = sync.Pool{
+ New: func() any {
+ return &scanner{}
+ },
+}
+
+func newScanner() *scanner {
+ scan := scannerPool.Get().(*scanner)
+ // scan.reset by design doesn't set bytes to zero
+ scan.bytes = 0
+ scan.reset()
+ return scan
+}
+
+func freeScanner(scan *scanner) {
+ // Avoid hanging on to too much memory in extreme cases.
+ if len(scan.parseState) > 1024 {
+ scan.parseState = nil
+ }
+ scannerPool.Put(scan)
+}
+
+// These values are returned by the state transition functions
+// assigned to scanner.step and the method scanner.eof.
+// They give details about the current state of the scan that
+// callers might be interested to know about.
+// It is okay to ignore the return value of any particular
+// call to scanner.step: if one call returns scanError,
+// every subsequent call will return scanError too.
+const (
+ // Continue.
+ scanContinue = iota // uninteresting byte
+ scanBeginLiteral // end implied by next result != scanContinue
+ scanBeginObject // begin object
+ scanObjectKey // just finished object key (string)
+ scanObjectValue // just finished non-last object value
+ scanEndObject // end object (implies scanObjectValue if possible)
+ scanBeginArray // begin array
+ scanArrayValue // just finished array value
+ scanEndArray // end array (implies scanArrayValue if possible)
+ scanSkipSpace // space byte; can skip; known to be last "continue" result
+
+ // Stop.
+ scanEnd // top-level value ended *before* this byte; known to be first "stop" result
+ scanError // hit an error, scanner.err.
+)
+
+// These values are stored in the parseState stack.
+// They give the current state of a composite value
+// being scanned. If the parser is inside a nested value
+// the parseState describes the nested state, outermost at entry 0.
+const (
+ parseObjectKey = iota // parsing object key (before colon)
+ parseObjectValue // parsing object value (after colon)
+ parseArrayValue // parsing array value
+)
+
+// This limits the max nesting depth to prevent stack overflow.
+// This is permitted by https://tools.ietf.org/html/rfc7159#section-9
+const maxNestingDepth = 10000
+
+// reset prepares the scanner for use.
+// It must be called before calling s.step.
+func (s *scanner) reset() {
+ s.step = stateBeginValue
+ s.parseState = s.parseState[0:0]
+ s.err = nil
+ s.endTop = false
+}
+
+// eof tells the scanner that the end of input has been reached.
+// It returns a scan status just as s.step does.
+func (s *scanner) eof() int {
+ if s.err != nil {
+ return scanError
+ }
+ if s.endTop {
+ return scanEnd
+ }
+ s.step(s, ' ')
+ if s.endTop {
+ return scanEnd
+ }
+ if s.err == nil {
+ s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
+ }
+ return scanError
+}
+
+// pushParseState pushes a new parse state newParseState onto the parse stack.
+// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned.
+func (s *scanner) pushParseState(c byte, newParseState int, successState int) int {
+ s.parseState = append(s.parseState, newParseState)
+ if len(s.parseState) <= maxNestingDepth {
+ return successState
+ }
+ return s.error(c, "exceeded max depth")
+}
+
+// popParseState pops a parse state (already obtained) off the stack
+// and updates s.step accordingly.
+func (s *scanner) popParseState() {
+ n := len(s.parseState) - 1
+ s.parseState = s.parseState[0:n]
+ if n == 0 {
+ s.step = stateEndTop
+ s.endTop = true
+ } else {
+ s.step = stateEndValue
+ }
+}
+
+func isSpace(c byte) bool {
+ return c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n')
+}
+
+// stateBeginValueOrEmpty is the state after reading `[`.
+func stateBeginValueOrEmpty(s *scanner, c byte) int {
+ if isSpace(c) {
+ return scanSkipSpace
+ }
+ if c == ']' {
+ return stateEndValue(s, c)
+ }
+ return stateBeginValue(s, c)
+}
+
+// stateBeginValue is the state at the beginning of the input.
+func stateBeginValue(s *scanner, c byte) int {
+ if isSpace(c) {
+ return scanSkipSpace
+ }
+ switch c {
+ case '{':
+ s.step = stateBeginStringOrEmpty
+ return s.pushParseState(c, parseObjectKey, scanBeginObject)
+ case '[':
+ s.step = stateBeginValueOrEmpty
+ return s.pushParseState(c, parseArrayValue, scanBeginArray)
+ case '"':
+ s.step = stateInString
+ return scanBeginLiteral
+ case '-':
+ s.step = stateNeg
+ return scanBeginLiteral
+ case '0': // beginning of 0.123
+ s.step = state0
+ return scanBeginLiteral
+ case 't': // beginning of true
+ s.step = stateT
+ return scanBeginLiteral
+ case 'f': // beginning of false
+ s.step = stateF
+ return scanBeginLiteral
+ case 'n': // beginning of null
+ s.step = stateN
+ return scanBeginLiteral
+ }
+ if '1' <= c && c <= '9' { // beginning of 1234.5
+ s.step = state1
+ return scanBeginLiteral
+ }
+ return s.error(c, "looking for beginning of value")
+}
+
+// stateBeginStringOrEmpty is the state after reading `{`.
+func stateBeginStringOrEmpty(s *scanner, c byte) int {
+ if isSpace(c) {
+ return scanSkipSpace
+ }
+ if c == '}' {
+ n := len(s.parseState)
+ s.parseState[n-1] = parseObjectValue
+ return stateEndValue(s, c)
+ }
+ return stateBeginString(s, c)
+}
+
+// stateBeginString is the state after reading `{"key": value,`.
+func stateBeginString(s *scanner, c byte) int {
+ if isSpace(c) {
+ return scanSkipSpace
+ }
+ if c == '"' {
+ s.step = stateInString
+ return scanBeginLiteral
+ }
+ return s.error(c, "looking for beginning of object key string")
+}
+
+// stateEndValue is the state after completing a value,
+// such as after reading `{}` or `true` or `["x"`.
+func stateEndValue(s *scanner, c byte) int {
+ n := len(s.parseState)
+ if n == 0 {
+ // Completed top-level before the current byte.
+ s.step = stateEndTop
+ s.endTop = true
+ return stateEndTop(s, c)
+ }
+ if isSpace(c) {
+ s.step = stateEndValue
+ return scanSkipSpace
+ }
+ ps := s.parseState[n-1]
+ switch ps {
+ case parseObjectKey:
+ if c == ':' {
+ s.parseState[n-1] = parseObjectValue
+ s.step = stateBeginValue
+ return scanObjectKey
+ }
+ return s.error(c, "after object key")
+ case parseObjectValue:
+ if c == ',' {
+ s.parseState[n-1] = parseObjectKey
+ s.step = stateBeginString
+ return scanObjectValue
+ }
+ if c == '}' {
+ s.popParseState()
+ return scanEndObject
+ }
+ return s.error(c, "after object key:value pair")
+ case parseArrayValue:
+ if c == ',' {
+ s.step = stateBeginValue
+ return scanArrayValue
+ }
+ if c == ']' {
+ s.popParseState()
+ return scanEndArray
+ }
+ return s.error(c, "after array element")
+ }
+ return s.error(c, "")
+}
+
+// stateEndTop is the state after finishing the top-level value,
+// such as after reading `{}` or `[1,2,3]`.
+// Only space characters should be seen now.
+func stateEndTop(s *scanner, c byte) int {
+ if !isSpace(c) {
+ // Complain about non-space byte on next call.
+ s.error(c, "after top-level value")
+ }
+ return scanEnd
+}
+
+// stateInString is the state after reading `"`.
+func stateInString(s *scanner, c byte) int {
+ if c == '"' {
+ s.step = stateEndValue
+ return scanContinue
+ }
+ if c == '\\' {
+ s.step = stateInStringEsc
+ return scanContinue
+ }
+ if c < 0x20 {
+ return s.error(c, "in string literal")
+ }
+ return scanContinue
+}
+
+// stateInStringEsc is the state after reading `"\` during a quoted string.
+func stateInStringEsc(s *scanner, c byte) int {
+ switch c {
+ case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
+ s.step = stateInString
+ return scanContinue
+ case 'u':
+ s.step = stateInStringEscU
+ return scanContinue
+ }
+ return s.error(c, "in string escape code")
+}
+
+// stateInStringEscU is the state after reading `"\u` during a quoted string.
+func stateInStringEscU(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
+ s.step = stateInStringEscU1
+ return scanContinue
+ }
+ // not a hexadecimal digit
+ return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
+func stateInStringEscU1(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
+ s.step = stateInStringEscU12
+ return scanContinue
+ }
+ // not a hexadecimal digit
+ return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
+func stateInStringEscU12(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
+ s.step = stateInStringEscU123
+ return scanContinue
+ }
+ // not a hexadecimal digit
+ return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
+func stateInStringEscU123(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
+ s.step = stateInString
+ return scanContinue
+ }
+ // not a hexadecimal digit
+ return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateNeg is the state after reading `-` during a number.
+func stateNeg(s *scanner, c byte) int {
+ if c == '0' {
+ s.step = state0
+ return scanContinue
+ }
+ if '1' <= c && c <= '9' {
+ s.step = state1
+ return scanContinue
+ }
+ return s.error(c, "in numeric literal")
+}
+
+// state1 is the state after reading a non-zero integer during a number,
+// such as after reading `1` or `100` but not `0`.
+func state1(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' {
+ s.step = state1
+ return scanContinue
+ }
+ return state0(s, c)
+}
+
+// state0 is the state after reading `0` during a number.
+func state0(s *scanner, c byte) int {
+ if c == '.' {
+ s.step = stateDot
+ return scanContinue
+ }
+ if c == 'e' || c == 'E' {
+ s.step = stateE
+ return scanContinue
+ }
+ return stateEndValue(s, c)
+}
+
+// stateDot is the state after reading the integer and decimal point in a number,
+// such as after reading `1.`.
+func stateDot(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' {
+ s.step = stateDot0
+ return scanContinue
+ }
+ return s.error(c, "after decimal point in numeric literal")
+}
+
+// stateDot0 is the state after reading the integer, decimal point, and subsequent
+// digits of a number, such as after reading `3.14`.
+func stateDot0(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' {
+ return scanContinue
+ }
+ if c == 'e' || c == 'E' {
+ s.step = stateE
+ return scanContinue
+ }
+ return stateEndValue(s, c)
+}
+
+// stateE is the state after reading the mantissa and e in a number,
+// such as after reading `314e` or `0.314e`.
+func stateE(s *scanner, c byte) int {
+ if c == '+' || c == '-' {
+ s.step = stateESign
+ return scanContinue
+ }
+ return stateESign(s, c)
+}
+
+// stateESign is the state after reading the mantissa, e, and sign in a number,
+// such as after reading `314e-` or `0.314e+`.
+func stateESign(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' {
+ s.step = stateE0
+ return scanContinue
+ }
+ return s.error(c, "in exponent of numeric literal")
+}
+
+// stateE0 is the state after reading the mantissa, e, optional sign,
+// and at least one digit of the exponent in a number,
+// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
+func stateE0(s *scanner, c byte) int {
+ if '0' <= c && c <= '9' {
+ return scanContinue
+ }
+ return stateEndValue(s, c)
+}
+
+// stateT is the state after reading `t`.
+func stateT(s *scanner, c byte) int {
+ if c == 'r' {
+ s.step = stateTr
+ return scanContinue
+ }
+ return s.error(c, "in literal true (expecting 'r')")
+}
+
+// stateTr is the state after reading `tr`.
+func stateTr(s *scanner, c byte) int {
+ if c == 'u' {
+ s.step = stateTru
+ return scanContinue
+ }
+ return s.error(c, "in literal true (expecting 'u')")
+}
+
+// stateTru is the state after reading `tru`.
+func stateTru(s *scanner, c byte) int {
+ if c == 'e' {
+ s.step = stateEndValue
+ return scanContinue
+ }
+ return s.error(c, "in literal true (expecting 'e')")
+}
+
+// stateF is the state after reading `f`.
+func stateF(s *scanner, c byte) int {
+ if c == 'a' {
+ s.step = stateFa
+ return scanContinue
+ }
+ return s.error(c, "in literal false (expecting 'a')")
+}
+
+// stateFa is the state after reading `fa`.
+func stateFa(s *scanner, c byte) int {
+ if c == 'l' {
+ s.step = stateFal
+ return scanContinue
+ }
+ return s.error(c, "in literal false (expecting 'l')")
+}
+
+// stateFal is the state after reading `fal`.
+func stateFal(s *scanner, c byte) int {
+ if c == 's' {
+ s.step = stateFals
+ return scanContinue
+ }
+ return s.error(c, "in literal false (expecting 's')")
+}
+
+// stateFals is the state after reading `fals`.
+func stateFals(s *scanner, c byte) int {
+ if c == 'e' {
+ s.step = stateEndValue
+ return scanContinue
+ }
+ return s.error(c, "in literal false (expecting 'e')")
+}
+
+// stateN is the state after reading `n`.
+func stateN(s *scanner, c byte) int {
+ if c == 'u' {
+ s.step = stateNu
+ return scanContinue
+ }
+ return s.error(c, "in literal null (expecting 'u')")
+}
+
+// stateNu is the state after reading `nu`.
+func stateNu(s *scanner, c byte) int {
+ if c == 'l' {
+ s.step = stateNul
+ return scanContinue
+ }
+ return s.error(c, "in literal null (expecting 'l')")
+}
+
+// stateNul is the state after reading `nul`.
+func stateNul(s *scanner, c byte) int {
+ if c == 'l' {
+ s.step = stateEndValue
+ return scanContinue
+ }
+ return s.error(c, "in literal null (expecting 'l')")
+}
+
+// stateError is the state after reaching a syntax error,
+// such as after reading `[1}` or `5.1.2`.
+func stateError(s *scanner, c byte) int {
+ return scanError
+}
+
+// error records an error and switches to the error state.
+func (s *scanner) error(c byte, context string) int {
+ s.step = stateError
+ s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
+ return scanError
+}
+
+// quoteChar formats c as a quoted character literal.
+func quoteChar(c byte) string {
+ // special cases - different from quoted strings
+ if c == '\'' {
+ return `'\''`
+ }
+ if c == '"' {
+ return `'"'`
+ }
+
+ // use quoted string with different quotation marks
+ s := strconv.Quote(string(c))
+ return "'" + s[1:len(s)-1] + "'"
+}
diff --git a/pkg/encoders/json/scanner_test.go b/pkg/encoders/json/scanner_test.go
new file mode 100644
index 0000000..fb64463
--- /dev/null
+++ b/pkg/encoders/json/scanner_test.go
@@ -0,0 +1,306 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+import (
+ "bytes"
+ "math"
+ "math/rand"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+func indentNewlines(s string) string {
+ return strings.Join(strings.Split(s, "\n"), "\n\t")
+}
+
+func stripWhitespace(s string) string {
+ return strings.Map(func(r rune) rune {
+ if r == ' ' || r == '\n' || r == '\r' || r == '\t' {
+ return -1
+ }
+ return r
+ }, s)
+}
+
+func TestValid(t *testing.T) {
+ tests := []struct {
+ CaseName
+ data string
+ ok bool
+ }{
+ {Name(""), `foo`, false},
+ {Name(""), `}{`, false},
+ {Name(""), `{]`, false},
+ {Name(""), `{}`, true},
+ {Name(""), `{"foo":"bar"}`, true},
+ {Name(""), `{"foo":"bar","bar":{"baz":["qux"]}}`, true},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ if ok := Valid([]byte(tt.data)); ok != tt.ok {
+ t.Errorf("%s: Valid(`%s`) = %v, want %v", tt.Where, tt.data, ok, tt.ok)
+ }
+ })
+ }
+}
+
+func TestCompactAndIndent(t *testing.T) {
+ tests := []struct {
+ CaseName
+ compact string
+ indent string
+ }{
+ {Name(""), `1`, `1`},
+ {Name(""), `{}`, `{}`},
+ {Name(""), `[]`, `[]`},
+ {Name(""), `{"":2}`, "{\n\t\"\": 2\n}"},
+ {Name(""), `[3]`, "[\n\t3\n]"},
+ {Name(""), `[1,2,3]`, "[\n\t1,\n\t2,\n\t3\n]"},
+ {Name(""), `{"x":1}`, "{\n\t\"x\": 1\n}"},
+ {Name(""), `[true,false,null,"x",1,1.5,0,-5e+2]`, `[
+ true,
+ false,
+ null,
+ "x",
+ 1,
+ 1.5,
+ 0,
+ -5e+2
+]`},
+ {Name(""), "{\"\":\"<>&\u2028\u2029\"}", "{\n\t\"\": \"<>&\u2028\u2029\"\n}"}, // See golang.org/issue/34070
+ }
+ var buf bytes.Buffer
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ buf.Reset()
+ if err := Compact(&buf, []byte(tt.compact)); err != nil {
+ t.Errorf("%s: Compact error: %v", tt.Where, err)
+ } else if got := buf.String(); got != tt.compact {
+ t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact))
+ }
+
+ buf.Reset()
+ if err := Compact(&buf, []byte(tt.indent)); err != nil {
+ t.Errorf("%s: Compact error: %v", tt.Where, err)
+ } else if got := buf.String(); got != tt.compact {
+ t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact))
+ }
+
+ buf.Reset()
+ if err := Indent(&buf, []byte(tt.indent), "", "\t"); err != nil {
+ t.Errorf("%s: Indent error: %v", tt.Where, err)
+ } else if got := buf.String(); got != tt.indent {
+ t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.indent))
+ }
+
+ buf.Reset()
+ if err := Indent(&buf, []byte(tt.compact), "", "\t"); err != nil {
+ t.Errorf("%s: Indent error: %v", tt.Where, err)
+ } else if got := buf.String(); got != tt.indent {
+ t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.indent))
+ }
+ })
+ }
+}
+
+func TestCompactSeparators(t *testing.T) {
+ // U+2028 and U+2029 should be escaped inside strings.
+ // They should not appear outside strings.
+ tests := []struct {
+ CaseName
+ in, compact string
+ }{
+ {Name(""), "{\"\u2028\": 1}", "{\"\u2028\":1}"},
+ {Name(""), "{\"\u2029\" :2}", "{\"\u2029\":2}"},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ var buf bytes.Buffer
+ if err := Compact(&buf, []byte(tt.in)); err != nil {
+ t.Errorf("%s: Compact error: %v", tt.Where, err)
+ } else if got := buf.String(); got != tt.compact {
+ t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact))
+ }
+ })
+ }
+}
+
+// Tests of a large random structure.
+
+func TestCompactBig(t *testing.T) {
+ initBig()
+ var buf bytes.Buffer
+ if err := Compact(&buf, jsonBig); err != nil {
+ t.Fatalf("Compact error: %v", err)
+ }
+ b := buf.Bytes()
+ if !bytes.Equal(b, jsonBig) {
+ t.Error("Compact:")
+ diff(t, b, jsonBig)
+ return
+ }
+}
+
+func TestIndentBig(t *testing.T) {
+ t.Parallel()
+ initBig()
+ var buf bytes.Buffer
+ if err := Indent(&buf, jsonBig, "", "\t"); err != nil {
+ t.Fatalf("Indent error: %v", err)
+ }
+ b := buf.Bytes()
+ if len(b) == len(jsonBig) {
+ // jsonBig is compact (no unnecessary spaces);
+ // indenting should make it bigger
+ t.Fatalf("Indent did not expand the input")
+ }
+
+ // should be idempotent
+ var buf1 bytes.Buffer
+ if err := Indent(&buf1, b, "", "\t"); err != nil {
+ t.Fatalf("Indent error: %v", err)
+ }
+ b1 := buf1.Bytes()
+ if !bytes.Equal(b1, b) {
+ t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig):")
+ diff(t, b1, b)
+ return
+ }
+
+ // should get back to original
+ buf1.Reset()
+ if err := Compact(&buf1, b); err != nil {
+ t.Fatalf("Compact error: %v", err)
+ }
+ b1 = buf1.Bytes()
+ if !bytes.Equal(b1, jsonBig) {
+ t.Error("Compact(Indent(jsonBig)) != jsonBig:")
+ diff(t, b1, jsonBig)
+ return
+ }
+}
+
+func TestIndentErrors(t *testing.T) {
+ tests := []struct {
+ CaseName
+ in string
+ err error
+ }{
+ {Name(""), `{"X": "foo", "Y"}`, &SyntaxError{"invalid character '}' after object key", 17}},
+ {Name(""), `{"X": "foo" "Y": "bar"}`, &SyntaxError{"invalid character '\"' after object key:value pair", 13}},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ slice := make([]uint8, 0)
+ buf := bytes.NewBuffer(slice)
+ if err := Indent(buf, []uint8(tt.in), "", ""); err != nil {
+ if !reflect.DeepEqual(err, tt.err) {
+ t.Fatalf("%s: Indent error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err)
+ }
+ }
+ })
+ }
+}
+
+func diff(t *testing.T, a, b []byte) {
+ t.Helper()
+ for i := 0; ; i++ {
+ if i >= len(a) || i >= len(b) || a[i] != b[i] {
+ j := i - 10
+ if j < 0 {
+ j = 0
+ }
+ t.Errorf("diverge at %d: «%s» vs «%s»", i, trim(a[j:]), trim(b[j:]))
+ return
+ }
+ }
+}
+
+func trim(b []byte) []byte {
+ return b[:min(len(b), 20)]
+}
+
+// Generate a random JSON object.
+
+var jsonBig []byte
+
+func initBig() {
+ n := 10000
+ if testing.Short() {
+ n = 100
+ }
+ b, err := Marshal(genValue(n))
+ if err != nil {
+ panic(err)
+ }
+ jsonBig = b
+}
+
+func genValue(n int) any {
+ if n > 1 {
+ switch rand.Intn(2) {
+ case 0:
+ return genArray(n)
+ case 1:
+ return genMap(n)
+ }
+ }
+ switch rand.Intn(3) {
+ case 0:
+ return rand.Intn(2) == 0
+ case 1:
+ return rand.NormFloat64()
+ case 2:
+ return genString(30)
+ }
+ panic("unreachable")
+}
+
+func genString(stddev float64) string {
+ n := int(math.Abs(rand.NormFloat64()*stddev + stddev/2))
+ c := make([]rune, n)
+ for i := range c {
+ f := math.Abs(rand.NormFloat64()*64 + 32)
+ if f > 0x10ffff {
+ f = 0x10ffff
+ }
+ c[i] = rune(f)
+ }
+ return string(c)
+}
+
+func genArray(n int) []any {
+ f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2)))
+ if f > n {
+ f = n
+ }
+ if f < 1 {
+ f = 1
+ }
+ x := make([]any, f)
+ for i := range x {
+ x[i] = genValue(((i+1)*n)/f - (i*n)/f)
+ }
+ return x
+}
+
+func genMap(n int) map[string]any {
+ f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2)))
+ if f > n {
+ f = n
+ }
+ if n > 0 && f == 0 {
+ f = 1
+ }
+ x := make(map[string]any)
+ for i := 0; i < f; i++ {
+ x[genString(10)] = genValue(((i+1)*n)/f - (i*n)/f)
+ }
+ return x
+}
diff --git a/pkg/encoders/json/stream.go b/pkg/encoders/json/stream.go
new file mode 100644
index 0000000..fc480c9
--- /dev/null
+++ b/pkg/encoders/json/stream.go
@@ -0,0 +1,514 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+import (
+ "bytes"
+ "errors"
+ "io"
+)
+
+// A Decoder reads and decodes JSON values from an input stream.
+type Decoder struct {
+ r io.Reader
+ buf []byte
+ d decodeState
+ scanp int // start of unread data in buf
+ scanned int64 // amount of data already scanned
+ scan scanner
+ err error
+
+ tokenState int
+ tokenStack []int
+}
+
+// NewDecoder returns a new decoder that reads from r.
+//
+// The decoder introduces its own buffering and may
+// read data from r beyond the JSON values requested.
+func NewDecoder(r io.Reader) *Decoder {
+ return &Decoder{r: r}
+}
+
+// UseNumber causes the Decoder to unmarshal a number into an
+// interface value as a [Number] instead of as a float64.
+func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
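+
+// sketchUseNumber is an editor's illustrative sketch, not part of the
+// upstream file: with UseNumber, a large integer survives decoding into an
+// interface value instead of being rounded as a float64. Input is hypothetical.
+func sketchUseNumber() (any, error) {
+ dec := NewDecoder(bytes.NewReader([]byte(`{"id":9007199254740993}`)))
+ dec.UseNumber() // numbers decode as Number (a string) rather than float64
+ var m map[string]any
+ err := dec.Decode(&m)
+ return m["id"], err // Number("9007199254740993"), exact
+}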
+
+// DisallowUnknownFields causes the Decoder to return an error when the destination
+// is a struct and the input contains object keys which do not match any
+// non-ignored, exported fields in the destination.
+func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
+
+// Decode reads the next JSON-encoded value from its
+// input and stores it in the value pointed to by v.
+//
+// See the documentation for [Unmarshal] for details about
+// the conversion of JSON into a Go value.
+func (dec *Decoder) Decode(v any) error {
+ if dec.err != nil {
+ return dec.err
+ }
+
+ if err := dec.tokenPrepareForDecode(); err != nil {
+ return err
+ }
+
+ if !dec.tokenValueAllowed() {
+ return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
+ }
+
+ // Read whole value into buffer.
+ n, err := dec.readValue()
+ if err != nil {
+ return err
+ }
+ dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
+ dec.scanp += n
+
+ // Don't save err from unmarshal into dec.err:
+ // the connection is still usable since we read a complete JSON
+ // object from it before the error happened.
+ err = dec.d.unmarshal(v)
+
+ // fixup token streaming state
+ dec.tokenValueEnd()
+
+ return err
+}
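+
+// sketchDecodeStream is an editor's illustrative sketch, not part of the
+// upstream file: Decode is called in a loop until io.EOF to consume a stream
+// of concatenated JSON values, which is the pattern this method is built for.
+func sketchDecodeStream() ([]map[string]any, error) {
+ dec := NewDecoder(bytes.NewReader([]byte(`{"n":1}{"n":2}{"n":3}`)))
+ var out []map[string]any
+ for {
+ var m map[string]any
+ if err := dec.Decode(&m); err == io.EOF {
+ break
+ } else if err != nil {
+ return nil, err
+ }
+ out = append(out, m)
+ }
+ return out, nil
+}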
+
+// Buffered returns a reader of the data remaining in the Decoder's
+// buffer. The reader is valid until the next call to [Decoder.Decode].
+func (dec *Decoder) Buffered() io.Reader {
+ return bytes.NewReader(dec.buf[dec.scanp:])
+}
+
+// readValue reads a JSON value into dec.buf.
+// It returns the length of the encoding.
+func (dec *Decoder) readValue() (int, error) {
+ dec.scan.reset()
+
+ scanp := dec.scanp
+ var err error
+Input:
+ // help the compiler see that scanp is never negative, so it can remove
+ // some bounds checks below.
+ for scanp >= 0 {
+
+ // Look in the buffer for a new value.
+ for ; scanp < len(dec.buf); scanp++ {
+ c := dec.buf[scanp]
+ dec.scan.bytes++
+ switch dec.scan.step(&dec.scan, c) {
+ case scanEnd:
+ // scanEnd is delayed one byte so we decrement
+ // the scanner bytes count by 1 to ensure that
+ // this value is correct in the next call of Decode.
+ dec.scan.bytes--
+ break Input
+ case scanEndObject, scanEndArray:
+ // scanEnd is delayed one byte.
+ // We might block trying to get that byte from src,
+ // so instead invent a space byte.
+ if stateEndValue(&dec.scan, ' ') == scanEnd {
+ scanp++
+ break Input
+ }
+ case scanError:
+ dec.err = dec.scan.err
+ return 0, dec.scan.err
+ }
+ }
+
+ // Did the last read have an error?
+ // Delayed until now to allow buffer scan.
+ if err != nil {
+ if err == io.EOF {
+ if dec.scan.step(&dec.scan, ' ') == scanEnd {
+ break Input
+ }
+ if nonSpace(dec.buf) {
+ err = io.ErrUnexpectedEOF
+ }
+ }
+ dec.err = err
+ return 0, err
+ }
+
+ n := scanp - dec.scanp
+ err = dec.refill()
+ scanp = dec.scanp + n
+ }
+ return scanp - dec.scanp, nil
+}
+
+func (dec *Decoder) refill() error {
+ // Make room to read more into the buffer.
+ // First slide down data already consumed.
+ if dec.scanp > 0 {
+ dec.scanned += int64(dec.scanp)
+ n := copy(dec.buf, dec.buf[dec.scanp:])
+ dec.buf = dec.buf[:n]
+ dec.scanp = 0
+ }
+
+ // Grow buffer if not large enough.
+ const minRead = 512
+ if cap(dec.buf)-len(dec.buf) < minRead {
+ newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
+ copy(newBuf, dec.buf)
+ dec.buf = newBuf
+ }
+
+ // Read. Delay error for next iteration (after scan).
+ n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
+ dec.buf = dec.buf[0 : len(dec.buf)+n]
+
+ return err
+}
+
+func nonSpace(b []byte) bool {
+ for _, c := range b {
+ if !isSpace(c) {
+ return true
+ }
+ }
+ return false
+}
+
+// An Encoder writes JSON values to an output stream.
+type Encoder struct {
+ w io.Writer
+ err error
+ escapeHTML bool
+
+ indentBuf []byte
+ indentPrefix string
+ indentValue string
+}
+
+// NewEncoder returns a new encoder that writes to w.
+func NewEncoder(w io.Writer) *Encoder {
+ return &Encoder{w: w, escapeHTML: true}
+}
+
+// Encode writes the JSON encoding of v to the stream,
+// with insignificant space characters elided,
+// followed by a newline character.
+//
+// See the documentation for [Marshal] for details about the
+// conversion of Go values to JSON.
+func (enc *Encoder) Encode(v any) error {
+ if enc.err != nil {
+ return enc.err
+ }
+
+ e := newEncodeState()
+ defer encodeStatePool.Put(e)
+
+ err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
+ if err != nil {
+ return err
+ }
+
+ // Terminate each value with a newline.
+ // This makes the output look a little nicer
+ // when debugging, and some kind of space
+ // is required if the encoded value was a number,
+ // so that the reader knows there aren't more
+ // digits coming.
+ e.WriteByte('\n')
+
+ b := e.Bytes()
+ if enc.indentPrefix != "" || enc.indentValue != "" {
+ enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue)
+ if err != nil {
+ return err
+ }
+ b = enc.indentBuf
+ }
+ if _, err = enc.w.Write(b); err != nil {
+ enc.err = err
+ }
+ return err
+}
+
+// SetIndent instructs the encoder to format each subsequent encoded
+// value as if indented by the package-level function Indent(dst, src, prefix, indent).
+// Calling SetIndent("", "") disables indentation.
+func (enc *Encoder) SetIndent(prefix, indent string) {
+ enc.indentPrefix = prefix
+ enc.indentValue = indent
+}
+
+// SetEscapeHTML specifies whether problematic HTML characters
+// should be escaped inside JSON quoted strings.
+// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
+// to avoid certain safety problems that can arise when embedding JSON in HTML.
+//
+// In non-HTML settings where the escaping interferes with the readability
+// of the output, SetEscapeHTML(false) disables this behavior.
+func (enc *Encoder) SetEscapeHTML(on bool) {
+ enc.escapeHTML = on
+}
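+
+// sketchEncodeStream is an editor's illustrative sketch, not part of the
+// upstream file: it streams two hypothetical values with HTML escaping
+// disabled, as described above.
+func sketchEncodeStream() (string, error) {
+ var buf bytes.Buffer
+ enc := NewEncoder(&buf)
+ enc.SetEscapeHTML(false) // keep <, >, and & readable in the output
+ for _, v := range []any{map[string]string{"tag": "<b>"}, 42} {
+ if err := enc.Encode(v); err != nil {
+ return "", err
+ }
+ }
+ return buf.String(), nil // "{\"tag\":\"<b>\"}\n42\n"
+}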
+
+// RawMessage is a raw encoded JSON value.
+// It implements [Marshaler] and [Unmarshaler] and can
+// be used to delay JSON decoding or precompute a JSON encoding.
+type RawMessage []byte
+
+// MarshalJSON returns m as the JSON encoding of m.
+func (m RawMessage) MarshalJSON() ([]byte, error) {
+ if m == nil {
+ return []byte("null"), nil
+ }
+ return m, nil
+}
+
+// UnmarshalJSON sets *m to a copy of data.
+func (m *RawMessage) UnmarshalJSON(data []byte) error {
+ if m == nil {
+ return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
+ }
+ *m = append((*m)[0:0], data...)
+ return nil
+}
+
+var _ Marshaler = (*RawMessage)(nil)
+var _ Unmarshaler = (*RawMessage)(nil)
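+
+// sketchRawMessage is an editor's illustrative sketch, not part of the
+// upstream file: RawMessage defers decoding of a payload until an envelope
+// field has been inspected. The envelope shape here is hypothetical.
+func sketchRawMessage(data []byte) (any, error) {
+ var env struct {
+ Kind string
+ Payload RawMessage // kept as raw bytes on the first pass
+ }
+ if err := Unmarshal(data, &env); err != nil {
+ return nil, err
+ }
+ // A real caller would switch on env.Kind and decode into a concrete type.
+ var payload any
+ err := Unmarshal(env.Payload, &payload)
+ return payload, err
+}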
+
+// A Token holds a value of one of these types:
+//
+// - [Delim], for the four JSON delimiters [ ] { }
+// - bool, for JSON booleans
+// - float64, for JSON numbers
+// - [Number], for JSON numbers
+// - string, for JSON string literals
+// - nil, for JSON null
+type Token any
+
+const (
+ tokenTopValue = iota
+ tokenArrayStart
+ tokenArrayValue
+ tokenArrayComma
+ tokenObjectStart
+ tokenObjectKey
+ tokenObjectColon
+ tokenObjectValue
+ tokenObjectComma
+)
+
+// advance tokenstate from a separator state to a value state
+func (dec *Decoder) tokenPrepareForDecode() error {
+ // Note: Not calling peek before switch, to avoid
+ // putting peek into the standard Decode path.
+ // peek is only called when using the Token API.
+ switch dec.tokenState {
+ case tokenArrayComma:
+ c, err := dec.peek()
+ if err != nil {
+ return err
+ }
+ if c != ',' {
+ return &SyntaxError{"expected comma after array element", dec.InputOffset()}
+ }
+ dec.scanp++
+ dec.tokenState = tokenArrayValue
+ case tokenObjectColon:
+ c, err := dec.peek()
+ if err != nil {
+ return err
+ }
+ if c != ':' {
+ return &SyntaxError{"expected colon after object key", dec.InputOffset()}
+ }
+ dec.scanp++
+ dec.tokenState = tokenObjectValue
+ }
+ return nil
+}
+
+func (dec *Decoder) tokenValueAllowed() bool {
+ switch dec.tokenState {
+ case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
+ return true
+ }
+ return false
+}
+
+func (dec *Decoder) tokenValueEnd() {
+ switch dec.tokenState {
+ case tokenArrayStart, tokenArrayValue:
+ dec.tokenState = tokenArrayComma
+ case tokenObjectValue:
+ dec.tokenState = tokenObjectComma
+ }
+}
+
+// A Delim is a JSON array or object delimiter, one of [ ] { or }.
+type Delim rune
+
+func (d Delim) String() string {
+ return string(d)
+}
+
+// Token returns the next JSON token in the input stream.
+// At the end of the input stream, Token returns nil, [io.EOF].
+//
+// Token guarantees that the delimiters [ ] { } it returns are
+// properly nested and matched: if Token encounters an unexpected
+// delimiter in the input, it will return an error.
+//
+// The input stream consists of basic JSON values—bool, string,
+// number, and null—along with delimiters [ ] { } of type [Delim]
+// to mark the start and end of arrays and objects.
+// Commas and colons are elided.
+func (dec *Decoder) Token() (Token, error) {
+ for {
+ c, err := dec.peek()
+ if err != nil {
+ return nil, err
+ }
+ switch c {
+ case '[':
+ if !dec.tokenValueAllowed() {
+ return dec.tokenError(c)
+ }
+ dec.scanp++
+ dec.tokenStack = append(dec.tokenStack, dec.tokenState)
+ dec.tokenState = tokenArrayStart
+ return Delim('['), nil
+
+ case ']':
+ if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
+ return dec.tokenError(c)
+ }
+ dec.scanp++
+ dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
+ dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
+ dec.tokenValueEnd()
+ return Delim(']'), nil
+
+ case '{':
+ if !dec.tokenValueAllowed() {
+ return dec.tokenError(c)
+ }
+ dec.scanp++
+ dec.tokenStack = append(dec.tokenStack, dec.tokenState)
+ dec.tokenState = tokenObjectStart
+ return Delim('{'), nil
+
+ case '}':
+ if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
+ return dec.tokenError(c)
+ }
+ dec.scanp++
+ dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
+ dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
+ dec.tokenValueEnd()
+ return Delim('}'), nil
+
+ case ':':
+ if dec.tokenState != tokenObjectColon {
+ return dec.tokenError(c)
+ }
+ dec.scanp++
+ dec.tokenState = tokenObjectValue
+ continue
+
+ case ',':
+ if dec.tokenState == tokenArrayComma {
+ dec.scanp++
+ dec.tokenState = tokenArrayValue
+ continue
+ }
+ if dec.tokenState == tokenObjectComma {
+ dec.scanp++
+ dec.tokenState = tokenObjectKey
+ continue
+ }
+ return dec.tokenError(c)
+
+ case '"':
+ if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
+ var x string
+ old := dec.tokenState
+ dec.tokenState = tokenTopValue
+ err := dec.Decode(&x)
+ dec.tokenState = old
+ if err != nil {
+ return nil, err
+ }
+ dec.tokenState = tokenObjectColon
+ return x, nil
+ }
+ fallthrough
+
+ default:
+ if !dec.tokenValueAllowed() {
+ return dec.tokenError(c)
+ }
+ var x any
+ if err := dec.Decode(&x); err != nil {
+ return nil, err
+ }
+ return x, nil
+ }
+ }
+}
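+
+// sketchTokenWalk is an editor's illustrative sketch, not part of the
+// upstream file: it walks a small hypothetical document with the Token API,
+// counting tokens until io.EOF.
+func sketchTokenWalk() (int, error) {
+ dec := NewDecoder(bytes.NewReader([]byte(`{"a":[1,2],"b":true}`)))
+ n := 0
+ for {
+ tok, err := dec.Token()
+ if err == io.EOF {
+ return n, nil // 9 tokens: {, "a", [, 1, 2, ], "b", true, }
+ }
+ if err != nil {
+ return 0, err
+ }
+ _ = tok // one of Delim, string, float64, bool, or nil
+ n++
+ }
+}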
+
+func (dec *Decoder) tokenError(c byte) (Token, error) {
+ var context string
+ switch dec.tokenState {
+ case tokenTopValue:
+ context = " looking for beginning of value"
+ case tokenArrayStart, tokenArrayValue, tokenObjectValue:
+ context = " looking for beginning of value"
+ case tokenArrayComma:
+ context = " after array element"
+ case tokenObjectKey:
+ context = " looking for beginning of object key string"
+ case tokenObjectColon:
+ context = " after object key"
+ case tokenObjectComma:
+ context = " after object key:value pair"
+ }
+ return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
+}
+
+// More reports whether there is another element in the
+// current array or object being parsed.
+func (dec *Decoder) More() bool {
+ c, err := dec.peek()
+ return err == nil && c != ']' && c != '}'
+}
+
+func (dec *Decoder) peek() (byte, error) {
+ var err error
+ for {
+ for i := dec.scanp; i < len(dec.buf); i++ {
+ c := dec.buf[i]
+ if isSpace(c) {
+ continue
+ }
+ dec.scanp = i
+ return c, nil
+ }
+ // buffer has been scanned, now report any error
+ if err != nil {
+ return 0, err
+ }
+ err = dec.refill()
+ }
+}
+
+// InputOffset returns the input stream byte offset of the current decoder position.
+// The offset gives the location of the end of the most recently returned token
+// and the beginning of the next token.
+func (dec *Decoder) InputOffset() int64 {
+ return dec.scanned + int64(dec.scanp)
+}
diff --git a/pkg/encoders/json/stream_test.go b/pkg/encoders/json/stream_test.go
new file mode 100644
index 0000000..478ee18
--- /dev/null
+++ b/pkg/encoders/json/stream_test.go
@@ -0,0 +1,524 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "log"
+ "net"
+ "net/http"
+ "net/http/httptest"
+ "path"
+ "reflect"
+ "runtime"
+ "runtime/debug"
+ "strings"
+ "testing"
+)
+
+// TODO(https://go.dev/issue/52751): Replace with native testing support.
+
+// CaseName is a case name annotated with a file and line.
+type CaseName struct {
+ Name string
+ Where CasePos
+}
+
+// Name annotates a case name with the file and line of the caller.
+func Name(s string) (c CaseName) {
+ c.Name = s
+ runtime.Callers(2, c.Where.pc[:])
+ return c
+}
+
+// CasePos represents a file and line number.
+type CasePos struct{ pc [1]uintptr }
+
+func (pos CasePos) String() string {
+ frames := runtime.CallersFrames(pos.pc[:])
+ frame, _ := frames.Next()
+ return fmt.Sprintf("%s:%d", path.Base(frame.File), frame.Line)
+}
+
+// Test values for the stream test.
+// One of each JSON kind.
+var streamTest = []any{
+ 0.1,
+ "hello",
+ nil,
+ true,
+ false,
+ []any{"a", "b", "c"},
+ map[string]any{"K": "Kelvin", "ß": "long s"},
+ 3.14, // another value to make sure something can follow map
+}
+
+var streamEncoded = `0.1
+"hello"
+null
+true
+false
+["a","b","c"]
+{"ß":"long s","K":"Kelvin"}
+3.14
+`
+
+func TestEncoder(t *testing.T) {
+ for i := 0; i <= len(streamTest); i++ {
+ var buf strings.Builder
+ enc := NewEncoder(&buf)
+ // Check that enc.SetIndent("", "") turns off indentation.
+ enc.SetIndent(">", ".")
+ enc.SetIndent("", "")
+ for j, v := range streamTest[0:i] {
+ if err := enc.Encode(v); err != nil {
+ t.Fatalf("#%d.%d Encode error: %v", i, j, err)
+ }
+ }
+ if got, want := buf.String(), nlines(streamEncoded, i); got != want {
+ t.Errorf("encoding %d items: mismatch:", i)
+ diff(t, []byte(got), []byte(want))
+ break
+ }
+ }
+}
+
+func TestEncoderErrorAndReuseEncodeState(t *testing.T) {
+ // Disable the GC temporarily to prevent encodeStates in the Pool from being cleaned away during the test.
+ percent := debug.SetGCPercent(-1)
+ defer debug.SetGCPercent(percent)
+
+ // Trigger an error in Marshal with cyclic data.
+ type Dummy struct {
+ Name string
+ Next *Dummy
+ }
+ dummy := Dummy{Name: "Dummy"}
+ dummy.Next = &dummy
+
+ var buf bytes.Buffer
+ enc := NewEncoder(&buf)
+ if err := enc.Encode(dummy); err == nil {
+ t.Errorf("Encode(dummy) error: got nil, want non-nil")
+ }
+
+ type Data struct {
+ A string
+ I int
+ }
+ want := Data{A: "a", I: 1}
+ if err := enc.Encode(want); err != nil {
+ t.Errorf("Marshal error: %v", err)
+ }
+
+ var got Data
+ if err := Unmarshal(buf.Bytes(), &got); err != nil {
+ t.Errorf("Unmarshal error: %v", err)
+ }
+ if got != want {
+ t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %v\n\twant: %v", got, want)
+ }
+}
+
+var streamEncodedIndent = `0.1
+"hello"
+null
+true
+false
+[
+>."a",
+>."b",
+>."c"
+>]
+{
+>."ß": "long s",
+>."K": "Kelvin"
+>}
+3.14
+`
+
+func TestEncoderIndent(t *testing.T) {
+ var buf strings.Builder
+ enc := NewEncoder(&buf)
+ enc.SetIndent(">", ".")
+ for _, v := range streamTest {
+ enc.Encode(v)
+ }
+ if got, want := buf.String(), streamEncodedIndent; got != want {
+ t.Errorf("Encode mismatch:\ngot:\n%s\n\nwant:\n%s", got, want)
+ diff(t, []byte(got), []byte(want))
+ }
+}
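As the two tests above show, SetIndent with a non-empty prefix and indent switches the encoder to indented output, and SetIndent("", "") switches it back off. A small hedged sketch using the standard encoding/json encoder, assumed here to match the vendored one:

```go
package main

import (
	"encoding/json"
	"os"
)

func main() {
	enc := json.NewEncoder(os.Stdout)

	enc.SetIndent(">", ".")
	enc.Encode([]string{"a", "b"}) // indented: each level prefixed with ">" and indented with "."

	enc.SetIndent("", "")
	enc.Encode([]string{"a", "b"}) // compact again, still newline-terminated
}
```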
+
+type strMarshaler string
+
+func (s strMarshaler) MarshalJSON() ([]byte, error) {
+ return []byte(s), nil
+}
+
+type strPtrMarshaler string
+
+func (s *strPtrMarshaler) MarshalJSON() ([]byte, error) {
+ return []byte(*s), nil
+}
+
+func TestEncoderSetEscapeHTML(t *testing.T) {
+ var c C
+ var ct CText
+ var tagStruct struct {
+ Valid int `json:"<>&#! "`
+ Invalid int `json:"\\"`
+ }
+
+ // This case is particularly interesting, as we force the encoder to
+ // take the address of the Ptr field to use its MarshalJSON method. This
+ // is why the '&' is important.
+ marshalerStruct := &struct {
+ NonPtr strMarshaler
+ Ptr strPtrMarshaler
+ }{`"<str>"`, `"<str>"`}
+
+ // https://golang.org/issue/34154
+ stringOption := struct {
+ Bar string `json:"bar,string"`
+ }{`<html>foobar</html>`}
+
+ tests := []struct {
+ CaseName
+ v any
+ wantEscape string
+ want string
+ }{
+ {Name("c"), c, `"\u003c\u0026\u003e"`, `"<&>"`},
+ {Name("ct"), ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`},
+ {Name(`"<&>"`), "<&>", `"\u003c\u0026\u003e"`, `"<&>"`},
+ {
+ Name("tagStruct"), tagStruct,
+ `{"\u003c\u003e\u0026#! ":0,"Invalid":0}`,
+ `{"<>&#! ":0,"Invalid":0}`,
+ },
+ {
+ Name(`"<str>"`), marshalerStruct,
+ `{"NonPtr":"\u003cstr\u003e","Ptr":"\u003cstr\u003e"}`,
+ `{"NonPtr":"<str>","Ptr":"<str>"}`,
+ },
+ {
+ Name("stringOption"), stringOption,
+ `{"bar":"\"\\u003chtml\\u003efoobar\\u003c/html\\u003e\""}`,
+ `{"bar":"\"<html>foobar</html>\""}`,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ var buf strings.Builder
+ enc := NewEncoder(&buf)
+ if err := enc.Encode(tt.v); err != nil {
+ t.Fatalf("%s: Encode(%s) error: %s", tt.Where, tt.Name, err)
+ }
+ if got := strings.TrimSpace(buf.String()); got != tt.wantEscape {
+ t.Errorf("%s: Encode(%s):\n\tgot: %s\n\twant: %s", tt.Where, tt.Name, got, tt.wantEscape)
+ }
+ buf.Reset()
+ enc.SetEscapeHTML(false)
+ if err := enc.Encode(tt.v); err != nil {
+ t.Fatalf("%s: SetEscapeHTML(false) Encode(%s) error: %s", tt.Where, tt.Name, err)
+ }
+ if got := strings.TrimSpace(buf.String()); got != tt.want {
+ t.Errorf("%s: SetEscapeHTML(false) Encode(%s):\n\tgot: %s\n\twant: %s",
+ tt.Where, tt.Name, got, tt.want)
+ }
+ })
+ }
+}
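The table above pins down the exact escaping differences; as a standalone illustration of the knob itself (standard encoding/json, assumed equivalent for this setting):

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	v := map[string]string{"html": "<b> & </b>"}

	var escaped strings.Builder
	enc := json.NewEncoder(&escaped)
	enc.Encode(v) // default: <, >, & are written as \u003c, \u003e, \u0026

	var plain strings.Builder
	enc = json.NewEncoder(&plain)
	enc.SetEscapeHTML(false)
	enc.Encode(v) // literal <, >, & are preserved

	fmt.Print(escaped.String(), plain.String())
}
```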
+
+func TestDecoder(t *testing.T) {
+ for i := 0; i <= len(streamTest); i++ {
+ // Use stream without newlines as input,
+ // just to stress the decoder even more.
+ // Our test input does not include back-to-back numbers.
+ // Otherwise stripping the newlines would
+ // merge two adjacent JSON values.
+ var buf bytes.Buffer
+ for _, c := range nlines(streamEncoded, i) {
+ if c != '\n' {
+ buf.WriteRune(c)
+ }
+ }
+ out := make([]any, i)
+ dec := NewDecoder(&buf)
+ for j := range out {
+ if err := dec.Decode(&out[j]); err != nil {
+ t.Fatalf("decode #%d/%d error: %v", j, i, err)
+ }
+ }
+ if !reflect.DeepEqual(out, streamTest[0:i]) {
+ t.Errorf("decoding %d items: mismatch:", i)
+ for j := range out {
+ if !reflect.DeepEqual(out[j], streamTest[j]) {
+ t.Errorf("#%d:\n\tgot: %v\n\twant: %v", j, out[j], streamTest[j])
+ }
+ }
+ break
+ }
+ }
+}
+
+func TestDecoderBuffered(t *testing.T) {
+ r := strings.NewReader(`{"Name": "Gopher"} extra `)
+ var m struct {
+ Name string
+ }
+ d := NewDecoder(r)
+ err := d.Decode(&m)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if m.Name != "Gopher" {
+ t.Errorf("Name = %s, want Gopher", m.Name)
+ }
+ rest, err := io.ReadAll(d.Buffered())
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got, want := string(rest), " extra "; got != want {
+ t.Errorf("Remaining = %s, want %s", got, want)
+ }
+}
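Buffered exposes whatever the decoder has read ahead but not yet consumed, which lets a caller hand the rest of a mixed stream to another reader. A hedged sketch of that pattern, using the standard encoding/json API that this vendored copy is assumed to mirror:

```go
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"strings"
)

func main() {
	r := strings.NewReader(`{"Name":"Gopher"} trailing bytes`)
	dec := json.NewDecoder(r)

	var m struct{ Name string }
	if err := dec.Decode(&m); err != nil {
		panic(err)
	}
	// Anything the decoder buffered past the first value, followed by what
	// is still unread in r, is the remainder of the stream.
	rest, _ := io.ReadAll(io.MultiReader(dec.Buffered(), r))
	fmt.Printf("decoded %q, remainder %q\n", m.Name, rest)
}
```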
+
+func nlines(s string, n int) string {
+ if n <= 0 {
+ return ""
+ }
+ for i, c := range s {
+ if c == '\n' {
+ if n--; n == 0 {
+ return s[0 : i+1]
+ }
+ }
+ }
+ return s
+}
+
+func TestRawMessage(t *testing.T) {
+ var data struct {
+ X float64
+ Id RawMessage
+ Y float32
+ }
+ const raw = `["\u0056",null]`
+ const want = `{"X":0.1,"Id":["\u0056",null],"Y":0.2}`
+ err := Unmarshal([]byte(want), &data)
+ if err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if string([]byte(data.Id)) != raw {
+ t.Fatalf("Unmarshal:\n\tgot: %s\n\twant: %s", []byte(data.Id), raw)
+ }
+ got, err := Marshal(&data)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ if string(got) != want {
+ t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want)
+ }
+}
+
+func TestNullRawMessage(t *testing.T) {
+ var data struct {
+ X float64
+ Id RawMessage
+ IdPtr *RawMessage
+ Y float32
+ }
+ const want = `{"X":0.1,"Id":null,"IdPtr":null,"Y":0.2}`
+ err := Unmarshal([]byte(want), &data)
+ if err != nil {
+ t.Fatalf("Unmarshal error: %v", err)
+ }
+ if want, got := "null", string(data.Id); want != got {
+ t.Fatalf("Unmarshal:\n\tgot: %s\n\twant: %s", got, want)
+ }
+ if data.IdPtr != nil {
+ t.Fatalf("pointer mismatch: got non-nil, want nil")
+ }
+ got, err := Marshal(&data)
+ if err != nil {
+ t.Fatalf("Marshal error: %v", err)
+ }
+ if string(got) != want {
+ t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want)
+ }
+}
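RawMessage keeps a value's encoding verbatim (including escapes such as \u0056 above) so decoding can be deferred until more context is known. A short sketch with a hypothetical envelope type:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Envelope defers decoding of Payload until Kind has been inspected.
type Envelope struct {
	Kind    string
	Payload json.RawMessage // stored byte-for-byte as it appeared in the input
}

func main() {
	var env Envelope
	if err := json.Unmarshal([]byte(`{"Kind":"point","Payload":{"X":1,"Y":2}}`), &env); err != nil {
		panic(err)
	}
	if env.Kind == "point" {
		var p struct{ X, Y int }
		if err := json.Unmarshal(env.Payload, &p); err != nil {
			panic(err)
		}
		fmt.Println(p.X, p.Y)
	}
}
```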
+
+func TestBlocking(t *testing.T) {
+ tests := []struct {
+ CaseName
+ in string
+ }{
+ {Name(""), `{"x": 1}`},
+ {Name(""), `[1, 2, 3]`},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ r, w := net.Pipe()
+ go w.Write([]byte(tt.in))
+ var val any
+
+ // If Decode reads beyond what w.Write writes above,
+ // it will block, and the test will deadlock.
+ if err := NewDecoder(r).Decode(&val); err != nil {
+ t.Errorf("%s: NewDecoder(%s).Decode error: %v", tt.Where, tt.in, err)
+ }
+ r.Close()
+ w.Close()
+ })
+ }
+}
+
+type decodeThis struct {
+ v any
+}
+
+func TestDecodeInStream(t *testing.T) {
+ tests := []struct {
+ CaseName
+ json string
+ expTokens []any
+ }{
+ // streaming token cases
+ {CaseName: Name(""), json: `10`, expTokens: []any{float64(10)}},
+ {CaseName: Name(""), json: ` [10] `, expTokens: []any{
+ Delim('['), float64(10), Delim(']')}},
+ {CaseName: Name(""), json: ` [false,10,"b"] `, expTokens: []any{
+ Delim('['), false, float64(10), "b", Delim(']')}},
+ {CaseName: Name(""), json: `{ "a": 1 }`, expTokens: []any{
+ Delim('{'), "a", float64(1), Delim('}')}},
+ {CaseName: Name(""), json: `{"a": 1, "b":"3"}`, expTokens: []any{
+ Delim('{'), "a", float64(1), "b", "3", Delim('}')}},
+ {CaseName: Name(""), json: ` [{"a": 1},{"a": 2}] `, expTokens: []any{
+ Delim('['),
+ Delim('{'), "a", float64(1), Delim('}'),
+ Delim('{'), "a", float64(2), Delim('}'),
+ Delim(']')}},
+ {CaseName: Name(""), json: `{"obj": {"a": 1}}`, expTokens: []any{
+ Delim('{'), "obj", Delim('{'), "a", float64(1), Delim('}'),
+ Delim('}')}},
+ {CaseName: Name(""), json: `{"obj": [{"a": 1}]}`, expTokens: []any{
+ Delim('{'), "obj", Delim('['),
+ Delim('{'), "a", float64(1), Delim('}'),
+ Delim(']'), Delim('}')}},
+
+ // streaming tokens with intermittent Decode()
+ {CaseName: Name(""), json: `{ "a": 1 }`, expTokens: []any{
+ Delim('{'), "a",
+ decodeThis{float64(1)},
+ Delim('}')}},
+ {CaseName: Name(""), json: ` [ { "a" : 1 } ] `, expTokens: []any{
+ Delim('['),
+ decodeThis{map[string]any{"a": float64(1)}},
+ Delim(']')}},
+ {CaseName: Name(""), json: ` [{"a": 1},{"a": 2}] `, expTokens: []any{
+ Delim('['),
+ decodeThis{map[string]any{"a": float64(1)}},
+ decodeThis{map[string]any{"a": float64(2)}},
+ Delim(']')}},
+ {CaseName: Name(""), json: `{ "obj" : [ { "a" : 1 } ] }`, expTokens: []any{
+ Delim('{'), "obj", Delim('['),
+ decodeThis{map[string]any{"a": float64(1)}},
+ Delim(']'), Delim('}')}},
+
+ {CaseName: Name(""), json: `{"obj": {"a": 1}}`, expTokens: []any{
+ Delim('{'), "obj",
+ decodeThis{map[string]any{"a": float64(1)}},
+ Delim('}')}},
+ {CaseName: Name(""), json: `{"obj": [{"a": 1}]}`, expTokens: []any{
+ Delim('{'), "obj",
+ decodeThis{[]any{
+ map[string]any{"a": float64(1)},
+ }},
+ Delim('}')}},
+ {CaseName: Name(""), json: ` [{"a": 1} {"a": 2}] `, expTokens: []any{
+ Delim('['),
+ decodeThis{map[string]any{"a": float64(1)}},
+ decodeThis{&SyntaxError{"expected comma after array element", 11}},
+ }},
+ {CaseName: Name(""), json: `{ "` + strings.Repeat("a", 513) + `" 1 }`, expTokens: []any{
+ Delim('{'), strings.Repeat("a", 513),
+ decodeThis{&SyntaxError{"expected colon after object key", 518}},
+ }},
+ {CaseName: Name(""), json: `{ "\a" }`, expTokens: []any{
+ Delim('{'),
+ &SyntaxError{"invalid character 'a' in string escape code", 3},
+ }},
+ {CaseName: Name(""), json: ` \a`, expTokens: []any{
+ &SyntaxError{"invalid character '\\\\' looking for beginning of value", 1},
+ }},
+ }
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ dec := NewDecoder(strings.NewReader(tt.json))
+ for i, want := range tt.expTokens {
+ var got any
+ var err error
+
+ if dt, ok := want.(decodeThis); ok {
+ want = dt.v
+ err = dec.Decode(&got)
+ } else {
+ got, err = dec.Token()
+ }
+ if errWant, ok := want.(error); ok {
+ if err == nil || !reflect.DeepEqual(err, errWant) {
+ t.Fatalf("%s:\n\tinput: %s\n\tgot error: %v\n\twant error: %v", tt.Where, tt.json, err, errWant)
+ }
+ break
+ } else if err != nil {
+ t.Fatalf("%s:\n\tinput: %s\n\tgot error: %v\n\twant error: nil", tt.Where, tt.json, err)
+ }
+ if !reflect.DeepEqual(got, want) {
+ t.Fatalf("%s: token %d:\n\tinput: %s\n\tgot: %T(%v)\n\twant: %T(%v)", tt.Where, i, tt.json, got, got, want, want)
+ }
+ }
+ })
+ }
+}
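The cases above interleave Token with Decode; the same technique is handy for picking one field out of a large object while skipping the rest. A rough sketch (standard encoding/json, assumed equivalent):

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	dec := json.NewDecoder(strings.NewReader(`{"meta":{"version":3},"items":[1,2,3]}`))

	if _, err := dec.Token(); err != nil { // consume '{'
		panic(err)
	}
	for dec.More() {
		key, err := dec.Token() // next object key
		if err != nil {
			panic(err)
		}
		if key == "items" {
			var items []int
			if err := dec.Decode(&items); err != nil { // decode just this value
				panic(err)
			}
			fmt.Println(items)
			continue
		}
		var skip json.RawMessage // consume and discard the uninteresting value
		if err := dec.Decode(&skip); err != nil {
			panic(err)
		}
	}
}
```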
+
+// Test from golang.org/issue/11893
+func TestHTTPDecoding(t *testing.T) {
+ const raw = `{ "foo": "bar" }`
+
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Write([]byte(raw))
+ }))
+ defer ts.Close()
+ res, err := http.Get(ts.URL)
+ if err != nil {
+ log.Fatalf("http.Get error: %v", err)
+ }
+ defer res.Body.Close()
+
+ foo := struct {
+ Foo string
+ }{}
+
+ d := NewDecoder(res.Body)
+ err = d.Decode(&foo)
+ if err != nil {
+ t.Fatalf("Decode error: %v", err)
+ }
+ if foo.Foo != "bar" {
+ t.Errorf(`Decode: got %q, want "bar"`, foo.Foo)
+ }
+
+ // make sure we get the EOF the second time
+ err = d.Decode(&foo)
+ if err != io.EOF {
+ t.Errorf("Decode error:\n\tgot: %v\n\twant: io.EOF", err)
+ }
+}
diff --git a/pkg/encoders/json/tables.go b/pkg/encoders/json/tables.go
new file mode 100644
index 0000000..e8841cf
--- /dev/null
+++ b/pkg/encoders/json/tables.go
@@ -0,0 +1,220 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.jsonv2
+
+package json
+
+import "unicode/utf8"
+
+// safeSet holds the value true if the ASCII character with the given array
+// position can be represented inside a JSON string without any further
+// escaping.
+//
+// All values are true except for the ASCII control characters (0-31), the
+// double quote ("), and the backslash character ("\").
+var safeSet = [utf8.RuneSelf]bool{
+ ' ': true,
+ '!': true,
+ '"': false,
+ '#': true,
+ '$': true,
+ '%': true,
+ '&': true,
+ '\'': true,
+ '(': true,
+ ')': true,
+ '*': true,
+ '+': true,
+ ',': true,
+ '-': true,
+ '.': true,
+ '/': true,
+ '0': true,
+ '1': true,
+ '2': true,
+ '3': true,
+ '4': true,
+ '5': true,
+ '6': true,
+ '7': true,
+ '8': true,
+ '9': true,
+ ':': true,
+ ';': true,
+ '<': true,
+ '=': true,
+ '>': true,
+ '?': true,
+ '@': true,
+ 'A': true,
+ 'B': true,
+ 'C': true,
+ 'D': true,
+ 'E': true,
+ 'F': true,
+ 'G': true,
+ 'H': true,
+ 'I': true,
+ 'J': true,
+ 'K': true,
+ 'L': true,
+ 'M': true,
+ 'N': true,
+ 'O': true,
+ 'P': true,
+ 'Q': true,
+ 'R': true,
+ 'S': true,
+ 'T': true,
+ 'U': true,
+ 'V': true,
+ 'W': true,
+ 'X': true,
+ 'Y': true,
+ 'Z': true,
+ '[': true,
+ '\\': false,
+ ']': true,
+ '^': true,
+ '_': true,
+ '`': true,
+ 'a': true,
+ 'b': true,
+ 'c': true,
+ 'd': true,
+ 'e': true,
+ 'f': true,
+ 'g': true,
+ 'h': true,
+ 'i': true,
+ 'j': true,
+ 'k': true,
+ 'l': true,
+ 'm': true,
+ 'n': true,
+ 'o': true,
+ 'p': true,
+ 'q': true,
+ 'r': true,
+ 's': true,
+ 't': true,
+ 'u': true,
+ 'v': true,
+ 'w': true,
+ 'x': true,
+ 'y': true,
+ 'z': true,
+ '{': true,
+ '|': true,
+ '}': true,
+ '~': true,
+ '\u007f': true,
+}
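safeSet (and the htmlSafeSet table that follows it) is consulted once per byte by the string encoder: bytes marked true are copied through unchanged, everything else is escaped. A rough illustrative helper, not the package's actual code path, assuming the byte is below utf8.RuneSelf:

```go
// appendASCIIByte is illustrative only. Precondition: c < utf8.RuneSelf;
// multi-byte UTF-8 sequences take a different path in the real encoder.
func appendASCIIByte(dst []byte, c byte) []byte {
	if safeSet[c] {
		return append(dst, c) // safe to copy through verbatim
	}
	switch c {
	case '\\', '"':
		return append(dst, '\\', c)
	case '\n':
		return append(dst, '\\', 'n')
	case '\r':
		return append(dst, '\\', 'r')
	case '\t':
		return append(dst, '\\', 't')
	default:
		// Remaining control characters fall back to the \u00XX form.
		const hex = "0123456789abcdef"
		return append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF])
	}
}
```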
+
+// htmlSafeSet holds the value true if the ASCII character with the given
+// array position can be safely represented inside a JSON string, embedded
+// inside of HTML `<script>` tags, without any additional escaping.
+ got, err := json.Marshal(in)
+ if err != nil {
+ t.Fatalf("json.Marshal error: %v", err)
+ }
+ want := map[string]string{
+ "v1": `"\u003cscript\u003e console.log(\"Hello, world!\"); \u003c/script\u003e"`,
+ "v2": `""`,
+ }[json.Version]
+ if string(got) != want {
+ t.Fatalf("json.Marshal = %s, want %s", got, want)
+ }
+ })
+ }
+}
+
+// In v1, JSON serialization silently ignored invalid UTF-8 by
+// replacing such bytes with the Unicode replacement character.
+// In v2, JSON serialization reports an error if invalid UTF-8 is encountered.
+//
+// Users of v2 can opt into the v1 behavior by setting [AllowInvalidUTF8].
+//
+// Silently allowing invalid UTF-8 causes data corruption that can be difficult
+// to detect until it is too late. Once it has been discovered, strict UTF-8
+// behavior sometimes cannot be enabled since other logic may be depending
+// on the current behavior due to Hyrum's Law.
+//
+// Tim Bray, the author of RFC 8259, recommends that implementations
+// go beyond RFC 8259 and instead target compliance with RFC 7493,
+// which makes strict decisions about behavior left undefined in RFC 8259.
+// In particular, RFC 7493 rejects the presence of invalid UTF-8.
+// See https://www.tbray.org/ongoing/When/201x/2017/12/14/RFC-8259-STD-90
+func TestInvalidUTF8(t *testing.T) {
+ for _, json := range jsonPackages {
+ t.Run(path.Join("Marshal", json.Version), func(t *testing.T) {
+ got, err := json.Marshal("\xff")
+ switch {
+ case json.Version == "v1" && err != nil:
+ t.Fatalf("json.Marshal error: %v", err)
+ case json.Version == "v1" && string(got) != "\"\ufffd\"":
+ t.Fatalf(`json.Marshal = %s, want %q`, got, "\ufffd")
+ case json.Version == "v2" && err == nil:
+ t.Fatal("json.Marshal error is nil, want non-nil")
+ }
+ })
+ }
+
+ for _, json := range jsonPackages {
+ t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) {
+ const in = "\"\xff\""
+ var got string
+ err := json.Unmarshal([]byte(in), &got)
+ switch {
+ case json.Version == "v1" && err != nil:
+ t.Fatalf("json.Unmarshal error: %v", err)
+ case json.Version == "v1" && got != "\ufffd":
+ t.Fatalf(`json.Unmarshal = %q, want "\ufffd"`, got)
+ case json.Version == "v2" && err == nil:
+ t.Fatal("json.Unmarshal error is nil, want non-nil")
+ }
+ })
+ }
+}
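The v1 half of that test is easy to reproduce in isolation; standard encoding/json is used below, and the vendored package is assumed to share its v1 behavior:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Invalid UTF-8 is silently coerced to U+FFFD rather than rejected.
	b, err := json.Marshal("\xff")
	fmt.Println(string(b), err) // the quoted replacement character, no error

	var s string
	err = json.Unmarshal([]byte("\"\xff\""), &s)
	fmt.Println(s == "\ufffd", err) // true <nil>
}
```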
+
+// In v1, duplicate JSON object names are permitted by default where
+// they follow the inconsistent and difficult-to-explain merge semantics of v1.
+// In v2, duplicate JSON object names are rejected by default where
+// they follow the merge semantics of v2 based on RFC 7396.
+//
+// Users of v2 can opt into the v1 behavior by setting [AllowDuplicateNames].
+//
+// Per RFC 8259, the handling of duplicate names is left as undefined behavior.
+// Rejecting such inputs is within the realm of valid behavior.
+// Tim Bray, the author of RFC 8259, recommends that implementations
+// go beyond RFC 8259 and instead target compliance with RFC 7493,
+// which makes strict decisions about behavior left undefined in RFC 8259.
+// In particular, RFC 7493 rejects the presence of duplicate object names.
+// See https://www.tbray.org/ongoing/When/201x/2017/12/14/RFC-8259-STD-90
+//
+// The lack of duplicate name rejection has correctness implications where
+// roundtrip unmarshal/marshal do not result in semantically equivalent JSON.
+// This is surprising behavior for users when they accidentally
+// send JSON objects with duplicate names.
+//
+// The lack of duplicate name rejection may have security implications since it
+// becomes difficult for a security tool to validate the semantic meaning of a
+// JSON object since meaning is undefined in the presence of duplicate names.
+// See https://labs.bishopfox.com/tech-blog/an-exploration-of-json-interoperability-vulnerabilities
+//
+// Related issue:
+//
+// https://go.dev/issue/48298
+func TestDuplicateNames(t *testing.T) {
+ for _, json := range jsonPackages {
+ t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) {
+ const in = `{"Name":1,"Name":2}`
+ var got struct{ Name int }
+ err := json.Unmarshal([]byte(in), &got)
+ switch {
+ case json.Version == "v1" && err != nil:
+ t.Fatalf("json.Unmarshal error: %v", err)
+ case json.Version == "v1" && got != struct{ Name int }{2}:
+ t.Fatalf(`json.Unmarshal = %v, want {2}`, got)
+ case json.Version == "v2" && err == nil:
+ t.Fatal("json.Unmarshal error is nil, want non-nil")
+ }
+ })
+ }
+}
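In other words, under v1 semantics the last duplicate wins silently; a minimal reproduction of just that half (standard encoding/json, assumed equivalent):

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var v struct{ Name int }
	err := json.Unmarshal([]byte(`{"Name":1,"Name":2}`), &v)
	fmt.Println(v.Name, err) // 2 <nil>; the second occurrence overwrites the first
}
```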
+
+// In v1, unmarshaling a JSON null into a non-empty value was inconsistent:
+// sometimes the null was ignored and other times it cleared the value.
+// In v2, unmarshaling a JSON null into a non-empty value consistently
+// clears the value regardless of the value's type.
+//
+// The purpose of this change is to make the handling of JSON nulls during
+// Unmarshal consistent. This semantic detail has no effect
+// when unmarshaling into an empty value.
+//
+// Related issues:
+//
+// https://go.dev/issue/22177
+// https://go.dev/issue/33835
+func TestMergeNull(t *testing.T) {
+ type Types struct {
+ Bool bool
+ String string
+ Bytes []byte
+ Int int
+ Map map[string]string
+ Struct struct{ Field string }
+ Slice []string
+ Array [1]string
+ Pointer *string
+ Interface any
+ }
+
+ for _, json := range jsonPackages {
+ t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) {
+ // Start with a non-empty value where all fields are populated.
+ in := Types{
+ Bool: true,
+ String: "old",
+ Bytes: []byte("old"),
+ Int: 1234,
+ Map: map[string]string{"old": "old"},
+ Struct: struct{ Field string }{"old"},
+ Slice: []string{"old"},
+ Array: [1]string{"old"},
+ Pointer: new(string),
+ Interface: "old",
+ }
+
+ // Unmarshal a JSON null into every field.
+ if err := json.Unmarshal([]byte(`{
+ "Bool": null,
+ "String": null,
+ "Bytes": null,
+ "Int": null,
+ "Map": null,
+ "Struct": null,
+ "Slice": null,
+ "Array": null,
+ "Pointer": null,
+ "Interface": null
+ }`), &in); err != nil {
+ t.Fatalf("json.Unmarshal error: %v", err)
+ }
+
+ want := map[string]Types{
+ "v1": {
+ Bool: true,
+ String: "old",
+ Int: 1234,
+ Struct: struct{ Field string }{"old"},
+ Array: [1]string{"old"},
+ },
+ "v2": {}, // all fields are zeroed
+ }[json.Version]
+ if !reflect.DeepEqual(in, want) {
+ t.Fatalf("json.Unmarshal = %+v, want %+v", in, want)
+ }
+ })
+ }
+}
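Condensed to three representative kinds, the v1 column of the want table looks like this in practice (standard encoding/json, assumed to match the vendored v1 behavior):

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	v := struct {
		String  string
		Map     map[string]string
		Pointer *string
	}{"old", map[string]string{"k": "v"}, new(string)}

	// Under v1 semantics, null is ignored for the string but clears the
	// map and the pointer, matching the want table above.
	if err := json.Unmarshal([]byte(`{"String":null,"Map":null,"Pointer":null}`), &v); err != nil {
		panic(err)
	}
	fmt.Println(v.String, v.Map == nil, v.Pointer == nil) // old true true
}
```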
+
+// In v1, merge semantics are inconsistent and difficult to explain.
+// In v2, merge semantics replaces the destination value for anything
+// other than a JSON object, and recursively merges JSON objects.
+//
+// Merge semantics in v1 are inconsistent and difficult to explain
+// largely because the behavior came about organically, rather than
+// having a principled approach to how the semantics should operate.
+// In v2, merging follows behavior based on RFC 7396.
+//
+// Related issues:
+//
+// https://go.dev/issue/21092
+// https://go.dev/issue/26946
+// https://go.dev/issue/27172
+// https://go.dev/issue/30701
+// https://go.dev/issue/31924
+// https://go.dev/issue/43664
+func TestMergeComposite(t *testing.T) {
+ type Tuple struct{ Old, New bool }
+ type Composites struct {
+ Slice []Tuple
+ Array [1]Tuple
+ Map map[string]Tuple
+ MapPointer map[string]*Tuple
+ Struct struct{ Tuple Tuple }
+ StructPointer *struct{ Tuple Tuple }
+ Interface any
+ InterfacePointer any
+ }
+
+ for _, json := range jsonPackages {
+ t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) {
+ // Start with a non-empty value where all fields are populated.
+ in := Composites{
+ Slice: []Tuple{{Old: true}, {Old: true}}[:1],
+ Array: [1]Tuple{{Old: true}},
+ Map: map[string]Tuple{"Tuple": {Old: true}},
+ MapPointer: map[string]*Tuple{"Tuple": {Old: true}},
+ Struct: struct{ Tuple Tuple }{Tuple{Old: true}},
+ StructPointer: &struct{ Tuple Tuple }{Tuple{Old: true}},
+ Interface: Tuple{Old: true},
+ InterfacePointer: &Tuple{Old: true},
+ }
+
+ // Unmarshal into every pre-populated field.
+ if err := json.Unmarshal([]byte(`{
+ "Slice": [{"New":true}, {"New":true}],
+ "Array": [{"New":true}],
+ "Map": {"Tuple": {"New":true}},
+ "MapPointer": {"Tuple": {"New":true}},
+ "Struct": {"Tuple": {"New":true}},
+ "StructPointer": {"Tuple": {"New":true}},
+ "Interface": {"New":true},
+ "InterfacePointer": {"New":true}
+ }`), &in); err != nil {
+ t.Fatalf("json.Unmarshal error: %v", err)
+ }
+
+ merged := Tuple{Old: true, New: true}
+ replaced := Tuple{Old: false, New: true}
+ want := map[string]Composites{
+ "v1": {
+ Slice: []Tuple{merged, merged}, // merged
+ Array: [1]Tuple{merged}, // merged
+ Map: map[string]Tuple{"Tuple": replaced}, // replaced
+ MapPointer: map[string]*Tuple{"Tuple": &replaced}, // replaced
+ Struct: struct{ Tuple Tuple }{merged}, // merged (same as v2)
+ StructPointer: &struct{ Tuple Tuple }{merged}, // merged (same as v2)
+ Interface: map[string]any{"New": true}, // replaced
+ InterfacePointer: &merged, // merged (same as v2)
+ },
+ "v2": {
+ Slice: []Tuple{replaced, replaced}, // replaced
+ Array: [1]Tuple{replaced}, // replaced
+ Map: map[string]Tuple{"Tuple": merged}, // merged
+ MapPointer: map[string]*Tuple{"Tuple": &merged}, // merged
+ Struct: struct{ Tuple Tuple }{merged}, // merged (same as v1)
+ StructPointer: &struct{ Tuple Tuple }{merged}, // merged (same as v1)
+ Interface: merged, // merged
+ InterfacePointer: &merged, // merged (same as v1)
+ },
+ }[json.Version]
+ if !reflect.DeepEqual(in, want) {
+ t.Fatalf("json.Unmarshal = %+v, want %+v", in, want)
+ }
+ })
+ }
+}
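The sharpest v1 contrast in that table is map versus struct; a reduced sketch of just those two, using standard encoding/json and assuming the vendored package matches its v1 merge behavior:

```go
package main

import (
	"encoding/json"
	"fmt"
)

type Tuple struct{ Old, New bool }

func main() {
	v := struct {
		Map    map[string]Tuple
		Struct struct{ Tuple Tuple }
	}{
		Map:    map[string]Tuple{"Tuple": {Old: true}},
		Struct: struct{ Tuple Tuple }{Tuple{Old: true}},
	}

	// Per the v1 want table above: the map entry is replaced (Old is lost,
	// since map elements are decoded into a fresh value), while the struct
	// field is decoded in place and therefore merged.
	if err := json.Unmarshal([]byte(`{"Map":{"Tuple":{"New":true}},"Struct":{"Tuple":{"New":true}}}`), &v); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", v)
}
```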
+
+// In v1, there was no special support for time.Duration,
+// which resulted in that type simply being treated as a signed integer.
+// In v2, there is now first-class support for time.Duration, where the type is
+// formatted and parsed using time.Duration.String and time.ParseDuration.
+//
+// Users of v2 can opt into the v1 behavior by setting
+// the "format:nano" option in the `json` struct field tag:
+//
+// struct {
+// Duration time.Duration `json:",format:nano"`
+// }
+//
+// Related issue:
+//
+// https://go.dev/issue/10275
+func TestTimeDurations(t *testing.T) {
+ t.SkipNow() // TODO(https://go.dev/issue/71631): The default representation of time.Duration is still undecided.
+ for _, json := range jsonPackages {
+ t.Run(path.Join("Marshal", json.Version), func(t *testing.T) {
+ got, err := json.Marshal(time.Minute)
+ switch {
+ case err != nil:
+ t.Fatalf("json.Marshal error: %v", err)
+ case json.Version == "v1" && string(got) != "60000000000":
+ t.Fatalf("json.Marshal = %s, want 60000000000", got)
+ case json.Version == "v2" && string(got) != `"1m0s"`:
+ t.Fatalf(`json.Marshal = %s, want "1m0s"`, got)
+ }
+ })
+ }
+
+ for _, json := range jsonPackages {
+ t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) {
+ in := map[string]string{
+ "v1": "60000000000",
+ "v2": `"1m0s"`,
+ }[json.Version]
+ var got time.Duration
+ err := json.Unmarshal([]byte(in), &got)
+ switch {
+ case err != nil:
+ t.Fatalf("json.Unmarshal error: %v", err)
+ case got != time.Minute:
+ t.Fatalf("json.Unmarshal = %v, want 1m0s", got)
+ }
+ })
+ }
+}
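The skipped test still documents the v1 numbers; they are easy to confirm directly with standard encoding/json (assumed to match the vendored package):

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

func main() {
	// v1 gives time.Duration no special treatment: it is just an int64
	// count of nanoseconds.
	b, _ := json.Marshal(time.Minute)
	fmt.Println(string(b)) // 60000000000

	var d time.Duration
	_ = json.Unmarshal([]byte("60000000000"), &d)
	fmt.Println(d == time.Minute) // true
}
```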
+
+// In v1, non-empty structs without any JSON serializable fields are permitted.
+// In v2, non-empty structs without any JSON serializable fields are rejected.
+//
+// The purpose of this change is to avoid a common pitfall for new users
+// where they expect JSON serialization to handle unexported fields.
+// However, this does not work since Go reflection does not
+// provide the package the ability to mutate such fields.
+// Rejecting unserializable structs in v2 is intended to be a clear signal
+// that the type is not supposed to be serialized.
+func TestEmptyStructs(t *testing.T) {
+ never := func(string) bool { return false }
+ onlyV2 := func(v string) bool { return v == "v2" }
+ values := []struct {
+ in any
+ wantError func(string) bool
+ }{
+ // It is okay to marshal a truly empty struct in v1 and v2.
+ {in: addr(struct{}{}), wantError: never},
+ // In v1, a non-empty struct without exported fields
+ // is equivalent to an empty struct, but is rejected in v2.
+ // Note that errors.errorString type has only unexported fields.
+ {in: errors.New("error"), wantError: onlyV2},
+ // A mix of exported and unexported fields is permitted.
+ {in: addr(struct{ Exported, unexported int }{}), wantError: never},
+ }
+
+ for _, json := range jsonPackages {
+ t.Run("Marshal", func(t *testing.T) {
+ for _, value := range values {
+ wantError := value.wantError(json.Version)
+ _, err := json.Marshal(value.in)
+ switch {
+ case (err == nil) && wantError:
+ t.Fatalf("json.Marshal error is nil, want non-nil")
+ case (err != nil) && !wantError:
+ t.Fatalf("json.Marshal error: %v", err)
+ }
+ }
+ })
+ }
+
+ for _, json := range jsonPackages {
+ t.Run("Unmarshal", func(t *testing.T) {
+ for _, value := range values {
+ wantError := value.wantError(json.Version)
+ out := reflect.New(reflect.TypeOf(value.in).Elem()).Interface()
+ err := json.Unmarshal([]byte("{}"), out)
+ switch {
+ case (err == nil) && wantError:
+ t.Fatalf("json.Unmarshal error is nil, want non-nil")
+ case (err != nil) && !wantError:
+ t.Fatalf("json.Unmarshal error: %v", err)
+ }
+ }
+ })
+ }
+}
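For the v1 side only, the errors.New case reduces to a one-liner; standard encoding/json is used below and assumed to match the vendored package:

```go
package main

import (
	"encoding/json"
	"errors"
	"fmt"
)

func main() {
	// A struct with only unexported fields (such as the value returned by
	// errors.New) marshals as an empty object instead of being rejected.
	b, err := json.Marshal(errors.New("boom"))
	fmt.Println(string(b), err) // {} <nil>
}
```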
diff --git a/pkg/encoders/json/v2_encode.go b/pkg/encoders/json/v2_encode.go
new file mode 100644
index 0000000..c2d620b
--- /dev/null
+++ b/pkg/encoders/json/v2_encode.go
@@ -0,0 +1,251 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+// Package json implements encoding and decoding of JSON as defined in
+// RFC 7159. The mapping between JSON and Go values is described
+// in the documentation for the Marshal and Unmarshal functions.
+//
+// See "JSON and Go" for an introduction to this package:
+// https://golang.org/doc/articles/json_and_go.html
+//
+// # Security Considerations
+//
+// See the "Security Considerations" section in [encoding/json/v2].
+//
+// For historical reasons, the default behavior of v1 [encoding/json]
+// unfortunately operates with less secure defaults.
+// New usages of JSON in Go are encouraged to use [encoding/json/v2] instead.
+package json
+
+import (
+ "reflect"
+ "strconv"
+
+ jsonv2 "encoding/json/v2"
+)
+
+// Marshal returns the JSON encoding of v.
+//
+// Marshal traverses the value v recursively.
+// If an encountered value implements [Marshaler]
+// and is not a nil pointer, Marshal calls [Marshaler.MarshalJSON]
+// to produce JSON. If no [Marshaler.MarshalJSON] method is present but the
+// value implements [encoding.TextMarshaler] instead, Marshal calls
+// [encoding.TextMarshaler.MarshalText] and encodes the result as a JSON string.
+// The nil pointer exception is not strictly necessary
+// but mimics a similar, necessary exception in the behavior of
+// [Unmarshaler.UnmarshalJSON].
+//
+// Otherwise, Marshal uses the following type-dependent default encodings:
+//
+// Boolean values encode as JSON booleans.
+//
+// Floating point, integer, and [Number] values encode as JSON numbers.
+// NaN and +/-Inf values will return an [UnsupportedValueError].
+//
+// String values encode as JSON strings coerced to valid UTF-8,
+// replacing invalid bytes with the Unicode replacement rune.
+// So that the JSON will be safe to embed inside HTML