From c958a7d9ed6853961a993b11e0f88ec57f9fa4ce Mon Sep 17 00:00:00 2001 From: mleku Date: Sat, 23 Aug 2025 10:34:22 +0100 Subject: [PATCH] add forked version of encoding/json with html escape disabled - modified all local packages to use this fork --- cmd/eventpool/eventpool.go | 7 +- pkg/encoders/event/event.go | 3 + pkg/encoders/event/event_test.go | 12 +- pkg/encoders/json/bench_test.go | 583 ++++ pkg/encoders/json/decode.go | 1314 ++++++++ pkg/encoders/json/decode_test.go | 2830 ++++++++++++++++ pkg/encoders/json/encode.go | 1418 +++++++++ pkg/encoders/json/encode_test.go | 1425 +++++++++ pkg/encoders/json/example_marshaling_test.go | 75 + pkg/encoders/json/example_test.go | 312 ++ .../json/example_text_marshaling_test.go | 69 + pkg/encoders/json/fold.go | 50 + pkg/encoders/json/fold_test.go | 52 + pkg/encoders/json/fuzz_test.go | 85 + pkg/encoders/json/indent.go | 184 ++ pkg/encoders/json/internal/internal.go | 41 + pkg/encoders/json/internal/jsonflags/flags.go | 215 ++ .../json/internal/jsonflags/flags_test.go | 75 + .../json/internal/jsonopts/options.go | 202 ++ .../json/internal/jsonopts/options_test.go | 233 ++ pkg/encoders/json/internal/jsonwire/decode.go | 629 ++++ .../json/internal/jsonwire/decode_test.go | 443 +++ pkg/encoders/json/internal/jsonwire/encode.go | 290 ++ .../json/internal/jsonwire/encode_test.go | 332 ++ pkg/encoders/json/internal/jsonwire/wire.go | 217 ++ .../json/internal/jsonwire/wire_test.go | 98 + pkg/encoders/json/jsontext/coder_test.go | 856 +++++ pkg/encoders/json/jsontext/decode.go | 1168 +++++++ pkg/encoders/json/jsontext/decode_test.go | 1267 ++++++++ pkg/encoders/json/jsontext/doc.go | 116 + pkg/encoders/json/jsontext/encode.go | 972 ++++++ pkg/encoders/json/jsontext/encode_test.go | 737 +++++ pkg/encoders/json/jsontext/errors.go | 182 ++ pkg/encoders/json/jsontext/example_test.go | 130 + pkg/encoders/json/jsontext/export.go | 77 + pkg/encoders/json/jsontext/fuzz_test.go | 236 ++ pkg/encoders/json/jsontext/options.go | 304 ++ pkg/encoders/json/jsontext/pools.go | 152 + pkg/encoders/json/jsontext/quote.go | 41 + pkg/encoders/json/jsontext/state.go | 828 +++++ pkg/encoders/json/jsontext/state_test.go | 396 +++ pkg/encoders/json/jsontext/token.go | 527 +++ pkg/encoders/json/jsontext/token_test.go | 168 + pkg/encoders/json/jsontext/value.go | 395 +++ pkg/encoders/json/jsontext/value_test.go | 200 ++ pkg/encoders/json/number_test.go | 120 + pkg/encoders/json/scanner.go | 612 ++++ pkg/encoders/json/scanner_test.go | 306 ++ pkg/encoders/json/stream.go | 514 +++ pkg/encoders/json/stream_test.go | 524 +++ pkg/encoders/json/tables.go | 220 ++ pkg/encoders/json/tagkey_test.go | 123 + pkg/encoders/json/tags.go | 40 + pkg/encoders/json/tags_test.go | 28 + pkg/encoders/json/v2_bench_test.go | 483 +++ pkg/encoders/json/v2_decode.go | 253 ++ pkg/encoders/json/v2_decode_test.go | 2835 +++++++++++++++++ pkg/encoders/json/v2_diff_test.go | 1130 +++++++ pkg/encoders/json/v2_encode.go | 251 ++ pkg/encoders/json/v2_encode_test.go | 1430 +++++++++ .../json/v2_example_marshaling_test.go | 76 + pkg/encoders/json/v2_example_test.go | 313 ++ .../json/v2_example_text_marshaling_test.go | 70 + pkg/encoders/json/v2_fuzz_test.go | 85 + pkg/encoders/json/v2_indent.go | 133 + pkg/encoders/json/v2_inject.go | 153 + pkg/encoders/json/v2_options.go | 546 ++++ pkg/encoders/json/v2_scanner.go | 86 + pkg/encoders/json/v2_scanner_test.go | 306 ++ pkg/encoders/json/v2_stream.go | 231 ++ pkg/encoders/json/v2_stream_test.go | 504 +++ pkg/encoders/json/v2_tagkey_test.go | 121 + 72 files changed, 
31429 insertions(+), 10 deletions(-) create mode 100644 pkg/encoders/json/bench_test.go create mode 100644 pkg/encoders/json/decode.go create mode 100644 pkg/encoders/json/decode_test.go create mode 100644 pkg/encoders/json/encode.go create mode 100644 pkg/encoders/json/encode_test.go create mode 100644 pkg/encoders/json/example_marshaling_test.go create mode 100644 pkg/encoders/json/example_test.go create mode 100644 pkg/encoders/json/example_text_marshaling_test.go create mode 100644 pkg/encoders/json/fold.go create mode 100644 pkg/encoders/json/fold_test.go create mode 100644 pkg/encoders/json/fuzz_test.go create mode 100644 pkg/encoders/json/indent.go create mode 100644 pkg/encoders/json/internal/internal.go create mode 100644 pkg/encoders/json/internal/jsonflags/flags.go create mode 100644 pkg/encoders/json/internal/jsonflags/flags_test.go create mode 100644 pkg/encoders/json/internal/jsonopts/options.go create mode 100644 pkg/encoders/json/internal/jsonopts/options_test.go create mode 100644 pkg/encoders/json/internal/jsonwire/decode.go create mode 100644 pkg/encoders/json/internal/jsonwire/decode_test.go create mode 100644 pkg/encoders/json/internal/jsonwire/encode.go create mode 100644 pkg/encoders/json/internal/jsonwire/encode_test.go create mode 100644 pkg/encoders/json/internal/jsonwire/wire.go create mode 100644 pkg/encoders/json/internal/jsonwire/wire_test.go create mode 100644 pkg/encoders/json/jsontext/coder_test.go create mode 100644 pkg/encoders/json/jsontext/decode.go create mode 100644 pkg/encoders/json/jsontext/decode_test.go create mode 100644 pkg/encoders/json/jsontext/doc.go create mode 100644 pkg/encoders/json/jsontext/encode.go create mode 100644 pkg/encoders/json/jsontext/encode_test.go create mode 100644 pkg/encoders/json/jsontext/errors.go create mode 100644 pkg/encoders/json/jsontext/example_test.go create mode 100644 pkg/encoders/json/jsontext/export.go create mode 100644 pkg/encoders/json/jsontext/fuzz_test.go create mode 100644 pkg/encoders/json/jsontext/options.go create mode 100644 pkg/encoders/json/jsontext/pools.go create mode 100644 pkg/encoders/json/jsontext/quote.go create mode 100644 pkg/encoders/json/jsontext/state.go create mode 100644 pkg/encoders/json/jsontext/state_test.go create mode 100644 pkg/encoders/json/jsontext/token.go create mode 100644 pkg/encoders/json/jsontext/token_test.go create mode 100644 pkg/encoders/json/jsontext/value.go create mode 100644 pkg/encoders/json/jsontext/value_test.go create mode 100644 pkg/encoders/json/number_test.go create mode 100644 pkg/encoders/json/scanner.go create mode 100644 pkg/encoders/json/scanner_test.go create mode 100644 pkg/encoders/json/stream.go create mode 100644 pkg/encoders/json/stream_test.go create mode 100644 pkg/encoders/json/tables.go create mode 100644 pkg/encoders/json/tagkey_test.go create mode 100644 pkg/encoders/json/tags.go create mode 100644 pkg/encoders/json/tags_test.go create mode 100644 pkg/encoders/json/v2_bench_test.go create mode 100644 pkg/encoders/json/v2_decode.go create mode 100644 pkg/encoders/json/v2_decode_test.go create mode 100644 pkg/encoders/json/v2_diff_test.go create mode 100644 pkg/encoders/json/v2_encode.go create mode 100644 pkg/encoders/json/v2_encode_test.go create mode 100644 pkg/encoders/json/v2_example_marshaling_test.go create mode 100644 pkg/encoders/json/v2_example_test.go create mode 100644 pkg/encoders/json/v2_example_text_marshaling_test.go create mode 100644 pkg/encoders/json/v2_fuzz_test.go create mode 100644 pkg/encoders/json/v2_indent.go create 
mode 100644 pkg/encoders/json/v2_inject.go create mode 100644 pkg/encoders/json/v2_options.go create mode 100644 pkg/encoders/json/v2_scanner.go create mode 100644 pkg/encoders/json/v2_scanner_test.go create mode 100644 pkg/encoders/json/v2_stream.go create mode 100644 pkg/encoders/json/v2_stream_test.go create mode 100644 pkg/encoders/json/v2_tagkey_test.go diff --git a/cmd/eventpool/eventpool.go b/cmd/eventpool/eventpool.go index bfd7079..9e3d4d4 100644 --- a/cmd/eventpool/eventpool.go +++ b/cmd/eventpool/eventpool.go @@ -9,6 +9,7 @@ import ( "lukechampine.com/frand" "next.orly.dev/pkg/encoders/event" "next.orly.dev/pkg/encoders/hex" + "next.orly.dev/pkg/encoders/json" "next.orly.dev/pkg/encoders/tag" "next.orly.dev/pkg/utils" "next.orly.dev/pkg/utils/bufpool" @@ -36,7 +37,7 @@ func main() { ev.Content = frand.Bytes(frand.Intn(1024) + 1) ev.Sig = frand.Bytes(64) // log.I.S(ev) - b, err := ev.MarshalJSON() + b, err := json.Marshal(ev) if chk.E(err) { return } @@ -44,11 +45,11 @@ func main() { bc = append(bc, b...) // log.I.F("%s", bc) ev2 := event.New() - if err = ev2.UnmarshalJSON(b); chk.E(err) { + if err = json.Unmarshal(b, ev2); chk.E(err) { return } var b2 []byte - if b2, err = ev.MarshalJSON(); err != nil { + if b2, err = json.Marshal(ev); err != nil { return } if !utils.FastEqual(bc, b2) { diff --git a/pkg/encoders/event/event.go b/pkg/encoders/event/event.go index 1b1e6a1..cdb154c 100644 --- a/pkg/encoders/event/event.go +++ b/pkg/encoders/event/event.go @@ -27,6 +27,9 @@ import ( // encode <, >, and & characters due to legacy bullcrap in the encoding/json // library. Either call MarshalJSON directly or use a json.Encoder with html // escaping disabled. +// +// Or import "next.orly.dev/pkg/encoders/json" and use json.Marshal which is the +// same as go 1.25 json v1 except with this one stupidity removed. type E struct { // ID is the SHA256 hash of the canonical encoding of the event in binary format diff --git a/pkg/encoders/event/event_test.go b/pkg/encoders/event/event_test.go index 3ad2503..8b41994 100644 --- a/pkg/encoders/event/event_test.go +++ b/pkg/encoders/event/event_test.go @@ -3,7 +3,6 @@ package event import ( "bufio" "bytes" - "encoding/json" "testing" "time" @@ -12,6 +11,7 @@ import ( "lukechampine.com/frand" "next.orly.dev/pkg/encoders/event/examples" "next.orly.dev/pkg/encoders/hex" + "next.orly.dev/pkg/encoders/json" "next.orly.dev/pkg/encoders/tag" "next.orly.dev/pkg/utils" "next.orly.dev/pkg/utils/bufpool" @@ -39,11 +39,9 @@ func TestMarshalJSONUnmarshalJSON(t *testing.T) { with line breaks and tabs and other stuff `) ev.Sig = frand.Bytes(64) - // log.I.S(ev) - // b, err := ev.MarshalJSON() var err error var b []byte - if b, err = ev.MarshalJSON(); chk.E(err) { + if b, err = json.Marshal(ev); chk.E(err) { t.Fatal(err) } var bc []byte @@ -53,7 +51,7 @@ func TestMarshalJSONUnmarshalJSON(t *testing.T) { t.Fatal(err) } var b2 []byte - if b2, err = ev2.MarshalJSON(); err != nil { + if b2, err = json.Marshal(ev2); err != nil { t.Fatal(err) } if !utils.FastEqual(bc, b2) { @@ -82,8 +80,8 @@ func TestExamplesCache(t *testing.T) { t.Fatal(err) } var b2 []byte - // can't use json.Marshal as it improperly escapes <, > and &. - if b2, err = ev.MarshalJSON(); err != nil { + // can't use encoding/json.Marshal as it improperly escapes <, > and &. 
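+		// For illustration: with the standard library, encoding/json.Marshal("<&>")
+		// produces "\u003c\u0026\u003e", whereas this fork's json.Marshal (per the
+		// commit's stated purpose of disabling HTML escaping) leaves it as "<&>",
+		// so the output can be compared byte-for-byte against the cached examples.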
+ if b2, err = json.Marshal(ev); err != nil { t.Fatal(err) } if !utils.FastEqual(c, b2) { diff --git a/pkg/encoders/json/bench_test.go b/pkg/encoders/json/bench_test.go new file mode 100644 index 0000000..0471881 --- /dev/null +++ b/pkg/encoders/json/bench_test.go @@ -0,0 +1,583 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Large data benchmark. +// The JSON data is a summary of agl's changes in the +// go, webkit, and chromium open source projects. +// We benchmark converting between the JSON form +// and in-memory data structures. + +//go:build !goexperiment.jsonv2 + +package json + +import ( + "bytes" + "fmt" + "internal/testenv" + "internal/zstd" + "io" + "os" + "reflect" + "regexp" + "runtime" + "strings" + "sync" + "testing" +) + +type codeResponse struct { + Tree *codeNode `json:"tree"` + Username string `json:"username"` +} + +type codeNode struct { + Name string `json:"name"` + Kids []*codeNode `json:"kids"` + CLWeight float64 `json:"cl_weight"` + Touches int `json:"touches"` + MinT int64 `json:"min_t"` + MaxT int64 `json:"max_t"` + MeanT int64 `json:"mean_t"` +} + +var codeJSON []byte +var codeStruct codeResponse + +func codeInit() { + f, err := os.Open("internal/jsontest/testdata/golang_source.json.zst") + if err != nil { + panic(err) + } + defer f.Close() + gz := zstd.NewReader(f) + data, err := io.ReadAll(gz) + if err != nil { + panic(err) + } + + codeJSON = data + + if err := Unmarshal(codeJSON, &codeStruct); err != nil { + panic("unmarshal code.json: " + err.Error()) + } + + if data, err = Marshal(&codeStruct); err != nil { + panic("marshal code.json: " + err.Error()) + } + + if !bytes.Equal(data, codeJSON) { + println("different lengths", len(data), len(codeJSON)) + for i := 0; i < len(data) && i < len(codeJSON); i++ { + if data[i] != codeJSON[i] { + println("re-marshal: changed at byte", i) + println("orig: ", string(codeJSON[i-10:i+10])) + println("new: ", string(data[i-10:i+10])) + break + } + } + panic("re-marshal code.json: different result") + } +} + +func BenchmarkCodeEncoder(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + for pb.Next() { + if err := enc.Encode(&codeStruct); err != nil { + b.Fatalf("Encode error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeEncoderError(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + + // Trigger an error in Marshal with cyclic data. 
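+	// (Illustrative note: Marshal's pointer-cycle check rejects the
+	// self-referential Dummy value below with an *UnsupportedValueError, so
+	// each iteration exercises the error path alongside the normal encode.)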
+ type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + for pb.Next() { + if err := enc.Encode(&codeStruct); err != nil { + b.Fatalf("Encode error: %v", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("Marshal error: got nil, want non-nil") + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeMarshal(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&codeStruct); err != nil { + b.Fatalf("Marshal error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeMarshalError(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&codeStruct); err != nil { + b.Fatalf("Marshal error: %v", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("Marshal error: got nil, want non-nil") + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func benchMarshalBytes(n int) func(*testing.B) { + sample := []byte("hello world") + // Use a struct pointer, to avoid an allocation when passing it as an + // interface parameter to Marshal. + v := &struct { + Bytes []byte + }{ + bytes.Repeat(sample, (n/len(sample))+1)[:n], + } + return func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := Marshal(v); err != nil { + b.Fatalf("Marshal error: %v", err) + } + } + } +} + +func benchMarshalBytesError(n int) func(*testing.B) { + sample := []byte("hello world") + // Use a struct pointer, to avoid an allocation when passing it as an + // interface parameter to Marshal. + v := &struct { + Bytes []byte + }{ + bytes.Repeat(sample, (n/len(sample))+1)[:n], + } + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + return func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := Marshal(v); err != nil { + b.Fatalf("Marshal error: %v", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("Marshal error: got nil, want non-nil") + } + } + } +} + +func BenchmarkMarshalBytes(b *testing.B) { + b.ReportAllocs() + // 32 fits within encodeState.scratch. + b.Run("32", benchMarshalBytes(32)) + // 256 doesn't fit in encodeState.scratch, but is small enough to + // allocate and avoid the slower base64.NewEncoder. + b.Run("256", benchMarshalBytes(256)) + // 4096 is large enough that we want to avoid allocating for it. + b.Run("4096", benchMarshalBytes(4096)) +} + +func BenchmarkMarshalBytesError(b *testing.B) { + b.ReportAllocs() + // 32 fits within encodeState.scratch. + b.Run("32", benchMarshalBytesError(32)) + // 256 doesn't fit in encodeState.scratch, but is small enough to + // allocate and avoid the slower base64.NewEncoder. + b.Run("256", benchMarshalBytesError(256)) + // 4096 is large enough that we want to avoid allocating for it. 
+ b.Run("4096", benchMarshalBytesError(4096)) +} + +func BenchmarkMarshalMap(b *testing.B) { + b.ReportAllocs() + m := map[string]int{ + "key3": 3, + "key2": 2, + "key1": 1, + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(m); err != nil { + b.Fatal("Marshal:", err) + } + } + }) +} + +func BenchmarkCodeDecoder(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + var buf bytes.Buffer + dec := NewDecoder(&buf) + var r codeResponse + for pb.Next() { + buf.Write(codeJSON) + // hide EOF + buf.WriteByte('\n') + buf.WriteByte('\n') + buf.WriteByte('\n') + if err := dec.Decode(&r); err != nil { + b.Fatalf("Decode error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkUnicodeDecoder(b *testing.B) { + b.ReportAllocs() + j := []byte(`"\uD83D\uDE01"`) + b.SetBytes(int64(len(j))) + r := bytes.NewReader(j) + dec := NewDecoder(r) + var out string + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := dec.Decode(&out); err != nil { + b.Fatalf("Decode error: %v", err) + } + r.Seek(0, 0) + } +} + +func BenchmarkDecoderStream(b *testing.B) { + b.ReportAllocs() + b.StopTimer() + var buf bytes.Buffer + dec := NewDecoder(&buf) + buf.WriteString(`"` + strings.Repeat("x", 1000000) + `"` + "\n\n\n") + var x any + if err := dec.Decode(&x); err != nil { + b.Fatalf("Decode error: %v", err) + } + ones := strings.Repeat(" 1\n", 300000) + "\n\n\n" + b.StartTimer() + for i := 0; i < b.N; i++ { + if i%300000 == 0 { + buf.WriteString(ones) + } + x = nil + switch err := dec.Decode(&x); { + case err != nil: + b.Fatalf("Decode error: %v", err) + case x != 1.0: + b.Fatalf("Decode: got %v want 1.0", i) + } + } +} + +func BenchmarkCodeUnmarshal(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + var r codeResponse + if err := Unmarshal(codeJSON, &r); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeUnmarshalReuse(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + var r codeResponse + for pb.Next() { + if err := Unmarshal(codeJSON, &r); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkUnmarshalString(b *testing.B) { + b.ReportAllocs() + data := []byte(`"hello, world"`) + b.RunParallel(func(pb *testing.PB) { + var s string + for pb.Next() { + if err := Unmarshal(data, &s); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkUnmarshalFloat64(b *testing.B) { + b.ReportAllocs() + data := []byte(`3.14`) + b.RunParallel(func(pb *testing.PB) { + var f float64 + for pb.Next() { + if err := Unmarshal(data, &f); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkUnmarshalInt64(b *testing.B) { + b.ReportAllocs() + data := []byte(`3`) + b.RunParallel(func(pb *testing.PB) { + var x int64 + for pb.Next() { + if err := Unmarshal(data, &x); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkUnmarshalMap(b *testing.B) { + b.ReportAllocs() + data := []byte(`{"key1":"value1","key2":"value2","key3":"value3"}`) + b.RunParallel(func(pb *testing.PB) { + x := make(map[string]string, 3) + for pb.Next() { + if err := Unmarshal(data, &x); err 
!= nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkIssue10335(b *testing.B) { + b.ReportAllocs() + j := []byte(`{"a":{ }}`) + b.RunParallel(func(pb *testing.PB) { + var s struct{} + for pb.Next() { + if err := Unmarshal(j, &s); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkIssue34127(b *testing.B) { + b.ReportAllocs() + j := struct { + Bar string `json:"bar,string"` + }{ + Bar: `foobar`, + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&j); err != nil { + b.Fatalf("Marshal error: %v", err) + } + } + }) +} + +func BenchmarkUnmapped(b *testing.B) { + b.ReportAllocs() + j := []byte(`{"s": "hello", "y": 2, "o": {"x": 0}, "a": [1, 99, {"x": 1}]}`) + b.RunParallel(func(pb *testing.PB) { + var s struct{} + for pb.Next() { + if err := Unmarshal(j, &s); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkTypeFieldsCache(b *testing.B) { + b.ReportAllocs() + var maxTypes int = 1e6 + if testenv.Builder() != "" { + maxTypes = 1e3 // restrict cache sizes on builders + } + + // Dynamically generate many new types. + types := make([]reflect.Type, maxTypes) + fs := []reflect.StructField{{ + Type: reflect.TypeFor[string](), + Index: []int{0}, + }} + for i := range types { + fs[0].Name = fmt.Sprintf("TypeFieldsCache%d", i) + types[i] = reflect.StructOf(fs) + } + + // clearClear clears the cache. Other JSON operations, must not be running. + clearCache := func() { + fieldCache = sync.Map{} + } + + // MissTypes tests the performance of repeated cache misses. + // This measures the time to rebuild a cache of size nt. + for nt := 1; nt <= maxTypes; nt *= 10 { + ts := types[:nt] + b.Run(fmt.Sprintf("MissTypes%d", nt), func(b *testing.B) { + nc := runtime.GOMAXPROCS(0) + for i := 0; i < b.N; i++ { + clearCache() + var wg sync.WaitGroup + for j := 0; j < nc; j++ { + wg.Add(1) + go func(j int) { + for _, t := range ts[(j*len(ts))/nc : ((j+1)*len(ts))/nc] { + cachedTypeFields(t) + } + wg.Done() + }(j) + } + wg.Wait() + } + }) + } + + // HitTypes tests the performance of repeated cache hits. + // This measures the average time of each cache lookup. + for nt := 1; nt <= maxTypes; nt *= 10 { + // Pre-warm a cache of size nt. 
+ clearCache() + for _, t := range types[:nt] { + cachedTypeFields(t) + } + b.Run(fmt.Sprintf("HitTypes%d", nt), func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + cachedTypeFields(types[0]) + } + }) + }) + } +} + +func BenchmarkEncodeMarshaler(b *testing.B) { + b.ReportAllocs() + + m := struct { + A int + B RawMessage + }{} + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + + for pb.Next() { + if err := enc.Encode(&m); err != nil { + b.Fatalf("Encode error: %v", err) + } + } + }) +} + +func BenchmarkEncoderEncode(b *testing.B) { + b.ReportAllocs() + type T struct { + X, Y string + } + v := &T{"foo", "bar"} + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if err := NewEncoder(io.Discard).Encode(v); err != nil { + b.Fatalf("Encode error: %v", err) + } + } + }) +} + +func BenchmarkNumberIsValid(b *testing.B) { + s := "-61657.61667E+61673" + for i := 0; i < b.N; i++ { + isValidNumber(s) + } +} + +func BenchmarkNumberIsValidRegexp(b *testing.B) { + var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) + s := "-61657.61667E+61673" + for i := 0; i < b.N; i++ { + jsonNumberRegexp.MatchString(s) + } +} + +func BenchmarkUnmarshalNumber(b *testing.B) { + b.ReportAllocs() + data := []byte(`"-61657.61667E+61673"`) + var number Number + for i := 0; i < b.N; i++ { + if err := Unmarshal(data, &number); err != nil { + b.Fatal("Unmarshal:", err) + } + } +} diff --git a/pkg/encoders/json/decode.go b/pkg/encoders/json/decode.go new file mode 100644 index 0000000..70885a5 --- /dev/null +++ b/pkg/encoders/json/decode.go @@ -0,0 +1,1314 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Represents JSON data structure using native Go types: booleans, floats, +// strings, arrays, and maps. + +//go:build !goexperiment.jsonv2 + +package json + +import ( + "encoding" + "encoding/base64" + "fmt" + "reflect" + "strconv" + "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" + _ "unsafe" // for linkname +) + +// Unmarshal parses the JSON-encoded data and stores the result +// in the value pointed to by v. If v is nil or not a pointer, +// Unmarshal returns an [InvalidUnmarshalError]. +// +// Unmarshal uses the inverse of the encodings that +// [Marshal] uses, allocating maps, slices, and pointers as necessary, +// with the following additional rules: +// +// To unmarshal JSON into a pointer, Unmarshal first handles the case of +// the JSON being the JSON literal null. In that case, Unmarshal sets +// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into +// the value pointed at by the pointer. If the pointer is nil, Unmarshal +// allocates a new value for it to point to. +// +// To unmarshal JSON into a value implementing [Unmarshaler], +// Unmarshal calls that value's [Unmarshaler.UnmarshalJSON] method, including +// when the input is a JSON null. +// Otherwise, if the value implements [encoding.TextUnmarshaler] +// and the input is a JSON quoted string, Unmarshal calls +// [encoding.TextUnmarshaler.UnmarshalText] with the unquoted form of the string. +// +// To unmarshal JSON into a struct, Unmarshal matches incoming object keys to +// the keys used by [Marshal] (either the struct field name or its tag), +// ignoring case. If multiple struct fields match an object key, an exact case +// match is preferred over a case-insensitive one. 
+// +// Incoming object members are processed in the order observed. If an object +// includes duplicate keys, later duplicates will replace or be merged into +// prior values. +// +// To unmarshal JSON into an interface value, +// Unmarshal stores one of these in the interface value: +// +// - bool, for JSON booleans +// - float64, for JSON numbers +// - string, for JSON strings +// - []any, for JSON arrays +// - map[string]any, for JSON objects +// - nil for JSON null +// +// To unmarshal a JSON array into a slice, Unmarshal resets the slice length +// to zero and then appends each element to the slice. +// As a special case, to unmarshal an empty JSON array into a slice, +// Unmarshal replaces the slice with a new empty slice. +// +// To unmarshal a JSON array into a Go array, Unmarshal decodes +// JSON array elements into corresponding Go array elements. +// If the Go array is smaller than the JSON array, +// the additional JSON array elements are discarded. +// If the JSON array is smaller than the Go array, +// the additional Go array elements are set to zero values. +// +// To unmarshal a JSON object into a map, Unmarshal first establishes a map to +// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal +// reuses the existing map, keeping existing entries. Unmarshal then stores +// key-value pairs from the JSON object into the map. The map's key type must +// either be any string type, an integer, or implement [encoding.TextUnmarshaler]. +// +// If the JSON-encoded data contain a syntax error, Unmarshal returns a [SyntaxError]. +// +// If a JSON value is not appropriate for a given target type, +// or if a JSON number overflows the target type, Unmarshal +// skips that field and completes the unmarshaling as best it can. +// If no more serious errors are encountered, Unmarshal returns +// an [UnmarshalTypeError] describing the earliest such error. In any +// case, it's not guaranteed that all the remaining fields following +// the problematic one will be unmarshaled into the target object. +// +// The JSON null value unmarshals into an interface, map, pointer, or slice +// by setting that Go value to nil. Because null is often used in JSON to mean +// “not present,” unmarshaling a JSON null into any other Go type has no effect +// on the value and produces no error. +// +// When unmarshaling quoted strings, invalid UTF-8 or +// invalid UTF-16 surrogate pairs are not treated as an error. +// Instead, they are replaced by the Unicode replacement +// character U+FFFD. +func Unmarshal(data []byte, v any) error { + // Check for well-formedness. + // Avoids filling out half a data structure + // before discovering a JSON syntax error. + var d decodeState + err := checkValid(data, &d.scan) + if err != nil { + return err + } + + d.init(data) + return d.unmarshal(v) +} + +// Unmarshaler is the interface implemented by types +// that can unmarshal a JSON description of themselves. +// The input can be assumed to be a valid encoding of +// a JSON value. UnmarshalJSON must copy the JSON data +// if it wishes to retain the data after returning. +type Unmarshaler interface { + UnmarshalJSON([]byte) error +} + +// An UnmarshalTypeError describes a JSON value that was +// not appropriate for a value of a specific Go type. 
+type UnmarshalTypeError struct { + Value string // description of JSON value - "bool", "array", "number -5" + Type reflect.Type // type of Go value it could not be assigned to + Offset int64 // error occurred after reading Offset bytes + Struct string // name of the struct type containing the field + Field string // the full path from root node to the field, include embedded struct +} + +func (e *UnmarshalTypeError) Error() string { + if e.Struct != "" || e.Field != "" { + return "json: cannot unmarshal " + e.Value + " into Go struct field " + e.Struct + "." + e.Field + " of type " + e.Type.String() + } + return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String() +} + +// An UnmarshalFieldError describes a JSON object key that +// led to an unexported (and therefore unwritable) struct field. +// +// Deprecated: No longer used; kept for compatibility. +type UnmarshalFieldError struct { + Key string + Type reflect.Type + Field reflect.StructField +} + +func (e *UnmarshalFieldError) Error() string { + return "json: cannot unmarshal object key " + strconv.Quote(e.Key) + " into unexported field " + e.Field.Name + " of type " + e.Type.String() +} + +// An InvalidUnmarshalError describes an invalid argument passed to [Unmarshal]. +// (The argument to [Unmarshal] must be a non-nil pointer.) +type InvalidUnmarshalError struct { + Type reflect.Type +} + +func (e *InvalidUnmarshalError) Error() string { + if e.Type == nil { + return "json: Unmarshal(nil)" + } + + if e.Type.Kind() != reflect.Pointer { + return "json: Unmarshal(non-pointer " + e.Type.String() + ")" + } + return "json: Unmarshal(nil " + e.Type.String() + ")" +} + +func (d *decodeState) unmarshal(v any) error { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Pointer || rv.IsNil() { + return &InvalidUnmarshalError{reflect.TypeOf(v)} + } + + d.scan.reset() + d.scanWhile(scanSkipSpace) + // We decode rv not rv.Elem because the Unmarshaler interface + // test must be applied at the top level of the value. + err := d.value(rv) + if err != nil { + return d.addErrorContext(err) + } + return d.savedError +} + +// A Number represents a JSON number literal. +type Number string + +// String returns the literal text of the number. +func (n Number) String() string { return string(n) } + +// Float64 returns the number as a float64. +func (n Number) Float64() (float64, error) { + return strconv.ParseFloat(string(n), 64) +} + +// Int64 returns the number as an int64. +func (n Number) Int64() (int64, error) { + return strconv.ParseInt(string(n), 10, 64) +} + +// An errorContext provides context for type errors during decoding. +type errorContext struct { + Struct reflect.Type + FieldStack []string +} + +// decodeState represents the state while decoding a JSON value. +type decodeState struct { + data []byte + off int // next read offset in data + opcode int // last read result + scan scanner + errorContext *errorContext + savedError error + useNumber bool + disallowUnknownFields bool +} + +// readIndex returns the position of the last byte read. +func (d *decodeState) readIndex() int { + return d.off - 1 +} + +// phasePanicMsg is used as a panic message when we end up with something that +// shouldn't happen. It can indicate a bug in the JSON decoder, or that +// something is editing the data slice while the decoder executes. +const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?" 
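+// An illustrative sketch of the Unmarshal behavior documented above (the
+// struct and field names here are invented for the example): a type mismatch
+// on one field does not abort decoding of the rest.
+//
+//	var v struct {
+//		Name string
+//		Age  int
+//	}
+//	err := Unmarshal([]byte(`{"Name":"Ada","Age":"forty"}`), &v)
+//	// err is an *UnmarshalTypeError describing the "Age" field;
+//	// v.Name has still been set to "Ada".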
+ +func (d *decodeState) init(data []byte) *decodeState { + d.data = data + d.off = 0 + d.savedError = nil + if d.errorContext != nil { + d.errorContext.Struct = nil + // Reuse the allocated space for the FieldStack slice. + d.errorContext.FieldStack = d.errorContext.FieldStack[:0] + } + return d +} + +// saveError saves the first err it is called with, +// for reporting at the end of the unmarshal. +func (d *decodeState) saveError(err error) { + if d.savedError == nil { + d.savedError = d.addErrorContext(err) + } +} + +// addErrorContext returns a new error enhanced with information from d.errorContext +func (d *decodeState) addErrorContext(err error) error { + if d.errorContext != nil && (d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0) { + switch err := err.(type) { + case *UnmarshalTypeError: + err.Struct = d.errorContext.Struct.Name() + fieldStack := d.errorContext.FieldStack + if err.Field != "" { + fieldStack = append(fieldStack, err.Field) + } + err.Field = strings.Join(fieldStack, ".") + } + } + return err +} + +// skip scans to the end of what was started. +func (d *decodeState) skip() { + s, data, i := &d.scan, d.data, d.off + depth := len(s.parseState) + for { + op := s.step(s, data[i]) + i++ + if len(s.parseState) < depth { + d.off = i + d.opcode = op + return + } + } +} + +// scanNext processes the byte at d.data[d.off]. +func (d *decodeState) scanNext() { + if d.off < len(d.data) { + d.opcode = d.scan.step(&d.scan, d.data[d.off]) + d.off++ + } else { + d.opcode = d.scan.eof() + d.off = len(d.data) + 1 // mark processed EOF with len+1 + } +} + +// scanWhile processes bytes in d.data[d.off:] until it +// receives a scan code not equal to op. +func (d *decodeState) scanWhile(op int) { + s, data, i := &d.scan, d.data, d.off + for i < len(data) { + newOp := s.step(s, data[i]) + i++ + if newOp != op { + d.opcode = newOp + d.off = i + return + } + } + + d.off = len(data) + 1 // mark processed EOF with len+1 + d.opcode = d.scan.eof() +} + +// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the +// common case where we're decoding a literal. The decoder scans the input +// twice, once for syntax errors and to check the length of the value, and the +// second to perform the decoding. +// +// Only in the second step do we use decodeState to tokenize literals, so we +// know there aren't any syntax errors. We can take advantage of that knowledge, +// and scan a literal's bytes much more quickly. +func (d *decodeState) rescanLiteral() { + data, i := d.data, d.off +Switch: + switch data[i-1] { + case '"': // string + for ; i < len(data); i++ { + switch data[i] { + case '\\': + i++ // escaped char + case '"': + i++ // tokenize the closing quote too + break Switch + } + } + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number + for ; i < len(data); i++ { + switch data[i] { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '.', 'e', 'E', '+', '-': + default: + break Switch + } + } + case 't': // true + i += len("rue") + case 'f': // false + i += len("alse") + case 'n': // null + i += len("ull") + } + if i < len(data) { + d.opcode = stateEndValue(&d.scan, data[i]) + } else { + d.opcode = scanEnd + } + d.off = i + 1 +} + +// value consumes a JSON value from d.data[d.off-1:], decoding into v, and +// reads the following byte ahead. If v is invalid, the value is discarded. +// The first byte of the value has been read already. 
+func (d *decodeState) value(v reflect.Value) error { + switch d.opcode { + default: + panic(phasePanicMsg) + + case scanBeginArray: + if v.IsValid() { + if err := d.array(v); err != nil { + return err + } + } else { + d.skip() + } + d.scanNext() + + case scanBeginObject: + if v.IsValid() { + if err := d.object(v); err != nil { + return err + } + } else { + d.skip() + } + d.scanNext() + + case scanBeginLiteral: + // All bytes inside literal return scanContinue op code. + start := d.readIndex() + d.rescanLiteral() + + if v.IsValid() { + if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil { + return err + } + } + } + return nil +} + +type unquotedValue struct{} + +// valueQuoted is like value but decodes a +// quoted string literal or literal null into an interface value. +// If it finds anything other than a quoted string literal or null, +// valueQuoted returns unquotedValue{}. +func (d *decodeState) valueQuoted() any { + switch d.opcode { + default: + panic(phasePanicMsg) + + case scanBeginArray, scanBeginObject: + d.skip() + d.scanNext() + + case scanBeginLiteral: + v := d.literalInterface() + switch v.(type) { + case nil, string: + return v + } + } + return unquotedValue{} +} + +// indirect walks down v allocating pointers as needed, +// until it gets to a non-pointer. +// If it encounters an Unmarshaler, indirect stops and returns that. +// If decodingNull is true, indirect stops at the first settable pointer so it +// can be set to nil. +func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { + // Issue #24153 indicates that it is generally not a guaranteed property + // that you may round-trip a reflect.Value by calling Value.Addr().Elem() + // and expect the value to still be settable for values derived from + // unexported embedded struct fields. + // + // The logic below effectively does this when it first addresses the value + // (to satisfy possible pointer methods) and continues to dereference + // subsequent pointers as necessary. + // + // After the first round-trip, we set v back to the original value to + // preserve the original RW flags contained in reflect.Value. + v0 := v + haveAddr := false + + // If v is a named type and is addressable, + // start with its address, so that if the type has pointer methods, + // we find them. + if v.Kind() != reflect.Pointer && v.Type().Name() != "" && v.CanAddr() { + haveAddr = true + v = v.Addr() + } + for { + // Load value from interface, but only if the result will be + // usefully addressable. 
+ if v.Kind() == reflect.Interface && !v.IsNil() { + e := v.Elem() + if e.Kind() == reflect.Pointer && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Pointer) { + haveAddr = false + v = e + continue + } + } + + if v.Kind() != reflect.Pointer { + break + } + + if decodingNull && v.CanSet() { + break + } + + // Prevent infinite loop if v is an interface pointing to its own address: + // var v any + // v = &v + if v.Elem().Kind() == reflect.Interface && v.Elem().Elem().Equal(v) { + v = v.Elem() + break + } + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + if v.Type().NumMethod() > 0 && v.CanInterface() { + if u, ok := v.Interface().(Unmarshaler); ok { + return u, nil, reflect.Value{} + } + if !decodingNull { + if u, ok := v.Interface().(encoding.TextUnmarshaler); ok { + return nil, u, reflect.Value{} + } + } + } + + if haveAddr { + v = v0 // restore original value after round-trip Value.Addr().Elem() + haveAddr = false + } else { + v = v.Elem() + } + } + return nil, nil, v +} + +// array consumes an array from d.data[d.off-1:], decoding into v. +// The first byte of the array ('[') has been read already. +func (d *decodeState) array(v reflect.Value) error { + // Check for unmarshaler. + u, ut, pv := indirect(v, false) + if u != nil { + start := d.readIndex() + d.skip() + return u.UnmarshalJSON(d.data[start:d.off]) + } + if ut != nil { + d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + } + v = pv + + // Check type of target. + switch v.Kind() { + case reflect.Interface: + if v.NumMethod() == 0 { + // Decoding into nil interface? Switch to non-reflect code. + ai := d.arrayInterface() + v.Set(reflect.ValueOf(ai)) + return nil + } + // Otherwise it's invalid. + fallthrough + default: + d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + case reflect.Array, reflect.Slice: + break + } + + i := 0 + for { + // Look ahead for ] - can only happen on first iteration. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndArray { + break + } + + // Expand slice length, growing the slice if necessary. + if v.Kind() == reflect.Slice { + if i >= v.Cap() { + v.Grow(1) + } + if i >= v.Len() { + v.SetLen(i + 1) + } + } + + if i < v.Len() { + // Decode into element. + if err := d.value(v.Index(i)); err != nil { + return err + } + } else { + // Ran out of fixed array: skip. + if err := d.value(reflect.Value{}); err != nil { + return err + } + } + i++ + + // Next token must be , or ]. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndArray { + break + } + if d.opcode != scanArrayValue { + panic(phasePanicMsg) + } + } + + if i < v.Len() { + if v.Kind() == reflect.Array { + for ; i < v.Len(); i++ { + v.Index(i).SetZero() // zero remainder of array + } + } else { + v.SetLen(i) // truncate the slice + } + } + if i == 0 && v.Kind() == reflect.Slice { + v.Set(reflect.MakeSlice(v.Type(), 0, 0)) + } + return nil +} + +var nullLiteral = []byte("null") +var textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() + +// object consumes an object from d.data[d.off-1:], decoding into v. +// The first byte ('{') of the object has been read already. +func (d *decodeState) object(v reflect.Value) error { + // Check for unmarshaler. 
+ u, ut, pv := indirect(v, false) + if u != nil { + start := d.readIndex() + d.skip() + return u.UnmarshalJSON(d.data[start:d.off]) + } + if ut != nil { + d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + } + v = pv + t := v.Type() + + // Decoding into nil interface? Switch to non-reflect code. + if v.Kind() == reflect.Interface && v.NumMethod() == 0 { + oi := d.objectInterface() + v.Set(reflect.ValueOf(oi)) + return nil + } + + var fields structFields + + // Check type of target: + // struct or + // map[T1]T2 where T1 is string, an integer type, + // or an encoding.TextUnmarshaler + switch v.Kind() { + case reflect.Map: + // Map key must either have string kind, have an integer kind, + // or be an encoding.TextUnmarshaler. + switch t.Key().Kind() { + case reflect.String, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + default: + if !reflect.PointerTo(t.Key()).Implements(textUnmarshalerType) { + d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) + d.skip() + return nil + } + } + if v.IsNil() { + v.Set(reflect.MakeMap(t)) + } + case reflect.Struct: + fields = cachedTypeFields(t) + // ok + default: + d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) + d.skip() + return nil + } + + var mapElem reflect.Value + var origErrorContext errorContext + if d.errorContext != nil { + origErrorContext = *d.errorContext + } + + for { + // Read opening " of string key or closing }. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if d.opcode != scanBeginLiteral { + panic(phasePanicMsg) + } + + // Read key. + start := d.readIndex() + d.rescanLiteral() + item := d.data[start:d.readIndex()] + key, ok := unquoteBytes(item) + if !ok { + panic(phasePanicMsg) + } + + // Figure out field corresponding to key. + var subv reflect.Value + destring := false // whether the value is wrapped in a string to be decoded first + + if v.Kind() == reflect.Map { + elemType := t.Elem() + if !mapElem.IsValid() { + mapElem = reflect.New(elemType).Elem() + } else { + mapElem.SetZero() + } + subv = mapElem + } else { + f := fields.byExactName[string(key)] + if f == nil { + f = fields.byFoldedName[string(foldName(key))] + } + if f != nil { + subv = v + destring = f.quoted + if d.errorContext == nil { + d.errorContext = new(errorContext) + } + for i, ind := range f.index { + if subv.Kind() == reflect.Pointer { + if subv.IsNil() { + // If a struct embeds a pointer to an unexported type, + // it is not possible to set a newly allocated value + // since the field is unexported. + // + // See https://golang.org/issue/21357 + if !subv.CanSet() { + d.saveError(fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", subv.Type().Elem())) + // Invalidate subv to ensure d.value(subv) skips over + // the JSON value without assigning it to subv. 
+ subv = reflect.Value{} + destring = false + break + } + subv.Set(reflect.New(subv.Type().Elem())) + } + subv = subv.Elem() + } + if i < len(f.index)-1 { + d.errorContext.FieldStack = append( + d.errorContext.FieldStack, + subv.Type().Field(ind).Name, + ) + } + subv = subv.Field(ind) + } + d.errorContext.Struct = t + d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name) + } else if d.disallowUnknownFields { + d.saveError(fmt.Errorf("json: unknown field %q", key)) + } + } + + // Read : before value. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode != scanObjectKey { + panic(phasePanicMsg) + } + d.scanWhile(scanSkipSpace) + + if destring { + switch qv := d.valueQuoted().(type) { + case nil: + if err := d.literalStore(nullLiteral, subv, false); err != nil { + return err + } + case string: + if err := d.literalStore([]byte(qv), subv, true); err != nil { + return err + } + default: + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) + } + } else { + if err := d.value(subv); err != nil { + return err + } + } + + // Write value back to map; + // if using struct, subv points into struct already. + if v.Kind() == reflect.Map { + kt := t.Key() + var kv reflect.Value + if reflect.PointerTo(kt).Implements(textUnmarshalerType) { + kv = reflect.New(kt) + if err := d.literalStore(item, kv, true); err != nil { + return err + } + kv = kv.Elem() + } else { + switch kt.Kind() { + case reflect.String: + kv = reflect.New(kt).Elem() + kv.SetString(string(key)) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + s := string(key) + n, err := strconv.ParseInt(s, 10, 64) + if err != nil || kt.OverflowInt(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) + break + } + kv = reflect.New(kt).Elem() + kv.SetInt(n) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + s := string(key) + n, err := strconv.ParseUint(s, 10, 64) + if err != nil || kt.OverflowUint(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) + break + } + kv = reflect.New(kt).Elem() + kv.SetUint(n) + default: + panic("json: Unexpected key type") // should never occur + } + } + if kv.IsValid() { + v.SetMapIndex(kv, subv) + } + } + + // Next token must be , or }. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.errorContext != nil { + // Reset errorContext to its original state. + // Keep the same underlying array for FieldStack, to reuse the + // space and avoid unnecessary allocs. + d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)] + d.errorContext.Struct = origErrorContext.Struct + } + if d.opcode == scanEndObject { + break + } + if d.opcode != scanObjectValue { + panic(phasePanicMsg) + } + } + return nil +} + +// convertNumber converts the number literal s to a float64 or a Number +// depending on the setting of d.useNumber. +func (d *decodeState) convertNumber(s string) (any, error) { + if d.useNumber { + return Number(s), nil + } + f, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, &UnmarshalTypeError{Value: "number " + s, Type: reflect.TypeFor[float64](), Offset: int64(d.off)} + } + return f, nil +} + +var numberType = reflect.TypeFor[Number]() + +// literalStore decodes a literal stored in item into v. 
+// +// fromQuoted indicates whether this literal came from unwrapping a +// string from the ",string" struct tag option. this is used only to +// produce more helpful error messages. +func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) error { + // Check for unmarshaler. + if len(item) == 0 { + // Empty string given. + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + return nil + } + isNull := item[0] == 'n' // null + u, ut, pv := indirect(v, isNull) + if u != nil { + return u.UnmarshalJSON(item) + } + if ut != nil { + if item[0] != '"' { + if fromQuoted { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + return nil + } + val := "number" + switch item[0] { + case 'n': + val = "null" + case 't', 'f': + val = "bool" + } + d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) + return nil + } + s, ok := unquoteBytes(item) + if !ok { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + return ut.UnmarshalText(s) + } + + v = pv + + switch c := item[0]; c { + case 'n': // null + // The main parser checks that only true and false can reach here, + // but if this was a quoted string input, it could be anything. + if fromQuoted && string(item) != "null" { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + break + } + switch v.Kind() { + case reflect.Interface, reflect.Pointer, reflect.Map, reflect.Slice: + v.SetZero() + // otherwise, ignore null for primitives/string + } + case 't', 'f': // true, false + value := item[0] == 't' + // The main parser checks that only true and false can reach here, + // but if this was a quoted string input, it could be anything. 
+ if fromQuoted && string(item) != "true" && string(item) != "false" { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + break + } + switch v.Kind() { + default: + if fromQuoted { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) + } + case reflect.Bool: + v.SetBool(value) + case reflect.Interface: + if v.NumMethod() == 0 { + v.Set(reflect.ValueOf(value)) + } else { + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) + } + } + + case '"': // string + s, ok := unquoteBytes(item) + if !ok { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + switch v.Kind() { + default: + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + case reflect.Slice: + if v.Type().Elem().Kind() != reflect.Uint8 { + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) + n, err := base64.StdEncoding.Decode(b, s) + if err != nil { + d.saveError(err) + break + } + v.SetBytes(b[:n]) + case reflect.String: + t := string(s) + if v.Type() == numberType && !isValidNumber(t) { + return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item) + } + v.SetString(t) + case reflect.Interface: + if v.NumMethod() == 0 { + v.Set(reflect.ValueOf(string(s))) + } else { + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + } + } + + default: // number + if c != '-' && (c < '0' || c > '9') { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + switch v.Kind() { + default: + if v.Kind() == reflect.String && v.Type() == numberType { + // s must be a valid number, because it's + // already been tokenized. 
+ v.SetString(string(item)) + break + } + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) + case reflect.Interface: + n, err := d.convertNumber(string(item)) + if err != nil { + d.saveError(err) + break + } + if v.NumMethod() != 0 { + d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.Set(reflect.ValueOf(n)) + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + n, err := strconv.ParseInt(string(item), 10, 64) + if err != nil || v.OverflowInt(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetInt(n) + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + n, err := strconv.ParseUint(string(item), 10, 64) + if err != nil || v.OverflowUint(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetUint(n) + + case reflect.Float32, reflect.Float64: + n, err := strconv.ParseFloat(string(item), v.Type().Bits()) + if err != nil || v.OverflowFloat(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetFloat(n) + } + } + return nil +} + +// The xxxInterface routines build up a value to be stored +// in an empty interface. They are not strictly necessary, +// but they avoid the weight of reflection in this common case. + +// valueInterface is like value but returns any. +func (d *decodeState) valueInterface() (val any) { + switch d.opcode { + default: + panic(phasePanicMsg) + case scanBeginArray: + val = d.arrayInterface() + d.scanNext() + case scanBeginObject: + val = d.objectInterface() + d.scanNext() + case scanBeginLiteral: + val = d.literalInterface() + } + return +} + +// arrayInterface is like array but returns []any. +func (d *decodeState) arrayInterface() []any { + var v = make([]any, 0) + for { + // Look ahead for ] - can only happen on first iteration. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndArray { + break + } + + v = append(v, d.valueInterface()) + + // Next token must be , or ]. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndArray { + break + } + if d.opcode != scanArrayValue { + panic(phasePanicMsg) + } + } + return v +} + +// objectInterface is like object but returns map[string]any. +func (d *decodeState) objectInterface() map[string]any { + m := make(map[string]any) + for { + // Read opening " of string key or closing }. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if d.opcode != scanBeginLiteral { + panic(phasePanicMsg) + } + + // Read string key. + start := d.readIndex() + d.rescanLiteral() + item := d.data[start:d.readIndex()] + key, ok := unquote(item) + if !ok { + panic(phasePanicMsg) + } + + // Read : before value. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode != scanObjectKey { + panic(phasePanicMsg) + } + d.scanWhile(scanSkipSpace) + + // Read value. + m[key] = d.valueInterface() + + // Next token must be , or }. 
+ if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndObject { + break + } + if d.opcode != scanObjectValue { + panic(phasePanicMsg) + } + } + return m +} + +// literalInterface consumes and returns a literal from d.data[d.off-1:] and +// it reads the following byte ahead. The first byte of the literal has been +// read already (that's how the caller knows it's a literal). +func (d *decodeState) literalInterface() any { + // All bytes inside literal return scanContinue op code. + start := d.readIndex() + d.rescanLiteral() + + item := d.data[start:d.readIndex()] + + switch c := item[0]; c { + case 'n': // null + return nil + + case 't', 'f': // true, false + return c == 't' + + case '"': // string + s, ok := unquote(item) + if !ok { + panic(phasePanicMsg) + } + return s + + default: // number + if c != '-' && (c < '0' || c > '9') { + panic(phasePanicMsg) + } + n, err := d.convertNumber(string(item)) + if err != nil { + d.saveError(err) + } + return n + } +} + +// getu4 decodes \uXXXX from the beginning of s, returning the hex value, +// or it returns -1. +func getu4(s []byte) rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1 + } + var r rune + for _, c := range s[2:6] { + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = c - 'a' + 10 + case 'A' <= c && c <= 'F': + c = c - 'A' + 10 + default: + return -1 + } + r = r*16 + rune(c) + } + return r +} + +// unquote converts a quoted JSON string literal s into an actual string t. +// The rules are different than for Go, so cannot use strconv.Unquote. +func unquote(s []byte) (t string, ok bool) { + s, ok = unquoteBytes(s) + t = string(s) + return +} + +// unquoteBytes should be an internal detail, +// but widely used packages access it using linkname. +// Notable members of the hall of shame include: +// - github.com/bytedance/sonic +// +// Do not remove or change the type signature. +// See go.dev/issue/67401. +// +//go:linkname unquoteBytes +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return + } + s = s[1 : len(s)-1] + + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { + return s, true + } + + b := make([]byte, len(s)+2*utf8.UTFMax) + w := copy(b, s[0:r]) + for r < len(s) { + // Out of room? Can only happen if s is full of + // malformed UTF-8 and we're replacing each + // byte with RuneError. + if w >= len(b)-2*utf8.UTFMax { + nb := make([]byte, (len(b)+utf8.UTFMax)*2) + copy(nb, b[0:w]) + b = nb + } + switch c := s[r]; { + case c == '\\': + r++ + if r >= len(s) { + return + } + switch s[r] { + default: + return + case '"', '\\', '/', '\'': + b[w] = s[r] + r++ + w++ + case 'b': + b[w] = '\b' + r++ + w++ + case 'f': + b[w] = '\f' + r++ + w++ + case 'n': + b[w] = '\n' + r++ + w++ + case 'r': + b[w] = '\r' + r++ + w++ + case 't': + b[w] = '\t' + r++ + w++ + case 'u': + r-- + rr := getu4(s[r:]) + if rr < 0 { + return + } + r += 6 + if utf16.IsSurrogate(rr) { + rr1 := getu4(s[r:]) + if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { + // A valid pair; consume. 
+ r += 6 + w += utf8.EncodeRune(b[w:], dec) + break + } + // Invalid surrogate; fall back to replacement rune. + rr = unicode.ReplacementChar + } + w += utf8.EncodeRune(b[w:], rr) + } + + // Quote, control characters are invalid. + case c == '"', c < ' ': + return + + // ASCII + case c < utf8.RuneSelf: + b[w] = c + r++ + w++ + + // Coerce to well-formed UTF-8. + default: + rr, size := utf8.DecodeRune(s[r:]) + r += size + w += utf8.EncodeRune(b[w:], rr) + } + } + return b[0:w], true +} diff --git a/pkg/encoders/json/decode_test.go b/pkg/encoders/json/decode_test.go new file mode 100644 index 0000000..0df31c8 --- /dev/null +++ b/pkg/encoders/json/decode_test.go @@ -0,0 +1,2830 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !goexperiment.jsonv2 + +package json + +import ( + "bytes" + "encoding" + "errors" + "fmt" + "image" + "io" + "maps" + "math" + "math/big" + "net" + "reflect" + "slices" + "strconv" + "strings" + "testing" + "time" +) + +type T struct { + X string + Y int + Z int `json:"-"` +} + +type U struct { + Alphabet string `json:"alpha"` +} + +type V struct { + F1 any + F2 int32 + F3 Number + F4 *VOuter +} + +type VOuter struct { + V V +} + +type W struct { + S SS +} + +type P struct { + PP PP +} + +type PP struct { + T T + Ts []T +} + +type SS string + +func (*SS) UnmarshalJSON(data []byte) error { + return &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[SS]()} +} + +type TAlias T + +func (tt *TAlias) UnmarshalJSON(data []byte) error { + t := T{} + if err := Unmarshal(data, &t); err != nil { + return err + } + *tt = TAlias(t) + return nil +} + +type TOuter struct { + T TAlias +} + +// ifaceNumAsFloat64/ifaceNumAsNumber are used to test unmarshaling with and +// without UseNumber +var ifaceNumAsFloat64 = map[string]any{ + "k1": float64(1), + "k2": "s", + "k3": []any{float64(1), float64(2.0), float64(3e-3)}, + "k4": map[string]any{"kk1": "s", "kk2": float64(2)}, +} + +var ifaceNumAsNumber = map[string]any{ + "k1": Number("1"), + "k2": "s", + "k3": []any{Number("1"), Number("2.0"), Number("3e-3")}, + "k4": map[string]any{"kk1": "s", "kk2": Number("2")}, +} + +type tx struct { + x int +} + +type u8 uint8 + +// A type that can unmarshal itself. + +type unmarshaler struct { + T bool +} + +func (u *unmarshaler) UnmarshalJSON(b []byte) error { + *u = unmarshaler{true} // All we need to see that UnmarshalJSON is called. + return nil +} + +type ustruct struct { + M unmarshaler +} + +type unmarshalerText struct { + A, B string +} + +// needed for re-marshaling tests +func (u unmarshalerText) MarshalText() ([]byte, error) { + return []byte(u.A + ":" + u.B), nil +} + +func (u *unmarshalerText) UnmarshalText(b []byte) error { + pos := bytes.IndexByte(b, ':') + if pos == -1 { + return errors.New("missing separator") + } + u.A, u.B = string(b[:pos]), string(b[pos+1:]) + return nil +} + +var _ encoding.TextUnmarshaler = (*unmarshalerText)(nil) + +type ustructText struct { + M unmarshalerText +} + +// u8marshal is an integer type that can marshal/unmarshal itself. 
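+// Its text form is the integer prefixed with 'u' (2 marshals as "u2"), and
+// UnmarshalText rejects input without that prefix. The map-key tests below use
+// it to check that encoding.TextMarshaler/TextUnmarshaler take precedence over
+// the default integer handling of map keys.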
+type u8marshal uint8 + +func (u8 u8marshal) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf("u%d", u8)), nil +} + +var errMissingU8Prefix = errors.New("missing 'u' prefix") + +func (u8 *u8marshal) UnmarshalText(b []byte) error { + if !bytes.HasPrefix(b, []byte{'u'}) { + return errMissingU8Prefix + } + n, err := strconv.Atoi(string(b[1:])) + if err != nil { + return err + } + *u8 = u8marshal(n) + return nil +} + +var _ encoding.TextUnmarshaler = (*u8marshal)(nil) + +var ( + umtrue = unmarshaler{true} + umslice = []unmarshaler{{true}} + umstruct = ustruct{unmarshaler{true}} + + umtrueXY = unmarshalerText{"x", "y"} + umsliceXY = []unmarshalerText{{"x", "y"}} + umstructXY = ustructText{unmarshalerText{"x", "y"}} + + ummapXY = map[unmarshalerText]bool{{"x", "y"}: true} +) + +// Test data structures for anonymous fields. + +type Point struct { + Z int +} + +type Top struct { + Level0 int + Embed0 + *Embed0a + *Embed0b `json:"e,omitempty"` // treated as named + Embed0c `json:"-"` // ignored + Loop + Embed0p // has Point with X, Y, used + Embed0q // has Point with Z, used + embed // contains exported field +} + +type Embed0 struct { + Level1a int // overridden by Embed0a's Level1a with json tag + Level1b int // used because Embed0a's Level1b is renamed + Level1c int // used because Embed0a's Level1c is ignored + Level1d int // annihilated by Embed0a's Level1d + Level1e int `json:"x"` // annihilated by Embed0a.Level1e +} + +type Embed0a struct { + Level1a int `json:"Level1a,omitempty"` + Level1b int `json:"LEVEL1B,omitempty"` + Level1c int `json:"-"` + Level1d int // annihilated by Embed0's Level1d + Level1f int `json:"x"` // annihilated by Embed0's Level1e +} + +type Embed0b Embed0 + +type Embed0c Embed0 + +type Embed0p struct { + image.Point +} + +type Embed0q struct { + Point +} + +type embed struct { + Q int +} + +type Loop struct { + Loop1 int `json:",omitempty"` + Loop2 int `json:",omitempty"` + *Loop +} + +// From reflect test: +// The X in S6 and S7 annihilate, but they also block the X in S8.S9. +type S5 struct { + S6 + S7 + S8 +} + +type S6 struct { + X int +} + +type S7 S6 + +type S8 struct { + S9 +} + +type S9 struct { + X int + Y int +} + +// From reflect test: +// The X in S11.S6 and S12.S6 annihilate, but they also block the X in S13.S8.S9. +type S10 struct { + S11 + S12 + S13 +} + +type S11 struct { + S6 +} + +type S12 struct { + S6 +} + +type S13 struct { + S8 +} + +type Ambig struct { + // Given "hello", the first match should win. 
+ First int `json:"HELLO"` + Second int `json:"Hello"` +} + +type XYZ struct { + X any + Y any + Z any +} + +type unexportedWithMethods struct{} + +func (unexportedWithMethods) F() {} + +type byteWithMarshalJSON byte + +func (b byteWithMarshalJSON) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf(`"Z%.2x"`, byte(b))), nil +} + +func (b *byteWithMarshalJSON) UnmarshalJSON(data []byte) error { + if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[2:4]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = byteWithMarshalJSON(i) + return nil +} + +type byteWithPtrMarshalJSON byte + +func (b *byteWithPtrMarshalJSON) MarshalJSON() ([]byte, error) { + return byteWithMarshalJSON(*b).MarshalJSON() +} + +func (b *byteWithPtrMarshalJSON) UnmarshalJSON(data []byte) error { + return (*byteWithMarshalJSON)(b).UnmarshalJSON(data) +} + +type byteWithMarshalText byte + +func (b byteWithMarshalText) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf(`Z%.2x`, byte(b))), nil +} + +func (b *byteWithMarshalText) UnmarshalText(data []byte) error { + if len(data) != 3 || data[0] != 'Z' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[1:3]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = byteWithMarshalText(i) + return nil +} + +type byteWithPtrMarshalText byte + +func (b *byteWithPtrMarshalText) MarshalText() ([]byte, error) { + return byteWithMarshalText(*b).MarshalText() +} + +func (b *byteWithPtrMarshalText) UnmarshalText(data []byte) error { + return (*byteWithMarshalText)(b).UnmarshalText(data) +} + +type intWithMarshalJSON int + +func (b intWithMarshalJSON) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf(`"Z%.2x"`, int(b))), nil +} + +func (b *intWithMarshalJSON) UnmarshalJSON(data []byte) error { + if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[2:4]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = intWithMarshalJSON(i) + return nil +} + +type intWithPtrMarshalJSON int + +func (b *intWithPtrMarshalJSON) MarshalJSON() ([]byte, error) { + return intWithMarshalJSON(*b).MarshalJSON() +} + +func (b *intWithPtrMarshalJSON) UnmarshalJSON(data []byte) error { + return (*intWithMarshalJSON)(b).UnmarshalJSON(data) +} + +type intWithMarshalText int + +func (b intWithMarshalText) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf(`Z%.2x`, int(b))), nil +} + +func (b *intWithMarshalText) UnmarshalText(data []byte) error { + if len(data) != 3 || data[0] != 'Z' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[1:3]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = intWithMarshalText(i) + return nil +} + +type intWithPtrMarshalText int + +func (b *intWithPtrMarshalText) MarshalText() ([]byte, error) { + return intWithMarshalText(*b).MarshalText() +} + +func (b *intWithPtrMarshalText) UnmarshalText(data []byte) error { + return (*intWithMarshalText)(b).UnmarshalText(data) +} + +type mapStringToStringData struct { + Data map[string]string `json:"data"` +} + +type B struct { + B bool `json:",string"` +} + +type DoublePtr struct { + I **int + J **int +} + +var unmarshalTests = []struct { + CaseName + in string + ptr any // new(type) + out any + err error + useNumber bool + golden bool + disallowUnknownFields bool +}{ + // 
basic types + {CaseName: Name(""), in: `true`, ptr: new(bool), out: true}, + {CaseName: Name(""), in: `1`, ptr: new(int), out: 1}, + {CaseName: Name(""), in: `1.2`, ptr: new(float64), out: 1.2}, + {CaseName: Name(""), in: `-5`, ptr: new(int16), out: int16(-5)}, + {CaseName: Name(""), in: `2`, ptr: new(Number), out: Number("2"), useNumber: true}, + {CaseName: Name(""), in: `2`, ptr: new(Number), out: Number("2")}, + {CaseName: Name(""), in: `2`, ptr: new(any), out: float64(2.0)}, + {CaseName: Name(""), in: `2`, ptr: new(any), out: Number("2"), useNumber: true}, + {CaseName: Name(""), in: `"a\u1234"`, ptr: new(string), out: "a\u1234"}, + {CaseName: Name(""), in: `"http:\/\/"`, ptr: new(string), out: "http://"}, + {CaseName: Name(""), in: `"g-clef: \uD834\uDD1E"`, ptr: new(string), out: "g-clef: \U0001D11E"}, + {CaseName: Name(""), in: `"invalid: \uD834x\uDD1E"`, ptr: new(string), out: "invalid: \uFFFDx\uFFFD"}, + {CaseName: Name(""), in: "null", ptr: new(any), out: nil}, + {CaseName: Name(""), in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeFor[string](), 7, "T", "X"}}, + {CaseName: Name(""), in: `{"X": 23}`, ptr: new(T), out: T{}, err: &UnmarshalTypeError{"number", reflect.TypeFor[string](), 8, "T", "X"}}, + {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), out: tx{}}, + {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), out: tx{}}, + {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), err: fmt.Errorf("json: unknown field \"x\""), disallowUnknownFields: true}, + {CaseName: Name(""), in: `{"S": 23}`, ptr: new(W), out: W{}, err: &UnmarshalTypeError{"number", reflect.TypeFor[SS](), 0, "W", "S"}}, + {CaseName: Name(""), in: `{"T": {"X": 23}}`, ptr: new(TOuter), out: TOuter{}, err: &UnmarshalTypeError{"number", reflect.TypeFor[string](), 8, "TOuter", "T.X"}}, + {CaseName: Name(""), in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}}, + {CaseName: Name(""), in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true}, + {CaseName: Name(""), in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(any), out: ifaceNumAsFloat64}, + {CaseName: Name(""), in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(any), out: ifaceNumAsNumber, useNumber: true}, + + // raw values with whitespace + {CaseName: Name(""), in: "\n true ", ptr: new(bool), out: true}, + {CaseName: Name(""), in: "\t 1 ", ptr: new(int), out: 1}, + {CaseName: Name(""), in: "\r 1.2 ", ptr: new(float64), out: 1.2}, + {CaseName: Name(""), in: "\t -5 \n", ptr: new(int16), out: int16(-5)}, + {CaseName: Name(""), in: "\t \"a\\u1234\" \n", ptr: new(string), out: "a\u1234"}, + + // Z has a "-" tag. 
+ {CaseName: Name(""), in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}}, + {CaseName: Name(""), in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}, err: fmt.Errorf("json: unknown field \"Z\""), disallowUnknownFields: true}, + + {CaseName: Name(""), in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}}, + {CaseName: Name(""), in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}, err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, + {CaseName: Name(""), in: `{"alpha": "abc"}`, ptr: new(U), out: U{Alphabet: "abc"}}, + {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), out: U{}}, + {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, + + // syntax errors + {CaseName: Name(""), in: ``, ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 0}}, + {CaseName: Name(""), in: " \n\r\t", ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 4}}, + {CaseName: Name(""), in: `[2, 3`, ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 5}}, + {CaseName: Name(""), in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", 17}}, + {CaseName: Name(""), in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}}, + {CaseName: Name(""), in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true}, + {CaseName: Name(""), in: `{"F3": -}`, ptr: new(V), err: &SyntaxError{"invalid character '}' in numeric literal", 9}}, + + // raw value errors + {CaseName: Name(""), in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {CaseName: Name(""), in: " 42 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 5}}, + {CaseName: Name(""), in: "\x01 true", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {CaseName: Name(""), in: " false \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 8}}, + {CaseName: Name(""), in: "\x01 1.2", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {CaseName: Name(""), in: " 3.4 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 6}}, + {CaseName: Name(""), in: "\x01 \"string\"", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {CaseName: Name(""), in: " \"string\" \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 11}}, + + // array tests + {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([3]int), out: [3]int{1, 2, 3}}, + {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([1]int), out: [1]int{1}}, + {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([5]int), out: [5]int{1, 2, 3, 0, 0}}, + {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new(MustNotUnmarshalJSON), err: errors.New("MustNotUnmarshalJSON was used")}, + + // empty array to interface test + {CaseName: Name(""), in: `[]`, ptr: new([]any), out: []any{}}, + {CaseName: Name(""), in: `null`, ptr: new([]any), out: []any(nil)}, + {CaseName: Name(""), in: `{"T":[]}`, ptr: new(map[string]any), out: map[string]any{"T": []any{}}}, + {CaseName: Name(""), in: `{"T":null}`, ptr: new(map[string]any), out: map[string]any{"T": any(nil)}}, + + // composite tests + {CaseName: Name(""), in: allValueIndent, ptr: new(All), out: allValue}, + {CaseName: Name(""), in: allValueCompact, ptr: new(All), out: allValue}, 
+ {CaseName: Name(""), in: allValueIndent, ptr: new(*All), out: &allValue}, + {CaseName: Name(""), in: allValueCompact, ptr: new(*All), out: &allValue}, + {CaseName: Name(""), in: pallValueIndent, ptr: new(All), out: pallValue}, + {CaseName: Name(""), in: pallValueCompact, ptr: new(All), out: pallValue}, + {CaseName: Name(""), in: pallValueIndent, ptr: new(*All), out: &pallValue}, + {CaseName: Name(""), in: pallValueCompact, ptr: new(*All), out: &pallValue}, + + // unmarshal interface test + {CaseName: Name(""), in: `{"T":false}`, ptr: new(unmarshaler), out: umtrue}, // use "false" so test will fail if custom unmarshaler is not called + {CaseName: Name(""), in: `{"T":false}`, ptr: new(*unmarshaler), out: &umtrue}, + {CaseName: Name(""), in: `[{"T":false}]`, ptr: new([]unmarshaler), out: umslice}, + {CaseName: Name(""), in: `[{"T":false}]`, ptr: new(*[]unmarshaler), out: &umslice}, + {CaseName: Name(""), in: `{"M":{"T":"x:y"}}`, ptr: new(ustruct), out: umstruct}, + + // UnmarshalText interface test + {CaseName: Name(""), in: `"x:y"`, ptr: new(unmarshalerText), out: umtrueXY}, + {CaseName: Name(""), in: `"x:y"`, ptr: new(*unmarshalerText), out: &umtrueXY}, + {CaseName: Name(""), in: `["x:y"]`, ptr: new([]unmarshalerText), out: umsliceXY}, + {CaseName: Name(""), in: `["x:y"]`, ptr: new(*[]unmarshalerText), out: &umsliceXY}, + {CaseName: Name(""), in: `{"M":"x:y"}`, ptr: new(ustructText), out: umstructXY}, + + // integer-keyed map test + { + CaseName: Name(""), + in: `{"-1":"a","0":"b","1":"c"}`, + ptr: new(map[int]string), + out: map[int]string{-1: "a", 0: "b", 1: "c"}, + }, + { + CaseName: Name(""), + in: `{"0":"a","10":"c","9":"b"}`, + ptr: new(map[u8]string), + out: map[u8]string{0: "a", 9: "b", 10: "c"}, + }, + { + CaseName: Name(""), + in: `{"-9223372036854775808":"min","9223372036854775807":"max"}`, + ptr: new(map[int64]string), + out: map[int64]string{math.MinInt64: "min", math.MaxInt64: "max"}, + }, + { + CaseName: Name(""), + in: `{"18446744073709551615":"max"}`, + ptr: new(map[uint64]string), + out: map[uint64]string{math.MaxUint64: "max"}, + }, + { + CaseName: Name(""), + in: `{"0":false,"10":true}`, + ptr: new(map[uintptr]bool), + out: map[uintptr]bool{0: false, 10: true}, + }, + + // Check that MarshalText and UnmarshalText take precedence + // over default integer handling in map keys. 
+ { + CaseName: Name(""), + in: `{"u2":4}`, + ptr: new(map[u8marshal]int), + out: map[u8marshal]int{2: 4}, + }, + { + CaseName: Name(""), + in: `{"2":4}`, + ptr: new(map[u8marshal]int), + out: map[u8marshal]int{}, + err: errMissingU8Prefix, + }, + + // integer-keyed map errors + { + CaseName: Name(""), + in: `{"abc":"abc"}`, + ptr: new(map[int]string), + out: map[int]string{}, + err: &UnmarshalTypeError{Value: "number abc", Type: reflect.TypeFor[int](), Offset: 2}, + }, + { + CaseName: Name(""), + in: `{"256":"abc"}`, + ptr: new(map[uint8]string), + out: map[uint8]string{}, + err: &UnmarshalTypeError{Value: "number 256", Type: reflect.TypeFor[uint8](), Offset: 2}, + }, + { + CaseName: Name(""), + in: `{"128":"abc"}`, + ptr: new(map[int8]string), + out: map[int8]string{}, + err: &UnmarshalTypeError{Value: "number 128", Type: reflect.TypeFor[int8](), Offset: 2}, + }, + { + CaseName: Name(""), + in: `{"-1":"abc"}`, + ptr: new(map[uint8]string), + out: map[uint8]string{}, + err: &UnmarshalTypeError{Value: "number -1", Type: reflect.TypeFor[uint8](), Offset: 2}, + }, + { + CaseName: Name(""), + in: `{"F":{"a":2,"3":4}}`, + ptr: new(map[string]map[int]int), + out: map[string]map[int]int{"F": {3: 4}}, + err: &UnmarshalTypeError{Value: "number a", Type: reflect.TypeFor[int](), Offset: 7}, + }, + { + CaseName: Name(""), + in: `{"F":{"a":2,"3":4}}`, + ptr: new(map[string]map[uint]int), + out: map[string]map[uint]int{"F": {3: 4}}, + err: &UnmarshalTypeError{Value: "number a", Type: reflect.TypeFor[uint](), Offset: 7}, + }, + + // Map keys can be encoding.TextUnmarshalers. + {CaseName: Name(""), in: `{"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, + // If multiple values for the same key exists, only the most recent value is used. + {CaseName: Name(""), in: `{"x:y":false,"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, + + { + CaseName: Name(""), + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12 + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18 + }`, + ptr: new(Top), + out: Top{ + Level0: 1, + Embed0: Embed0{ + Level1b: 2, + Level1c: 3, + }, + Embed0a: &Embed0a{ + Level1a: 5, + Level1b: 6, + }, + Embed0b: &Embed0b{ + Level1a: 8, + Level1b: 9, + Level1c: 10, + Level1d: 11, + Level1e: 12, + }, + Loop: Loop{ + Loop1: 13, + Loop2: 14, + }, + Embed0p: Embed0p{ + Point: image.Point{X: 15, Y: 16}, + }, + Embed0q: Embed0q{ + Point: Point{Z: 17}, + }, + embed: embed{ + Q: 18, + }, + }, + }, + { + CaseName: Name(""), + in: `{"hello": 1}`, + ptr: new(Ambig), + out: Ambig{First: 1}, + }, + + { + CaseName: Name(""), + in: `{"X": 1,"Y":2}`, + ptr: new(S5), + out: S5{S8: S8{S9: S9{Y: 2}}}, + }, + { + CaseName: Name(""), + in: `{"X": 1,"Y":2}`, + ptr: new(S5), + out: S5{S8: S8{S9{Y: 2}}}, + err: fmt.Errorf("json: unknown field \"X\""), + disallowUnknownFields: true, + }, + { + CaseName: Name(""), + in: `{"X": 1,"Y":2}`, + ptr: new(S10), + out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}}, + }, + { + CaseName: Name(""), + in: `{"X": 1,"Y":2}`, + ptr: new(S10), + out: S10{S13: S13{S8{S9{Y: 2}}}}, + err: fmt.Errorf("json: unknown field \"X\""), + disallowUnknownFields: true, + }, + { + CaseName: Name(""), + in: `{"I": 0, "I": null, "J": null}`, + ptr: new(DoublePtr), + out: DoublePtr{I: nil, J: nil}, + }, + + // invalid UTF-8 is coerced to valid UTF-8. 
+ { + CaseName: Name(""), + in: "\"hello\xffworld\"", + ptr: new(string), + out: "hello\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\xc2\xc2world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\xc2\xffworld\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\\ud800world\"", + ptr: new(string), + out: "hello\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\\ud800\\ud800world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\\ud800\\ud800world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\xed\xa0\x80\xed\xb0\x80world\"", + ptr: new(string), + out: "hello\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdworld", + }, + + // Used to be issue 8305, but time.Time implements encoding.TextUnmarshaler so this works now. + { + CaseName: Name(""), + in: `{"2009-11-10T23:00:00Z": "hello world"}`, + ptr: new(map[time.Time]string), + out: map[time.Time]string{time.Date(2009, 11, 10, 23, 0, 0, 0, time.UTC): "hello world"}, + }, + + // issue 8305 + { + CaseName: Name(""), + in: `{"2009-11-10T23:00:00Z": "hello world"}`, + ptr: new(map[Point]string), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[map[Point]string](), Offset: 1}, + }, + { + CaseName: Name(""), + in: `{"asdf": "hello world"}`, + ptr: new(map[unmarshaler]string), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[map[unmarshaler]string](), Offset: 1}, + }, + + // related to issue 13783. + // Go 1.7 changed marshaling a slice of typed byte to use the methods on the byte type, + // similar to marshaling a slice of typed int. + // These tests check that, assuming the byte type also has valid decoding methods, + // either the old base64 string encoding or the new per-element encoding can be + // successfully unmarshaled. The custom unmarshalers were accessible in earlier + // versions of Go, even though the custom marshaler was not. 
+ { + CaseName: Name(""), + in: `"AQID"`, + ptr: new([]byteWithMarshalJSON), + out: []byteWithMarshalJSON{1, 2, 3}, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithMarshalJSON), + out: []byteWithMarshalJSON{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `"AQID"`, + ptr: new([]byteWithMarshalText), + out: []byteWithMarshalText{1, 2, 3}, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithMarshalText), + out: []byteWithMarshalText{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `"AQID"`, + ptr: new([]byteWithPtrMarshalJSON), + out: []byteWithPtrMarshalJSON{1, 2, 3}, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithPtrMarshalJSON), + out: []byteWithPtrMarshalJSON{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `"AQID"`, + ptr: new([]byteWithPtrMarshalText), + out: []byteWithPtrMarshalText{1, 2, 3}, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithPtrMarshalText), + out: []byteWithPtrMarshalText{1, 2, 3}, + golden: true, + }, + + // ints work with the marshaler but not the base64 []byte case + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithMarshalJSON), + out: []intWithMarshalJSON{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithMarshalText), + out: []intWithMarshalText{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithPtrMarshalJSON), + out: []intWithPtrMarshalJSON{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithPtrMarshalText), + out: []intWithPtrMarshalText{1, 2, 3}, + golden: true, + }, + + {CaseName: Name(""), in: `0.000001`, ptr: new(float64), out: 0.000001, golden: true}, + {CaseName: Name(""), in: `1e-7`, ptr: new(float64), out: 1e-7, golden: true}, + {CaseName: Name(""), in: `100000000000000000000`, ptr: new(float64), out: 100000000000000000000.0, golden: true}, + {CaseName: Name(""), in: `1e+21`, ptr: new(float64), out: 1e21, golden: true}, + {CaseName: Name(""), in: `-0.000001`, ptr: new(float64), out: -0.000001, golden: true}, + {CaseName: Name(""), in: `-1e-7`, ptr: new(float64), out: -1e-7, golden: true}, + {CaseName: Name(""), in: `-100000000000000000000`, ptr: new(float64), out: -100000000000000000000.0, golden: true}, + {CaseName: Name(""), in: `-1e+21`, ptr: new(float64), out: -1e21, golden: true}, + {CaseName: Name(""), in: `999999999999999900000`, ptr: new(float64), out: 999999999999999900000.0, golden: true}, + {CaseName: Name(""), in: `9007199254740992`, ptr: new(float64), out: 9007199254740992.0, golden: true}, + {CaseName: Name(""), in: `9007199254740993`, ptr: new(float64), out: 9007199254740992.0, golden: false}, + + { + CaseName: Name(""), + in: `{"V": {"F2": "hello"}}`, + ptr: new(VOuter), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "V", + Field: "V.F2", + Type: reflect.TypeFor[int32](), + Offset: 20, + }, + }, + { + CaseName: Name(""), + in: `{"V": {"F4": {}, "F2": "hello"}}`, + ptr: new(VOuter), + out: VOuter{V: V{F4: &VOuter{}}}, + err: &UnmarshalTypeError{ + Value: "string", + Struct: "V", + Field: "V.F2", + Type: reflect.TypeFor[int32](), + Offset: 30, + }, + }, + + { + CaseName: Name(""), + in: `{"Level1a": "hello"}`, + ptr: new(Top), + out: Top{Embed0a: &Embed0a{}}, + err: &UnmarshalTypeError{ + Value: "string", + Struct: "Top", + Field: "Embed0a.Level1a", + Type: 
reflect.TypeFor[int](), + Offset: 19, + }, + }, + + // issue 15146. + // invalid inputs in wrongStringTests below. + {CaseName: Name(""), in: `{"B":"true"}`, ptr: new(B), out: B{true}, golden: true}, + {CaseName: Name(""), in: `{"B":"false"}`, ptr: new(B), out: B{false}, golden: true}, + {CaseName: Name(""), in: `{"B": "maybe"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "maybe" into bool`)}, + {CaseName: Name(""), in: `{"B": "tru"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "tru" into bool`)}, + {CaseName: Name(""), in: `{"B": "False"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "False" into bool`)}, + {CaseName: Name(""), in: `{"B": "null"}`, ptr: new(B), out: B{false}}, + {CaseName: Name(""), in: `{"B": "nul"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "nul" into bool`)}, + {CaseName: Name(""), in: `{"B": [2, 3]}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal unquoted value into bool`)}, + + // additional tests for disallowUnknownFields + { + CaseName: Name(""), + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12 + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18, + "extra": true + }`, + ptr: new(Top), + out: Top{ + Level0: 1, + Embed0: Embed0{ + Level1b: 2, + Level1c: 3, + }, + Embed0a: &Embed0a{Level1a: 5, Level1b: 6}, + Embed0b: &Embed0b{Level1a: 8, Level1b: 9, Level1c: 10, Level1d: 11, Level1e: 12}, + Loop: Loop{ + Loop1: 13, + Loop2: 14, + Loop: nil, + }, + Embed0p: Embed0p{ + Point: image.Point{ + X: 15, + Y: 16, + }, + }, + Embed0q: Embed0q{Point: Point{Z: 17}}, + embed: embed{Q: 18}, + }, + err: fmt.Errorf("json: unknown field \"extra\""), + disallowUnknownFields: true, + }, + { + CaseName: Name(""), + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12, + "extra": null + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18 + }`, + ptr: new(Top), + out: Top{ + Level0: 1, + Embed0: Embed0{ + Level1b: 2, + Level1c: 3, + }, + Embed0a: &Embed0a{Level1a: 5, Level1b: 6}, + Embed0b: &Embed0b{Level1a: 8, Level1b: 9, Level1c: 10, Level1d: 11, Level1e: 12}, + Loop: Loop{ + Loop1: 13, + Loop2: 14, + Loop: nil, + }, + Embed0p: Embed0p{ + Point: image.Point{ + X: 15, + Y: 16, + }, + }, + Embed0q: Embed0q{Point: Point{Z: 17}}, + embed: embed{Q: 18}, + }, + err: fmt.Errorf("json: unknown field \"extra\""), + disallowUnknownFields: true, + }, + // issue 26444 + // UnmarshalTypeError without field & struct values + { + CaseName: Name(""), + in: `{"data":{"test1": "bob", "test2": 123}}`, + ptr: new(mapStringToStringData), + out: mapStringToStringData{map[string]string{"test1": "bob", "test2": ""}}, + err: &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[string](), Offset: 37, Struct: "mapStringToStringData", Field: "data"}, + }, + { + CaseName: Name(""), + in: `{"data":{"test1": 123, "test2": "bob"}}`, + ptr: new(mapStringToStringData), + out: mapStringToStringData{Data: map[string]string{"test1": "", "test2": "bob"}}, + err: &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[string](), Offset: 21, Struct: "mapStringToStringData", 
Field: "data"}, + }, + + // trying to decode JSON arrays or objects via TextUnmarshaler + { + CaseName: Name(""), + in: `[1, 2, 3]`, + ptr: new(MustNotUnmarshalText), + err: &UnmarshalTypeError{Value: "array", Type: reflect.TypeFor[*MustNotUnmarshalText](), Offset: 1}, + }, + { + CaseName: Name(""), + in: `{"foo": "bar"}`, + ptr: new(MustNotUnmarshalText), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[*MustNotUnmarshalText](), Offset: 1}, + }, + // #22369 + { + CaseName: Name(""), + in: `{"PP": {"T": {"Y": "bad-type"}}}`, + ptr: new(P), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "T", + Field: "PP.T.Y", + Type: reflect.TypeFor[int](), + Offset: 29, + }, + }, + { + CaseName: Name(""), + in: `{"Ts": [{"Y": 1}, {"Y": 2}, {"Y": "bad-type"}]}`, + ptr: new(PP), + out: PP{Ts: []T{{Y: 1}, {Y: 2}, {Y: 0}}}, + err: &UnmarshalTypeError{ + Value: "string", + Struct: "T", + Field: "Ts.Y", + Type: reflect.TypeFor[int](), + Offset: 44, + }, + }, + // #14702 + { + CaseName: Name(""), + in: `invalid`, + ptr: new(Number), + err: &SyntaxError{ + msg: "invalid character 'i' looking for beginning of value", + Offset: 1, + }, + }, + { + CaseName: Name(""), + in: `"invalid"`, + ptr: new(Number), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + { + CaseName: Name(""), + in: `{"A":"invalid"}`, + ptr: new(struct{ A Number }), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + { + CaseName: Name(""), + in: `{"A":"invalid"}`, + ptr: new(struct { + A Number `json:",string"` + }), + err: fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into json.Number", `invalid`), + }, + { + CaseName: Name(""), + in: `{"A":"invalid"}`, + ptr: new(map[string]Number), + out: map[string]Number{}, + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + + { + CaseName: Name(""), + in: `5`, + ptr: new(Number), + out: Number("5"), + }, + { + CaseName: Name(""), + in: `"5"`, + ptr: new(Number), + out: Number("5"), + }, + { + CaseName: Name(""), + in: `{"N":5}`, + ptr: new(struct{ N Number }), + out: struct{ N Number }{"5"}, + }, + { + CaseName: Name(""), + in: `{"N":"5"}`, + ptr: new(struct{ N Number }), + out: struct{ N Number }{"5"}, + }, + { + CaseName: Name(""), + in: `{"N":5}`, + ptr: new(struct { + N Number `json:",string"` + }), + err: fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into json.Number"), + }, + { + CaseName: Name(""), + in: `{"N":"5"}`, + ptr: new(struct { + N Number `json:",string"` + }), + out: struct { + N Number `json:",string"` + }{"5"}, + }, + + // Verify that syntactic errors are immediately fatal, + // while semantic errors are lazily reported + // (i.e., allow processing to continue). 
+ { + CaseName: Name(""), + in: `[1,2,true,4,5}`, + ptr: new([]int), + err: &SyntaxError{msg: "invalid character '}' after array element", Offset: 14}, + }, + { + CaseName: Name(""), + in: `[1,2,true,4,5]`, + ptr: new([]int), + out: []int{1, 2, 0, 4, 5}, + err: &UnmarshalTypeError{Value: "bool", Type: reflect.TypeFor[int](), Offset: 9}, + }, + + { + CaseName: Name("DashComma"), + in: `{"-":"hello"}`, + ptr: new(struct { + F string `json:"-,"` + }), + out: struct { + F string `json:"-,"` + }{"hello"}, + }, + { + CaseName: Name("DashCommaOmitEmpty"), + in: `{"-":"hello"}`, + ptr: new(struct { + F string `json:"-,omitempty"` + }), + out: struct { + F string `json:"-,omitempty"` + }{"hello"}, + }, +} + +func TestMarshal(t *testing.T) { + b, err := Marshal(allValue) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(b) != allValueCompact { + t.Errorf("Marshal:") + diff(t, b, []byte(allValueCompact)) + return + } + + b, err = Marshal(pallValue) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(b) != pallValueCompact { + t.Errorf("Marshal:") + diff(t, b, []byte(pallValueCompact)) + return + } +} + +func TestMarshalInvalidUTF8(t *testing.T) { + tests := []struct { + CaseName + in string + want string + }{ + {Name(""), "hello\xffworld", `"hello\ufffdworld"`}, + {Name(""), "", `""`}, + {Name(""), "\xff", `"\ufffd"`}, + {Name(""), "\xff\xff", `"\ufffd\ufffd"`}, + {Name(""), "a\xffb", `"a\ufffdb"`}, + {Name(""), "\xe6\x97\xa5\xe6\x9c\xac\xff\xaa\x9e", `"日本\ufffd\ufffd\ufffd"`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + got, err := Marshal(tt.in) + if string(got) != tt.want || err != nil { + t.Errorf("%s: Marshal(%q):\n\tgot: (%q, %v)\n\twant: (%q, nil)", tt.Where, tt.in, got, err, tt.want) + } + }) + } +} + +func TestMarshalNumberZeroVal(t *testing.T) { + var n Number + out, err := Marshal(n) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + got := string(out) + if got != "0" { + t.Fatalf("Marshal: got %s, want 0", got) + } +} + +func TestMarshalEmbeds(t *testing.T) { + top := &Top{ + Level0: 1, + Embed0: Embed0{ + Level1b: 2, + Level1c: 3, + }, + Embed0a: &Embed0a{ + Level1a: 5, + Level1b: 6, + }, + Embed0b: &Embed0b{ + Level1a: 8, + Level1b: 9, + Level1c: 10, + Level1d: 11, + Level1e: 12, + }, + Loop: Loop{ + Loop1: 13, + Loop2: 14, + }, + Embed0p: Embed0p{ + Point: image.Point{X: 15, Y: 16}, + }, + Embed0q: Embed0q{ + Point: Point{Z: 17}, + }, + embed: embed{ + Q: 18, + }, + } + got, err := Marshal(top) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + want := "{\"Level0\":1,\"Level1b\":2,\"Level1c\":3,\"Level1a\":5,\"LEVEL1B\":6,\"e\":{\"Level1a\":8,\"Level1b\":9,\"Level1c\":10,\"Level1d\":11,\"x\":12},\"Loop1\":13,\"Loop2\":14,\"X\":15,\"Y\":16,\"Z\":17,\"Q\":18}" + if string(got) != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func equalError(a, b error) bool { + isJSONError := func(err error) bool { + switch err.(type) { + case + *InvalidUTF8Error, + *InvalidUnmarshalError, + *MarshalerError, + *SyntaxError, + *UnmarshalFieldError, + *UnmarshalTypeError, + *UnsupportedTypeError, + *UnsupportedValueError: + return true + } + return false + } + + if a == nil || b == nil { + return a == nil && b == nil + } + if isJSONError(a) || isJSONError(b) { + return reflect.DeepEqual(a, b) // safe for locally defined error types + } + return a.Error() == b.Error() +} + +func TestUnmarshal(t *testing.T) { + for _, tt := range unmarshalTests { + t.Run(tt.Name, func(t *testing.T) { + in := 
[]byte(tt.in) + var scan scanner + if err := checkValid(in, &scan); err != nil { + if !equalError(err, tt.err) { + t.Fatalf("%s: checkValid error:\n\tgot %#v\n\twant %#v", tt.Where, err, tt.err) + } + } + if tt.ptr == nil { + return + } + + typ := reflect.TypeOf(tt.ptr) + if typ.Kind() != reflect.Pointer { + t.Fatalf("%s: unmarshalTest.ptr %T is not a pointer type", tt.Where, tt.ptr) + } + typ = typ.Elem() + + // v = new(right-type) + v := reflect.New(typ) + + if !reflect.DeepEqual(tt.ptr, v.Interface()) { + // There's no reason for ptr to point to non-zero data, + // as we decode into new(right-type), so the data is + // discarded. + // This can easily mean tests that silently don't test + // what they should. To test decoding into existing + // data, see TestPrefilled. + t.Fatalf("%s: unmarshalTest.ptr %#v is not a pointer to a zero value", tt.Where, tt.ptr) + } + + dec := NewDecoder(bytes.NewReader(in)) + if tt.useNumber { + dec.UseNumber() + } + if tt.disallowUnknownFields { + dec.DisallowUnknownFields() + } + if tt.err != nil && strings.Contains(tt.err.Error(), "unexpected end of JSON input") { + // In streaming mode, we expect EOF or ErrUnexpectedEOF instead. + if strings.TrimSpace(tt.in) == "" { + tt.err = io.EOF + } else { + tt.err = io.ErrUnexpectedEOF + } + } + if err := dec.Decode(v.Interface()); !equalError(err, tt.err) { + t.Fatalf("%s: Decode error:\n\tgot: %v\n\twant: %v\n\n\tgot: %#v\n\twant: %#v", tt.Where, err, tt.err, err, tt.err) + } else if err != nil && tt.out == nil { + // Initialize tt.out during an error where there are no mutations, + // so the output is just the zero value of the input type. + tt.out = reflect.Zero(v.Elem().Type()).Interface() + } + if got := v.Elem().Interface(); !reflect.DeepEqual(got, tt.out) { + gotJSON, _ := Marshal(got) + wantJSON, _ := Marshal(tt.out) + t.Fatalf("%s: Decode:\n\tgot: %#+v\n\twant: %#+v\n\n\tgotJSON: %s\n\twantJSON: %s", tt.Where, got, tt.out, gotJSON, wantJSON) + } + + // Check round trip also decodes correctly. 
+ if tt.err == nil { + enc, err := Marshal(v.Interface()) + if err != nil { + t.Fatalf("%s: Marshal error after roundtrip: %v", tt.Where, err) + } + if tt.golden && !bytes.Equal(enc, in) { + t.Errorf("%s: Marshal:\n\tgot: %s\n\twant: %s", tt.Where, enc, in) + } + vv := reflect.New(reflect.TypeOf(tt.ptr).Elem()) + dec = NewDecoder(bytes.NewReader(enc)) + if tt.useNumber { + dec.UseNumber() + } + if err := dec.Decode(vv.Interface()); err != nil { + t.Fatalf("%s: Decode(%#q) error after roundtrip: %v", tt.Where, enc, err) + } + if !reflect.DeepEqual(v.Elem().Interface(), vv.Elem().Interface()) { + t.Fatalf("%s: Decode:\n\tgot: %#+v\n\twant: %#+v\n\n\tgotJSON: %s\n\twantJSON: %s", + tt.Where, v.Elem().Interface(), vv.Elem().Interface(), + stripWhitespace(string(enc)), stripWhitespace(string(in))) + } + } + }) + } +} + +func TestUnmarshalMarshal(t *testing.T) { + initBig() + var v any + if err := Unmarshal(jsonBig, &v); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + b, err := Marshal(v) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if !bytes.Equal(jsonBig, b) { + t.Errorf("Marshal:") + diff(t, b, jsonBig) + return + } +} + +// Independent of Decode, basic coverage of the accessors in Number +func TestNumberAccessors(t *testing.T) { + tests := []struct { + CaseName + in string + i int64 + intErr string + f float64 + floatErr string + }{ + {CaseName: Name(""), in: "-1.23e1", intErr: "strconv.ParseInt: parsing \"-1.23e1\": invalid syntax", f: -1.23e1}, + {CaseName: Name(""), in: "-12", i: -12, f: -12.0}, + {CaseName: Name(""), in: "1e1000", intErr: "strconv.ParseInt: parsing \"1e1000\": invalid syntax", floatErr: "strconv.ParseFloat: parsing \"1e1000\": value out of range"}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + n := Number(tt.in) + if got := n.String(); got != tt.in { + t.Errorf("%s: Number(%q).String() = %s, want %s", tt.Where, tt.in, got, tt.in) + } + if i, err := n.Int64(); err == nil && tt.intErr == "" && i != tt.i { + t.Errorf("%s: Number(%q).Int64() = %d, want %d", tt.Where, tt.in, i, tt.i) + } else if (err == nil && tt.intErr != "") || (err != nil && err.Error() != tt.intErr) { + t.Errorf("%s: Number(%q).Int64() error:\n\tgot: %v\n\twant: %v", tt.Where, tt.in, err, tt.intErr) + } + if f, err := n.Float64(); err == nil && tt.floatErr == "" && f != tt.f { + t.Errorf("%s: Number(%q).Float64() = %g, want %g", tt.Where, tt.in, f, tt.f) + } else if (err == nil && tt.floatErr != "") || (err != nil && err.Error() != tt.floatErr) { + t.Errorf("%s: Number(%q).Float64() error:\n\tgot %v\n\twant: %v", tt.Where, tt.in, err, tt.floatErr) + } + }) + } +} + +func TestLargeByteSlice(t *testing.T) { + s0 := make([]byte, 2000) + for i := range s0 { + s0[i] = byte(i) + } + b, err := Marshal(s0) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var s1 []byte + if err := Unmarshal(b, &s1); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !bytes.Equal(s0, s1) { + t.Errorf("Marshal:") + diff(t, s0, s1) + } +} + +type Xint struct { + X int +} + +func TestUnmarshalInterface(t *testing.T) { + var xint Xint + var i any = &xint + if err := Unmarshal([]byte(`{"X":1}`), &i); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if xint.X != 1 { + t.Fatalf("xint.X = %d, want 1", xint.X) + } +} + +func TestUnmarshalPtrPtr(t *testing.T) { + var xint Xint + pxint := &xint + if err := Unmarshal([]byte(`{"X":1}`), &pxint); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if xint.X != 1 { + t.Fatalf("xint.X = %d, want 1", xint.X) + } +} 
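The two tests above rely on Unmarshal writing through any pointer it finds in the destination, whether that pointer sits inside an interface value or behind another pointer. Below is a minimal standalone sketch of that behaviour; it uses the upstream encoding/json package and assumes this fork's Unmarshal behaves the same way here.

package main

import (
	"encoding/json"
	"fmt"
)

type Xint struct{ X int }

func main() {
	// An interface holding a pointer: Unmarshal decodes through the pointer,
	// so xint itself is updated and i still holds &xint afterwards.
	var xint Xint
	var i any = &xint
	if err := json.Unmarshal([]byte(`{"X":1}`), &i); err != nil {
		panic(err)
	}
	fmt.Println(xint.X) // 1

	// A pointer to a pointer: the inner pointer is followed the same way.
	var y Xint
	p := &y
	if err := json.Unmarshal([]byte(`{"X":2}`), &p); err != nil {
		panic(err)
	}
	fmt.Println(y.X) // 2
}

Had the interface held a non-pointer value, Unmarshal would have discarded it and stored a freshly decoded value instead, which is the behaviour TestInterfaceSet below walks through case by case.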
+ +func TestEscape(t *testing.T) { + const input = `"foobar"` + " [\u2028 \u2029]" + const want = `"\"foobar\"\u003chtml\u003e [\u2028 \u2029]"` + got, err := Marshal(input) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(got) != want { + t.Errorf("Marshal(%#q):\n\tgot: %s\n\twant: %s", input, got, want) + } +} + +// If people misuse the ,string modifier, the error message should be +// helpful, telling the user that they're doing it wrong. +func TestErrorMessageFromMisusedString(t *testing.T) { + // WrongString is a struct that's misusing the ,string modifier. + type WrongString struct { + Message string `json:"result,string"` + } + tests := []struct { + CaseName + in, err string + }{ + {Name(""), `{"result":"x"}`, `json: invalid use of ,string struct tag, trying to unmarshal "x" into string`}, + {Name(""), `{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`}, + {Name(""), `{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`}, + {Name(""), `{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`}, + {Name(""), `{"result":"\""}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"" into string`}, + {Name(""), `{"result":"\"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"foo" into string`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + r := strings.NewReader(tt.in) + var s WrongString + err := NewDecoder(r).Decode(&s) + got := fmt.Sprintf("%v", err) + if got != tt.err { + t.Errorf("%s: Decode error:\n\tgot: %s\n\twant: %s", tt.Where, got, tt.err) + } + }) + } +} + +type All struct { + Bool bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint uint + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + + Foo string `json:"bar"` + Foo2 string `json:"bar2,dummyopt"` + + IntStr int64 `json:",string"` + UintptrStr uintptr `json:",string"` + + PBool *bool + PInt *int + PInt8 *int8 + PInt16 *int16 + PInt32 *int32 + PInt64 *int64 + PUint *uint + PUint8 *uint8 + PUint16 *uint16 + PUint32 *uint32 + PUint64 *uint64 + PUintptr *uintptr + PFloat32 *float32 + PFloat64 *float64 + + String string + PString *string + + Map map[string]Small + MapP map[string]*Small + PMap *map[string]Small + PMapP *map[string]*Small + + EmptyMap map[string]Small + NilMap map[string]Small + + Slice []Small + SliceP []*Small + PSlice *[]Small + PSliceP *[]*Small + + EmptySlice []Small + NilSlice []Small + + StringSlice []string + ByteSlice []byte + + Small Small + PSmall *Small + PPSmall **Small + + Interface any + PInterface *any + + unexported int +} + +type Small struct { + Tag string +} + +var allValue = All{ + Bool: true, + Int: 2, + Int8: 3, + Int16: 4, + Int32: 5, + Int64: 6, + Uint: 7, + Uint8: 8, + Uint16: 9, + Uint32: 10, + Uint64: 11, + Uintptr: 12, + Float32: 14.1, + Float64: 15.1, + Foo: "foo", + Foo2: "foo2", + IntStr: 42, + UintptrStr: 44, + String: "16", + Map: map[string]Small{ + "17": {Tag: "tag17"}, + "18": {Tag: "tag18"}, + }, + MapP: map[string]*Small{ + "19": {Tag: "tag19"}, + "20": nil, + }, + EmptyMap: map[string]Small{}, + Slice: []Small{{Tag: "tag20"}, {Tag: "tag21"}}, + SliceP: []*Small{{Tag: "tag22"}, nil, {Tag: "tag23"}}, + EmptySlice: []Small{}, + StringSlice: []string{"str24", "str25", "str26"}, + ByteSlice: []byte{27, 28, 29}, + Small: Small{Tag: "tag30"}, + PSmall: &Small{Tag: 
"tag31"}, + Interface: 5.2, +} + +var pallValue = All{ + PBool: &allValue.Bool, + PInt: &allValue.Int, + PInt8: &allValue.Int8, + PInt16: &allValue.Int16, + PInt32: &allValue.Int32, + PInt64: &allValue.Int64, + PUint: &allValue.Uint, + PUint8: &allValue.Uint8, + PUint16: &allValue.Uint16, + PUint32: &allValue.Uint32, + PUint64: &allValue.Uint64, + PUintptr: &allValue.Uintptr, + PFloat32: &allValue.Float32, + PFloat64: &allValue.Float64, + PString: &allValue.String, + PMap: &allValue.Map, + PMapP: &allValue.MapP, + PSlice: &allValue.Slice, + PSliceP: &allValue.SliceP, + PPSmall: &allValue.PSmall, + PInterface: &allValue.Interface, +} + +var allValueIndent = `{ + "Bool": true, + "Int": 2, + "Int8": 3, + "Int16": 4, + "Int32": 5, + "Int64": 6, + "Uint": 7, + "Uint8": 8, + "Uint16": 9, + "Uint32": 10, + "Uint64": 11, + "Uintptr": 12, + "Float32": 14.1, + "Float64": 15.1, + "bar": "foo", + "bar2": "foo2", + "IntStr": "42", + "UintptrStr": "44", + "PBool": null, + "PInt": null, + "PInt8": null, + "PInt16": null, + "PInt32": null, + "PInt64": null, + "PUint": null, + "PUint8": null, + "PUint16": null, + "PUint32": null, + "PUint64": null, + "PUintptr": null, + "PFloat32": null, + "PFloat64": null, + "String": "16", + "PString": null, + "Map": { + "17": { + "Tag": "tag17" + }, + "18": { + "Tag": "tag18" + } + }, + "MapP": { + "19": { + "Tag": "tag19" + }, + "20": null + }, + "PMap": null, + "PMapP": null, + "EmptyMap": {}, + "NilMap": null, + "Slice": [ + { + "Tag": "tag20" + }, + { + "Tag": "tag21" + } + ], + "SliceP": [ + { + "Tag": "tag22" + }, + null, + { + "Tag": "tag23" + } + ], + "PSlice": null, + "PSliceP": null, + "EmptySlice": [], + "NilSlice": null, + "StringSlice": [ + "str24", + "str25", + "str26" + ], + "ByteSlice": "Gxwd", + "Small": { + "Tag": "tag30" + }, + "PSmall": { + "Tag": "tag31" + }, + "PPSmall": null, + "Interface": 5.2, + "PInterface": null +}` + +var allValueCompact = stripWhitespace(allValueIndent) + +var pallValueIndent = `{ + "Bool": false, + "Int": 0, + "Int8": 0, + "Int16": 0, + "Int32": 0, + "Int64": 0, + "Uint": 0, + "Uint8": 0, + "Uint16": 0, + "Uint32": 0, + "Uint64": 0, + "Uintptr": 0, + "Float32": 0, + "Float64": 0, + "bar": "", + "bar2": "", + "IntStr": "0", + "UintptrStr": "0", + "PBool": true, + "PInt": 2, + "PInt8": 3, + "PInt16": 4, + "PInt32": 5, + "PInt64": 6, + "PUint": 7, + "PUint8": 8, + "PUint16": 9, + "PUint32": 10, + "PUint64": 11, + "PUintptr": 12, + "PFloat32": 14.1, + "PFloat64": 15.1, + "String": "", + "PString": "16", + "Map": null, + "MapP": null, + "PMap": { + "17": { + "Tag": "tag17" + }, + "18": { + "Tag": "tag18" + } + }, + "PMapP": { + "19": { + "Tag": "tag19" + }, + "20": null + }, + "EmptyMap": null, + "NilMap": null, + "Slice": null, + "SliceP": null, + "PSlice": [ + { + "Tag": "tag20" + }, + { + "Tag": "tag21" + } + ], + "PSliceP": [ + { + "Tag": "tag22" + }, + null, + { + "Tag": "tag23" + } + ], + "EmptySlice": null, + "NilSlice": null, + "StringSlice": null, + "ByteSlice": null, + "Small": { + "Tag": "" + }, + "PSmall": null, + "PPSmall": { + "Tag": "tag31" + }, + "Interface": null, + "PInterface": 5.2 +}` + +var pallValueCompact = stripWhitespace(pallValueIndent) + +func TestRefUnmarshal(t *testing.T) { + type S struct { + // Ref is defined in encode_test.go. 
+ R0 Ref + R1 *Ref + R2 RefText + R3 *RefText + } + want := S{ + R0: 12, + R1: new(Ref), + R2: 13, + R3: new(RefText), + } + *want.R1 = 12 + *want.R3 = 13 + + var got S + if err := Unmarshal([]byte(`{"R0":"ref","R1":"ref","R2":"ref","R3":"ref"}`), &got); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Errorf("Unmarsha:\n\tgot: %+v\n\twant: %+v", got, want) + } +} + +// Test that the empty string doesn't panic decoding when ,string is specified +// Issue 3450 +func TestEmptyString(t *testing.T) { + type T2 struct { + Number1 int `json:",string"` + Number2 int `json:",string"` + } + data := `{"Number1":"1", "Number2":""}` + dec := NewDecoder(strings.NewReader(data)) + var got T2 + switch err := dec.Decode(&got); { + case err == nil: + t.Fatalf("Decode error: got nil, want non-nil") + case got.Number1 != 1: + t.Fatalf("Decode: got.Number1 = %d, want 1", got.Number1) + } +} + +// Test that a null for ,string is not replaced with the previous quoted string (issue 7046). +// It should also not be an error (issue 2540, issue 8587). +func TestNullString(t *testing.T) { + type T struct { + A int `json:",string"` + B int `json:",string"` + C *int `json:",string"` + } + data := []byte(`{"A": "1", "B": null, "C": null}`) + var s T + s.B = 1 + s.C = new(int) + *s.C = 2 + switch err := Unmarshal(data, &s); { + case err != nil: + t.Fatalf("Unmarshal error: %v", err) + case s.B != 1: + t.Fatalf("Unmarshal: s.B = %d, want 1", s.B) + case s.C != nil: + t.Fatalf("Unmarshal: s.C = %d, want non-nil", s.C) + } +} + +func addr[T any](v T) *T { + return &v +} + +func TestInterfaceSet(t *testing.T) { + errUnmarshal := &UnmarshalTypeError{Value: "object", Offset: 6, Type: reflect.TypeFor[int](), Field: "X"} + tests := []struct { + CaseName + pre any + json string + post any + }{ + {Name(""), "foo", `"bar"`, "bar"}, + {Name(""), "foo", `2`, 2.0}, + {Name(""), "foo", `true`, true}, + {Name(""), "foo", `null`, nil}, + {Name(""), map[string]any{}, `true`, true}, + {Name(""), []string{}, `true`, true}, + + {Name(""), any(nil), `null`, any(nil)}, + {Name(""), (*int)(nil), `null`, any(nil)}, + {Name(""), (*int)(addr(0)), `null`, any(nil)}, + {Name(""), (*int)(addr(1)), `null`, any(nil)}, + {Name(""), (**int)(nil), `null`, any(nil)}, + {Name(""), (**int)(addr[*int](nil)), `null`, (**int)(addr[*int](nil))}, + {Name(""), (**int)(addr(addr(1))), `null`, (**int)(addr[*int](nil))}, + {Name(""), (***int)(nil), `null`, any(nil)}, + {Name(""), (***int)(addr[**int](nil)), `null`, (***int)(addr[**int](nil))}, + {Name(""), (***int)(addr(addr[*int](nil))), `null`, (***int)(addr[**int](nil))}, + {Name(""), (***int)(addr(addr(addr(1)))), `null`, (***int)(addr[**int](nil))}, + + {Name(""), any(nil), `2`, float64(2)}, + {Name(""), (int)(1), `2`, float64(2)}, + {Name(""), (*int)(nil), `2`, float64(2)}, + {Name(""), (*int)(addr(0)), `2`, (*int)(addr(2))}, + {Name(""), (*int)(addr(1)), `2`, (*int)(addr(2))}, + {Name(""), (**int)(nil), `2`, float64(2)}, + {Name(""), (**int)(addr[*int](nil)), `2`, (**int)(addr(addr(2)))}, + {Name(""), (**int)(addr(addr(1))), `2`, (**int)(addr(addr(2)))}, + {Name(""), (***int)(nil), `2`, float64(2)}, + {Name(""), (***int)(addr[**int](nil)), `2`, (***int)(addr(addr(addr(2))))}, + {Name(""), (***int)(addr(addr[*int](nil))), `2`, (***int)(addr(addr(addr(2))))}, + {Name(""), (***int)(addr(addr(addr(1)))), `2`, (***int)(addr(addr(addr(2))))}, + + {Name(""), any(nil), `{}`, map[string]any{}}, + {Name(""), (int)(1), `{}`, map[string]any{}}, + {Name(""), (*int)(nil), 
`{}`, map[string]any{}}, + {Name(""), (*int)(addr(0)), `{}`, errUnmarshal}, + {Name(""), (*int)(addr(1)), `{}`, errUnmarshal}, + {Name(""), (**int)(nil), `{}`, map[string]any{}}, + {Name(""), (**int)(addr[*int](nil)), `{}`, errUnmarshal}, + {Name(""), (**int)(addr(addr(1))), `{}`, errUnmarshal}, + {Name(""), (***int)(nil), `{}`, map[string]any{}}, + {Name(""), (***int)(addr[**int](nil)), `{}`, errUnmarshal}, + {Name(""), (***int)(addr(addr[*int](nil))), `{}`, errUnmarshal}, + {Name(""), (***int)(addr(addr(addr(1)))), `{}`, errUnmarshal}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + b := struct{ X any }{tt.pre} + blob := `{"X":` + tt.json + `}` + if err := Unmarshal([]byte(blob), &b); err != nil { + if wantErr, _ := tt.post.(error); equalError(err, wantErr) { + return + } + t.Fatalf("%s: Unmarshal(%#q) error: %v", tt.Where, blob, err) + } + if !reflect.DeepEqual(b.X, tt.post) { + t.Errorf("%s: Unmarshal(%#q):\n\tpre.X: %#v\n\tgot.X: %#v\n\twant.X: %#v", tt.Where, blob, tt.pre, b.X, tt.post) + } + }) + } +} + +type NullTest struct { + Bool bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint uint + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Float32 float32 + Float64 float64 + String string + PBool *bool + Map map[string]string + Slice []string + Interface any + + PRaw *RawMessage + PTime *time.Time + PBigInt *big.Int + PText *MustNotUnmarshalText + PBuffer *bytes.Buffer // has methods, just not relevant ones + PStruct *struct{} + + Raw RawMessage + Time time.Time + BigInt big.Int + Text MustNotUnmarshalText + Buffer bytes.Buffer + Struct struct{} +} + +// JSON null values should be ignored for primitives and string values instead of resulting in an error. +// Issue 2540 +func TestUnmarshalNulls(t *testing.T) { + // Unmarshal docs: + // The JSON null value unmarshals into an interface, map, pointer, or slice + // by setting that Go value to nil. Because null is often used in JSON to mean + // ``not present,'' unmarshaling a JSON null into any other Go type has no effect + // on the value and produces no error. 
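+	// The fixture below therefore sets every field to a non-zero value, decodes
+	// a document whose members are all null, and checks that primitives and
+	// strings keep their values while pointers, maps, slices, and interfaces
+	// are reset to nil. RawMessage is the one exception: it records the
+	// literal null.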
+ + jsonData := []byte(`{ + "Bool" : null, + "Int" : null, + "Int8" : null, + "Int16" : null, + "Int32" : null, + "Int64" : null, + "Uint" : null, + "Uint8" : null, + "Uint16" : null, + "Uint32" : null, + "Uint64" : null, + "Float32" : null, + "Float64" : null, + "String" : null, + "PBool": null, + "Map": null, + "Slice": null, + "Interface": null, + "PRaw": null, + "PTime": null, + "PBigInt": null, + "PText": null, + "PBuffer": null, + "PStruct": null, + "Raw": null, + "Time": null, + "BigInt": null, + "Text": null, + "Buffer": null, + "Struct": null + }`) + nulls := NullTest{ + Bool: true, + Int: 2, + Int8: 3, + Int16: 4, + Int32: 5, + Int64: 6, + Uint: 7, + Uint8: 8, + Uint16: 9, + Uint32: 10, + Uint64: 11, + Float32: 12.1, + Float64: 13.1, + String: "14", + PBool: new(bool), + Map: map[string]string{}, + Slice: []string{}, + Interface: new(MustNotUnmarshalJSON), + PRaw: new(RawMessage), + PTime: new(time.Time), + PBigInt: new(big.Int), + PText: new(MustNotUnmarshalText), + PStruct: new(struct{}), + PBuffer: new(bytes.Buffer), + Raw: RawMessage("123"), + Time: time.Unix(123456789, 0), + BigInt: *big.NewInt(123), + } + + before := nulls.Time.String() + + err := Unmarshal(jsonData, &nulls) + if err != nil { + t.Errorf("Unmarshal of null values failed: %v", err) + } + if !nulls.Bool || nulls.Int != 2 || nulls.Int8 != 3 || nulls.Int16 != 4 || nulls.Int32 != 5 || nulls.Int64 != 6 || + nulls.Uint != 7 || nulls.Uint8 != 8 || nulls.Uint16 != 9 || nulls.Uint32 != 10 || nulls.Uint64 != 11 || + nulls.Float32 != 12.1 || nulls.Float64 != 13.1 || nulls.String != "14" { + t.Errorf("Unmarshal of null values affected primitives") + } + + if nulls.PBool != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBool") + } + if nulls.Map != nil { + t.Errorf("Unmarshal of null did not clear nulls.Map") + } + if nulls.Slice != nil { + t.Errorf("Unmarshal of null did not clear nulls.Slice") + } + if nulls.Interface != nil { + t.Errorf("Unmarshal of null did not clear nulls.Interface") + } + if nulls.PRaw != nil { + t.Errorf("Unmarshal of null did not clear nulls.PRaw") + } + if nulls.PTime != nil { + t.Errorf("Unmarshal of null did not clear nulls.PTime") + } + if nulls.PBigInt != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBigInt") + } + if nulls.PText != nil { + t.Errorf("Unmarshal of null did not clear nulls.PText") + } + if nulls.PBuffer != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBuffer") + } + if nulls.PStruct != nil { + t.Errorf("Unmarshal of null did not clear nulls.PStruct") + } + + if string(nulls.Raw) != "null" { + t.Errorf("Unmarshal of RawMessage null did not record null: %v", string(nulls.Raw)) + } + if nulls.Time.String() != before { + t.Errorf("Unmarshal of time.Time null set time to %v", nulls.Time.String()) + } + if nulls.BigInt.String() != "123" { + t.Errorf("Unmarshal of big.Int null set int to %v", nulls.BigInt.String()) + } +} + +type MustNotUnmarshalJSON struct{} + +func (x MustNotUnmarshalJSON) UnmarshalJSON(data []byte) error { + return errors.New("MustNotUnmarshalJSON was used") +} + +type MustNotUnmarshalText struct{} + +func (x MustNotUnmarshalText) UnmarshalText(text []byte) error { + return errors.New("MustNotUnmarshalText was used") +} + +func TestStringKind(t *testing.T) { + type stringKind string + want := map[stringKind]int{"foo": 42} + data, err := Marshal(want) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var got map[stringKind]int + err = Unmarshal(data, &got) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if 
!maps.Equal(got, want) { + t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want) + } +} + +// Custom types with []byte as underlying type could not be marshaled +// and then unmarshaled. +// Issue 8962. +func TestByteKind(t *testing.T) { + type byteKind []byte + want := byteKind("hello") + data, err := Marshal(want) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var got byteKind + err = Unmarshal(data, &got) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !slices.Equal(got, want) { + t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want) + } +} + +// The fix for issue 8962 introduced a regression. +// Issue 12921. +func TestSliceOfCustomByte(t *testing.T) { + type Uint8 uint8 + want := []Uint8("hello") + data, err := Marshal(want) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var got []Uint8 + err = Unmarshal(data, &got) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !slices.Equal(got, want) { + t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want) + } +} + +func TestUnmarshalTypeError(t *testing.T) { + tests := []struct { + CaseName + dest any + in string + }{ + {Name(""), new(string), `{"user": "name"}`}, // issue 4628. + {Name(""), new(error), `{}`}, // issue 4222 + {Name(""), new(error), `[]`}, + {Name(""), new(error), `""`}, + {Name(""), new(error), `123`}, + {Name(""), new(error), `true`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + err := Unmarshal([]byte(tt.in), tt.dest) + if _, ok := err.(*UnmarshalTypeError); !ok { + t.Errorf("%s: Unmarshal(%#q, %T):\n\tgot: %T\n\twant: %T", + tt.Where, tt.in, tt.dest, err, new(UnmarshalTypeError)) + } + }) + } +} + +func TestUnmarshalSyntax(t *testing.T) { + var x any + tests := []struct { + CaseName + in string + }{ + {Name(""), "tru"}, + {Name(""), "fals"}, + {Name(""), "nul"}, + {Name(""), "123e"}, + {Name(""), `"hello`}, + {Name(""), `[1,2,3`}, + {Name(""), `{"key":1`}, + {Name(""), `{"key":1,`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + err := Unmarshal([]byte(tt.in), &x) + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("%s: Unmarshal(%#q, any):\n\tgot: %T\n\twant: %T", + tt.Where, tt.in, err, new(SyntaxError)) + } + }) + } +} + +// Test handling of unexported fields that should be ignored. +// Issue 4660 +type unexportedFields struct { + Name string + m map[string]any `json:"-"` + m2 map[string]any `json:"abcd"` + + s []int `json:"-"` +} + +func TestUnmarshalUnexported(t *testing.T) { + input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}, "s": [2, 3]}` + want := &unexportedFields{Name: "Bob"} + + out := &unexportedFields{} + err := Unmarshal([]byte(input), out) + if err != nil { + t.Errorf("Unmarshal error: %v", err) + } + if !reflect.DeepEqual(out, want) { + t.Errorf("Unmarshal:\n\tgot: %+v\n\twant: %+v", out, want) + } +} + +// Time3339 is a time.Time which encodes to and from JSON +// as an RFC 3339 time in UTC. 
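+// Only a quoted RFC 3339 string such as "2006-01-02T15:04:05Z" is accepted;
+// UnmarshalJSON below rejects any non-string JSON value with an error.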
+type Time3339 time.Time + +func (t *Time3339) UnmarshalJSON(b []byte) error { + if len(b) < 2 || b[0] != '"' || b[len(b)-1] != '"' { + return fmt.Errorf("types: failed to unmarshal non-string value %q as an RFC 3339 time", b) + } + tm, err := time.Parse(time.RFC3339, string(b[1:len(b)-1])) + if err != nil { + return err + } + *t = Time3339(tm) + return nil +} + +func TestUnmarshalJSONLiteralError(t *testing.T) { + var t3 Time3339 + switch err := Unmarshal([]byte(`"0000-00-00T00:00:00Z"`), &t3); { + case err == nil: + t.Fatalf("Unmarshal error: got nil, want non-nil") + case !strings.Contains(err.Error(), "range"): + t.Errorf("Unmarshal error:\n\tgot: %v\n\twant: out of range", err) + } +} + +// Test that extra object elements in an array do not result in a +// "data changing underfoot" error. +// Issue 3717 +func TestSkipArrayObjects(t *testing.T) { + json := `[{}]` + var dest [0]any + + err := Unmarshal([]byte(json), &dest) + if err != nil { + t.Errorf("Unmarshal error: %v", err) + } +} + +// Test semantics of pre-filled data, such as struct fields, map elements, +// slices, and arrays. +// Issues 4900 and 8837, among others. +func TestPrefilled(t *testing.T) { + // Values here change, cannot reuse table across runs. + tests := []struct { + CaseName + in string + ptr any + out any + }{{ + CaseName: Name(""), + in: `{"X": 1, "Y": 2}`, + ptr: &XYZ{X: float32(3), Y: int16(4), Z: 1.5}, + out: &XYZ{X: float64(1), Y: float64(2), Z: 1.5}, + }, { + CaseName: Name(""), + in: `{"X": 1, "Y": 2}`, + ptr: &map[string]any{"X": float32(3), "Y": int16(4), "Z": 1.5}, + out: &map[string]any{"X": float64(1), "Y": float64(2), "Z": 1.5}, + }, { + CaseName: Name(""), + in: `[2]`, + ptr: &[]int{1}, + out: &[]int{2}, + }, { + CaseName: Name(""), + in: `[2, 3]`, + ptr: &[]int{1}, + out: &[]int{2, 3}, + }, { + CaseName: Name(""), + in: `[2, 3]`, + ptr: &[...]int{1}, + out: &[...]int{2}, + }, { + CaseName: Name(""), + in: `[3]`, + ptr: &[...]int{1, 2}, + out: &[...]int{3, 0}, + }} + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + ptrstr := fmt.Sprintf("%v", tt.ptr) + err := Unmarshal([]byte(tt.in), tt.ptr) // tt.ptr edited here + if err != nil { + t.Errorf("%s: Unmarshal error: %v", tt.Where, err) + } + if !reflect.DeepEqual(tt.ptr, tt.out) { + t.Errorf("%s: Unmarshal(%#q, %T):\n\tgot: %v\n\twant: %v", tt.Where, tt.in, ptrstr, tt.ptr, tt.out) + } + }) + } +} + +func TestInvalidUnmarshal(t *testing.T) { + tests := []struct { + CaseName + in string + v any + wantErr error + }{ + {Name(""), `{"a":"1"}`, nil, &InvalidUnmarshalError{}}, + {Name(""), `{"a":"1"}`, struct{}{}, &InvalidUnmarshalError{reflect.TypeFor[struct{}]()}}, + {Name(""), `{"a":"1"}`, (*int)(nil), &InvalidUnmarshalError{reflect.TypeFor[*int]()}}, + {Name(""), `123`, nil, &InvalidUnmarshalError{}}, + {Name(""), `123`, struct{}{}, &InvalidUnmarshalError{reflect.TypeFor[struct{}]()}}, + {Name(""), `123`, (*int)(nil), &InvalidUnmarshalError{reflect.TypeFor[*int]()}}, + {Name(""), `123`, new(net.IP), &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[*net.IP](), Offset: 3}}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + switch gotErr := Unmarshal([]byte(tt.in), tt.v); { + case gotErr == nil: + t.Fatalf("%s: Unmarshal error: got nil, want non-nil", tt.Where) + case !reflect.DeepEqual(gotErr, tt.wantErr): + t.Errorf("%s: Unmarshal error:\n\tgot: %#v\n\twant: %#v", tt.Where, gotErr, tt.wantErr) + } + }) + } +} + +// Test that string option is ignored for invalid types. +// Issue 9812. 
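+// The round trip below must not fail: a ",string" tag on a field whose type
+// cannot meaningfully carry it is tolerated rather than rejected.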
+func TestInvalidStringOption(t *testing.T) { + num := 0 + item := struct { + T time.Time `json:",string"` + M map[string]string `json:",string"` + S []string `json:",string"` + A [1]string `json:",string"` + I any `json:",string"` + P *int `json:",string"` + }{M: make(map[string]string), S: make([]string, 0), I: num, P: &num} + + data, err := Marshal(item) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + + err = Unmarshal(data, &item) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } +} + +// Test unmarshal behavior with regards to embedded unexported structs. +// +// (Issue 21357) If the embedded struct is a pointer and is unallocated, +// this returns an error because unmarshal cannot set the field. +// +// (Issue 24152) If the embedded struct is given an explicit name, +// ensure that the normal unmarshal logic does not panic in reflect. +// +// (Issue 28145) If the embedded struct is given an explicit name and has +// exported methods, don't cause a panic trying to get its value. +func TestUnmarshalEmbeddedUnexported(t *testing.T) { + type ( + embed1 struct{ Q int } + embed2 struct{ Q int } + embed3 struct { + Q int64 `json:",string"` + } + S1 struct { + *embed1 + R int + } + S2 struct { + *embed1 + Q int + } + S3 struct { + embed1 + R int + } + S4 struct { + *embed1 + embed2 + } + S5 struct { + *embed3 + R int + } + S6 struct { + embed1 `json:"embed1"` + } + S7 struct { + embed1 `json:"embed1"` + embed2 + } + S8 struct { + embed1 `json:"embed1"` + embed2 `json:"embed2"` + Q int + } + S9 struct { + unexportedWithMethods `json:"embed"` + } + ) + + tests := []struct { + CaseName + in string + ptr any + out any + err error + }{{ + // Error since we cannot set S1.embed1, but still able to set S1.R. + CaseName: Name(""), + in: `{"R":2,"Q":1}`, + ptr: new(S1), + out: &S1{R: 2}, + err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed1"), + }, { + // The top level Q field takes precedence. + CaseName: Name(""), + in: `{"Q":1}`, + ptr: new(S2), + out: &S2{Q: 1}, + }, { + // No issue with non-pointer variant. + CaseName: Name(""), + in: `{"R":2,"Q":1}`, + ptr: new(S3), + out: &S3{embed1: embed1{Q: 1}, R: 2}, + }, { + // No error since both embedded structs have field R, which annihilate each other. + // Thus, no attempt is made at setting S4.embed1. + CaseName: Name(""), + in: `{"R":2}`, + ptr: new(S4), + out: new(S4), + }, { + // Error since we cannot set S5.embed1, but still able to set S5.R. + CaseName: Name(""), + in: `{"R":2,"Q":1}`, + ptr: new(S5), + out: &S5{R: 2}, + err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed3"), + }, { + // Issue 24152, ensure decodeState.indirect does not panic. + CaseName: Name(""), + in: `{"embed1": {"Q": 1}}`, + ptr: new(S6), + out: &S6{embed1{1}}, + }, { + // Issue 24153, check that we can still set forwarded fields even in + // the presence of a name conflict. + // + // This relies on obscure behavior of reflect where it is possible + // to set a forwarded exported field on an unexported embedded struct + // even though there is a name conflict, even when it would have been + // impossible to do so according to Go visibility rules. + // Go forbids this because it is ambiguous whether S7.Q refers to + // S7.embed1.Q or S7.embed2.Q. Since embed1 and embed2 are unexported, + // it should be impossible for an external package to set either Q. + // + // It is probably okay for a future reflect change to break this. 
+ CaseName: Name(""), + in: `{"embed1": {"Q": 1}, "Q": 2}`, + ptr: new(S7), + out: &S7{embed1{1}, embed2{2}}, + }, { + // Issue 24153, similar to the S7 case. + CaseName: Name(""), + in: `{"embed1": {"Q": 1}, "embed2": {"Q": 2}, "Q": 3}`, + ptr: new(S8), + out: &S8{embed1{1}, embed2{2}, 3}, + }, { + // Issue 228145, similar to the cases above. + CaseName: Name(""), + in: `{"embed": {}}`, + ptr: new(S9), + out: &S9{}, + }} + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + err := Unmarshal([]byte(tt.in), tt.ptr) + if !equalError(err, tt.err) { + t.Errorf("%s: Unmarshal error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) + } + if !reflect.DeepEqual(tt.ptr, tt.out) { + t.Errorf("%s: Unmarshal:\n\tgot: %#+v\n\twant: %#+v", tt.Where, tt.ptr, tt.out) + } + }) + } +} + +func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) { + tests := []struct { + CaseName + in string + err error + }{{ + CaseName: Name(""), + in: `1 false null :`, + err: &SyntaxError{"invalid character ':' looking for beginning of value", 14}, + }, { + CaseName: Name(""), + in: `1 [] [,]`, + err: &SyntaxError{"invalid character ',' looking for beginning of value", 7}, + }, { + CaseName: Name(""), + in: `1 [] [true:]`, + err: &SyntaxError{"invalid character ':' after array element", 11}, + }, { + CaseName: Name(""), + in: `1 {} {"x"=}`, + err: &SyntaxError{"invalid character '=' after object key", 14}, + }, { + CaseName: Name(""), + in: `falsetruenul#`, + err: &SyntaxError{"invalid character '#' in literal null (expecting 'l')", 13}, + }} + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + dec := NewDecoder(strings.NewReader(tt.in)) + var err error + for err == nil { + var v any + err = dec.Decode(&v) + } + if !reflect.DeepEqual(err, tt.err) { + t.Errorf("%s: Decode error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) + } + }) + } +} + +type unmarshalPanic struct{} + +func (unmarshalPanic) UnmarshalJSON([]byte) error { panic(0xdead) } + +func TestUnmarshalPanic(t *testing.T) { + defer func() { + if got := recover(); !reflect.DeepEqual(got, 0xdead) { + t.Errorf("panic() = (%T)(%v), want 0xdead", got, got) + } + }() + Unmarshal([]byte("{}"), &unmarshalPanic{}) + t.Fatalf("Unmarshal should have panicked") +} + +// The decoder used to hang if decoding into an interface pointing to its own address. +// See golang.org/issues/31740. +func TestUnmarshalRecursivePointer(t *testing.T) { + var v any + v = &v + data := []byte(`{"a": "b"}`) + + if err := Unmarshal(data, v); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } +} + +type textUnmarshalerString string + +func (m *textUnmarshalerString) UnmarshalText(text []byte) error { + *m = textUnmarshalerString(strings.ToLower(string(text))) + return nil +} + +// Test unmarshal to a map, where the map key is a user defined type. +// See golang.org/issues/34437. +func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) { + var p map[textUnmarshalerString]string + if err := Unmarshal([]byte(`{"FOO": "1"}`), &p); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + + if _, ok := p["foo"]; !ok { + t.Errorf(`key "foo" missing in map: %v`, p) + } +} + +func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) { + // See golang.org/issues/38105. + var p map[textUnmarshalerString]string + if err := Unmarshal([]byte(`{"开源":"12345开源"}`), &p); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if _, ok := p["开源"]; !ok { + t.Errorf(`key "开源" missing in map: %v`, p) + } + + // See golang.org/issues/38126. 
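+	// With the ",string" option the field value is itself JSON-encoded and
+	// then stored inside a JSON string, so Unmarshal must unquote the outer
+	// string and rescan the inner literal; that rescanning path is what
+	// these issues exercise.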
+ type T struct { + F1 string `json:"F1,string"` + } + wantT := T{"aaa\tbbb"} + + b, err := Marshal(wantT) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var gotT T + if err := Unmarshal(b, &gotT); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if gotT != wantT { + t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %q\n\twant: %q", gotT, wantT) + } + + // See golang.org/issues/39555. + input := map[textUnmarshalerString]string{"FOO": "", `"`: ""} + + encoded, err := Marshal(input) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var got map[textUnmarshalerString]string + if err := Unmarshal(encoded, &got); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + want := map[textUnmarshalerString]string{"foo": "", `"`: ""} + if !maps.Equal(got, want) { + t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %q\n\twant: %q", gotT, wantT) + } +} + +func TestUnmarshalMaxDepth(t *testing.T) { + tests := []struct { + CaseName + data string + errMaxDepth bool + }{{ + CaseName: Name("ArrayUnderMaxNestingDepth"), + data: `{"a":` + strings.Repeat(`[`, 10000-1) + strings.Repeat(`]`, 10000-1) + `}`, + errMaxDepth: false, + }, { + CaseName: Name("ArrayOverMaxNestingDepth"), + data: `{"a":` + strings.Repeat(`[`, 10000) + strings.Repeat(`]`, 10000) + `}`, + errMaxDepth: true, + }, { + CaseName: Name("ArrayOverStackDepth"), + data: `{"a":` + strings.Repeat(`[`, 3000000) + strings.Repeat(`]`, 3000000) + `}`, + errMaxDepth: true, + }, { + CaseName: Name("ObjectUnderMaxNestingDepth"), + data: `{"a":` + strings.Repeat(`{"a":`, 10000-1) + `0` + strings.Repeat(`}`, 10000-1) + `}`, + errMaxDepth: false, + }, { + CaseName: Name("ObjectOverMaxNestingDepth"), + data: `{"a":` + strings.Repeat(`{"a":`, 10000) + `0` + strings.Repeat(`}`, 10000) + `}`, + errMaxDepth: true, + }, { + CaseName: Name("ObjectOverStackDepth"), + data: `{"a":` + strings.Repeat(`{"a":`, 3000000) + `0` + strings.Repeat(`}`, 3000000) + `}`, + errMaxDepth: true, + }} + + targets := []struct { + CaseName + newValue func() any + }{{ + CaseName: Name("unstructured"), + newValue: func() any { + var v any + return &v + }, + }, { + CaseName: Name("typed named field"), + newValue: func() any { + v := struct { + A any `json:"a"` + }{} + return &v + }, + }, { + CaseName: Name("typed missing field"), + newValue: func() any { + v := struct { + B any `json:"b"` + }{} + return &v + }, + }, { + CaseName: Name("custom unmarshaler"), + newValue: func() any { + v := unmarshaler{} + return &v + }, + }} + + for _, tt := range tests { + for _, target := range targets { + t.Run(target.Name+"-"+tt.Name, func(t *testing.T) { + err := Unmarshal([]byte(tt.data), target.newValue()) + if !tt.errMaxDepth { + if err != nil { + t.Errorf("%s: %s: Unmarshal error: %v", tt.Where, target.Where, err) + } + } else { + if err == nil || !strings.Contains(err.Error(), "exceeded max depth") { + t.Errorf("%s: %s: Unmarshal error:\n\tgot: %v\n\twant: exceeded max depth", tt.Where, target.Where, err) + } + } + }) + } + } +} diff --git a/pkg/encoders/json/encode.go b/pkg/encoders/json/encode.go new file mode 100644 index 0000000..951fbc2 --- /dev/null +++ b/pkg/encoders/json/encode.go @@ -0,0 +1,1418 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !goexperiment.jsonv2 + +// Package json implements encoding and decoding of JSON as defined in RFC 7159. 
+// The mapping between JSON and Go values is described in the documentation for +// the Marshal and Unmarshal functions. +// +// See "JSON and Go" for an introduction to this package: +// https://golang.org/doc/articles/json_and_go.html +// +// # Security Considerations +// +// The JSON standard (RFC 7159) is lax in its definition of a number of parser +// behaviors. As such, many JSON parsers behave differently in various +// scenarios. These differences in parsers mean that systems that use multiple +// independent JSON parser implementations may parse the same JSON object in +// differing ways. +// +// Systems that rely on a JSON object being parsed consistently for security +// purposes should be careful to understand the behaviors of this parser, as +// well as how these behaviors may cause interoperability issues with other +// parser implementations. +// +// Due to the Go Backwards Compatibility promise (https://go.dev/doc/go1compat) +// there are a number of behaviors this package exhibits that may cause +// interopability issues, but cannot be changed. In particular the following +// parsing behaviors may cause issues: +// +// - If a JSON object contains duplicate keys, keys are processed in the order +// they are observed, meaning later values will replace or be merged into +// prior values, depending on the field type (in particular maps and structs +// will have values merged, while other types have values replaced). +// - When parsing a JSON object into a Go struct, keys are considered in a +// case-insensitive fashion. +// - When parsing a JSON object into a Go struct, unknown keys in the JSON +// object are ignored (unless a [Decoder] is used and +// [Decoder.DisallowUnknownFields] has been called). +// - Invalid UTF-8 bytes in JSON strings are replaced by the Unicode +// replacement character. +// - Large JSON number integers will lose precision when unmarshaled into +// floating-point types. +package json + +import ( + "bytes" + "cmp" + "encoding" + "encoding/base64" + "fmt" + "math" + "reflect" + "slices" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" + _ "unsafe" // for linkname +) + +// Marshal returns the JSON encoding of v. +// +// Marshal traverses the value v recursively. +// If an encountered value implements [Marshaler] +// and is not a nil pointer, Marshal calls [Marshaler.MarshalJSON] +// to produce JSON. If no [Marshaler.MarshalJSON] method is present but the +// value implements [encoding.TextMarshaler] instead, Marshal calls +// [encoding.TextMarshaler.MarshalText] and encodes the result as a JSON string. +// The nil pointer exception is not strictly necessary +// but mimics a similar, necessary exception in the behavior of +// [Unmarshaler.UnmarshalJSON]. +// +// Otherwise, Marshal uses the following type-dependent default encodings: +// +// Boolean values encode as JSON booleans. +// +// Floating point, integer, and [Number] values encode as JSON numbers. +// NaN and +/-Inf values will return an [UnsupportedValueError]. +// +// String values encode as JSON strings coerced to valid UTF-8, +// replacing invalid bytes with the Unicode replacement rune. +// So that the JSON will be safe to embed inside HTML `, + } + + b, err := json.Marshal(&page, + // Escape certain runes within a JSON string so that + // JSON will be safe to directly embed inside HTML. 
+ jsontext.EscapeForHTML(true), + jsontext.EscapeForJS(true), + jsontext.Multiline(true)) // expand for readability + if err != nil { + log.Fatal(err) + } + fmt.Println(string(b)) + + // Output: + // { + // "Title": "Example Embedded Javascript", + // "Body": "\u003cscript\u003e console.log(\"Hello, world!\"); \u003c/script\u003e" + // } +} diff --git a/pkg/encoders/json/jsontext/export.go b/pkg/encoders/json/jsontext/export.go new file mode 100644 index 0000000..0ecccad --- /dev/null +++ b/pkg/encoders/json/jsontext/export.go @@ -0,0 +1,77 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "io" + + "encoding/json/internal" +) + +// Internal is for internal use only. +// This is exempt from the Go compatibility agreement. +var Internal exporter + +type exporter struct{} + +// Export exposes internal functionality from "jsontext" to "json". +// This cannot be dynamically called by other packages since +// they cannot obtain a reference to the internal.AllowInternalUse value. +func (exporter) Export(p *internal.NotForPublicUse) export { + if p != &internal.AllowInternalUse { + panic("unauthorized call to Export") + } + return export{} +} + +// The export type exposes functionality to packages with visibility to +// the internal.AllowInternalUse variable. The "json" package uses this +// to modify low-level state in the Encoder and Decoder types. +// It mutates the state directly instead of calling ReadToken or WriteToken +// since this is more performant. The public APIs need to track state to ensure +// that users are constructing a valid JSON value, but the "json" implementation +// guarantees that it emits valid JSON by the structure of the code itself. +type export struct{} + +// Encoder returns a pointer to the underlying encoderState. +func (export) Encoder(e *Encoder) *encoderState { return &e.s } + +// Decoder returns a pointer to the underlying decoderState. +func (export) Decoder(d *Decoder) *decoderState { return &d.s } + +func (export) GetBufferedEncoder(o ...Options) *Encoder { + return getBufferedEncoder(o...) +} +func (export) PutBufferedEncoder(e *Encoder) { + putBufferedEncoder(e) +} + +func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder { + return getStreamingEncoder(w, o...) +} +func (export) PutStreamingEncoder(e *Encoder) { + putStreamingEncoder(e) +} + +func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder { + return getBufferedDecoder(b, o...) +} +func (export) PutBufferedDecoder(d *Decoder) { + putBufferedDecoder(d) +} + +func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder { + return getStreamingDecoder(r, o...) +} +func (export) PutStreamingDecoder(d *Decoder) { + putStreamingDecoder(d) +} + +func (export) IsIOError(err error) bool { + _, ok := err.(*ioError) + return ok +} diff --git a/pkg/encoders/json/jsontext/fuzz_test.go b/pkg/encoders/json/jsontext/fuzz_test.go new file mode 100644 index 0000000..60d16b9 --- /dev/null +++ b/pkg/encoders/json/jsontext/fuzz_test.go @@ -0,0 +1,236 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "bytes" + "errors" + "io" + "math/rand" + "slices" + "testing" + + "encoding/json/internal/jsontest" +) + +func FuzzCoder(f *testing.F) { + // Add a number of inputs to the corpus including valid and invalid data. + for _, td := range coderTestdata { + f.Add(int64(0), []byte(td.in)) + } + for _, td := range decoderErrorTestdata { + f.Add(int64(0), []byte(td.in)) + } + for _, td := range encoderErrorTestdata { + f.Add(int64(0), []byte(td.wantOut)) + } + for _, td := range jsontest.Data { + f.Add(int64(0), td.Data()) + } + + f.Fuzz(func(t *testing.T, seed int64, b []byte) { + var tokVals []tokOrVal + rn := rand.NewSource(seed) + + // Read a sequence of tokens or values. Skip the test for any errors + // since we expect this with randomly generated fuzz inputs. + src := bytes.NewReader(b) + dec := NewDecoder(src) + for { + if rn.Int63()%8 > 0 { + tok, err := dec.ReadToken() + if err != nil { + if err == io.EOF { + break + } + t.Skipf("Decoder.ReadToken error: %v", err) + } + tokVals = append(tokVals, tok.Clone()) + } else { + val, err := dec.ReadValue() + if err != nil { + expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']' + if expectError && errors.As(err, new(*SyntacticError)) { + continue + } + if err == io.EOF { + break + } + t.Skipf("Decoder.ReadValue error: %v", err) + } + tokVals = append(tokVals, append(zeroValue, val...)) + } + } + + // Write a sequence of tokens or values. Fail the test for any errors + // since the previous stage guarantees that the input is valid. + dst := new(bytes.Buffer) + enc := NewEncoder(dst) + for _, tokVal := range tokVals { + switch tokVal := tokVal.(type) { + case Token: + if err := enc.WriteToken(tokVal); err != nil { + t.Fatalf("Encoder.WriteToken error: %v", err) + } + case Value: + if err := enc.WriteValue(tokVal); err != nil { + t.Fatalf("Encoder.WriteValue error: %v", err) + } + } + } + + // Encoded output and original input must decode to the same thing. + var got, want []Token + for dec := NewDecoder(bytes.NewReader(b)); dec.PeekKind() > 0; { + tok, err := dec.ReadToken() + if err != nil { + t.Fatalf("Decoder.ReadToken error: %v", err) + } + got = append(got, tok.Clone()) + } + for dec := NewDecoder(dst); dec.PeekKind() > 0; { + tok, err := dec.ReadToken() + if err != nil { + t.Fatalf("Decoder.ReadToken error: %v", err) + } + want = append(want, tok.Clone()) + } + if !equalTokens(got, want) { + t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want) + } + }) +} + +func FuzzResumableDecoder(f *testing.F) { + for _, td := range resumableDecoderTestdata { + f.Add(int64(0), []byte(td)) + } + + f.Fuzz(func(t *testing.T, seed int64, b []byte) { + rn := rand.NewSource(seed) + + // Regardless of how many bytes the underlying io.Reader produces, + // the provided tokens, values, and errors should always be identical. 
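+		// FaultyBuffer feeds the same bytes through small, randomly sized
+		// reads, while bytes.Reader returns them in one shot; both decoders
+		// must report identical tokens, values, and errors.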
+ t.Run("ReadToken", func(t *testing.T) { + decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn}) + decWant := NewDecoder(bytes.NewReader(b)) + gotTok, gotErr := decGot.ReadToken() + wantTok, wantErr := decWant.ReadToken() + if gotTok.String() != wantTok.String() || !equalError(gotErr, wantErr) { + t.Errorf("Decoder.ReadToken = (%v, %v), want (%v, %v)", gotTok, gotErr, wantTok, wantErr) + } + }) + t.Run("ReadValue", func(t *testing.T) { + decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn}) + decWant := NewDecoder(bytes.NewReader(b)) + gotVal, gotErr := decGot.ReadValue() + wantVal, wantErr := decWant.ReadValue() + if !slices.Equal(gotVal, wantVal) || !equalError(gotErr, wantErr) { + t.Errorf("Decoder.ReadValue = (%s, %v), want (%s, %v)", gotVal, gotErr, wantVal, wantErr) + } + }) + }) +} + +func FuzzValueFormat(f *testing.F) { + for _, td := range valueTestdata { + f.Add(int64(0), []byte(td.in)) + } + + // isValid reports whether b is valid according to the specified options. + isValid := func(b []byte, opts ...Options) bool { + d := NewDecoder(bytes.NewReader(b), opts...) + _, errVal := d.ReadValue() + _, errEOF := d.ReadToken() + return errVal == nil && errEOF == io.EOF + } + + // stripWhitespace removes all JSON whitespace characters from the input. + stripWhitespace := func(in []byte) (out []byte) { + out = make([]byte, 0, len(in)) + for _, c := range in { + switch c { + case ' ', '\n', '\r', '\t': + default: + out = append(out, c) + } + } + return out + } + + allOptions := []Options{ + AllowDuplicateNames(true), + AllowInvalidUTF8(true), + EscapeForHTML(true), + EscapeForJS(true), + PreserveRawStrings(true), + CanonicalizeRawInts(true), + CanonicalizeRawFloats(true), + ReorderRawObjects(true), + SpaceAfterColon(true), + SpaceAfterComma(true), + Multiline(true), + WithIndent("\t"), + WithIndentPrefix(" "), + } + + f.Fuzz(func(t *testing.T, seed int64, b []byte) { + validRFC7159 := isValid(b, AllowInvalidUTF8(true), AllowDuplicateNames(true)) + validRFC8259 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(true)) + validRFC7493 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(false)) + switch { + case !validRFC7159 && validRFC8259: + t.Errorf("invalid input per RFC 7159 implies invalid per RFC 8259") + case !validRFC8259 && validRFC7493: + t.Errorf("invalid input per RFC 8259 implies invalid per RFC 7493") + } + + gotValid := Value(b).IsValid() + wantValid := validRFC7493 + if gotValid != wantValid { + t.Errorf("Value.IsValid = %v, want %v", gotValid, wantValid) + } + + gotCompacted := Value(string(b)) + gotCompactOk := gotCompacted.Compact() == nil + wantCompactOk := validRFC7159 + if !bytes.Equal(stripWhitespace(gotCompacted), stripWhitespace(b)) { + t.Errorf("stripWhitespace(Value.Compact) = %s, want %s", stripWhitespace(gotCompacted), stripWhitespace(b)) + } + if gotCompactOk != wantCompactOk { + t.Errorf("Value.Compact success mismatch: got %v, want %v", gotCompactOk, wantCompactOk) + } + + gotIndented := Value(string(b)) + gotIndentOk := gotIndented.Indent() == nil + wantIndentOk := validRFC7159 + if !bytes.Equal(stripWhitespace(gotIndented), stripWhitespace(b)) { + t.Errorf("stripWhitespace(Value.Indent) = %s, want %s", stripWhitespace(gotIndented), stripWhitespace(b)) + } + if gotIndentOk != wantIndentOk { + t.Errorf("Value.Indent success mismatch: got %v, want %v", gotIndentOk, wantIndentOk) + } + + gotCanonicalized := Value(string(b)) + gotCanonicalizeOk := gotCanonicalized.Canonicalize() == nil + wantCanonicalizeOk := validRFC7493 + 
if gotCanonicalizeOk != wantCanonicalizeOk { + t.Errorf("Value.Canonicalize success mismatch: got %v, want %v", gotCanonicalizeOk, wantCanonicalizeOk) + } + + // Random options should not result in a panic. + var opts []Options + rn := rand.New(rand.NewSource(seed)) + for _, opt := range allOptions { + if rn.Intn(len(allOptions)/4) == 0 { + opts = append(opts, opt) + } + } + v := Value(b) + v.Format(opts...) // should not panic + }) +} diff --git a/pkg/encoders/json/jsontext/options.go b/pkg/encoders/json/jsontext/options.go new file mode 100644 index 0000000..7eb4f9b --- /dev/null +++ b/pkg/encoders/json/jsontext/options.go @@ -0,0 +1,304 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "strings" + + "encoding/json/internal/jsonflags" + "encoding/json/internal/jsonopts" + "encoding/json/internal/jsonwire" +) + +// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder], +// and [Decoder.Reset] with specific features. +// Each function takes in a variadic list of options, where properties +// set in latter options override the value of previously set properties. +// +// There is a single Options type, which is used with both encoding and decoding. +// Some options affect both operations, while others only affect one operation: +// +// - [AllowDuplicateNames] affects encoding and decoding +// - [AllowInvalidUTF8] affects encoding and decoding +// - [EscapeForHTML] affects encoding only +// - [EscapeForJS] affects encoding only +// - [PreserveRawStrings] affects encoding only +// - [CanonicalizeRawInts] affects encoding only +// - [CanonicalizeRawFloats] affects encoding only +// - [ReorderRawObjects] affects encoding only +// - [SpaceAfterColon] affects encoding only +// - [SpaceAfterComma] affects encoding only +// - [Multiline] affects encoding only +// - [WithIndent] affects encoding only +// - [WithIndentPrefix] affects encoding only +// +// Options that do not affect a particular operation are ignored. +// +// The Options type is identical to [encoding/json.Options] and +// [encoding/json/v2.Options]. Options from the other packages may +// be passed to functionality in this package, but are ignored. +// Options from this package may be used with the other packages. +type Options = jsonopts.Options + +// AllowDuplicateNames specifies that JSON objects may contain +// duplicate member names. Disabling the duplicate name check may provide +// performance benefits, but breaks compliance with RFC 7493, section 2.3. +// The input or output will still be compliant with RFC 8259, +// which leaves the handling of duplicate names as unspecified behavior. +// +// This affects either encoding or decoding. +func AllowDuplicateNames(v bool) Options { + if v { + return jsonflags.AllowDuplicateNames | 1 + } else { + return jsonflags.AllowDuplicateNames | 0 + } +} + +// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8, +// which will be mangled as the Unicode replacement character, U+FFFD. +// This causes the encoder or decoder to break compliance with +// RFC 7493, section 2.1, and RFC 8259, section 8.1. +// +// This affects either encoding or decoding. 
+func AllowInvalidUTF8(v bool) Options { + if v { + return jsonflags.AllowInvalidUTF8 | 1 + } else { + return jsonflags.AllowInvalidUTF8 | 0 + } +} + +// EscapeForHTML specifies that '<', '>', and '&' characters within JSON strings +// should be escaped as a hexadecimal Unicode codepoint (e.g., \u003c) so that +// the output is safe to embed within HTML. +// +// This only affects encoding and is ignored when decoding. +func EscapeForHTML(v bool) Options { + if v { + return jsonflags.EscapeForHTML | 1 + } else { + return jsonflags.EscapeForHTML | 0 + } +} + +// EscapeForJS specifies that U+2028 and U+2029 characters within JSON strings +// should be escaped as a hexadecimal Unicode codepoint (e.g., \u2028) so that +// the output is valid to embed within JavaScript. See RFC 8259, section 12. +// +// This only affects encoding and is ignored when decoding. +func EscapeForJS(v bool) Options { + if v { + return jsonflags.EscapeForJS | 1 + } else { + return jsonflags.EscapeForJS | 0 + } +} + +// PreserveRawStrings specifies that when encoding a raw JSON string in a +// [Token] or [Value], pre-escaped sequences +// in a JSON string are preserved to the output. +// However, raw strings still respect [EscapeForHTML] and [EscapeForJS] +// such that the relevant characters are escaped. +// If [AllowInvalidUTF8] is enabled, bytes of invalid UTF-8 +// are preserved to the output. +// +// This only affects encoding and is ignored when decoding. +func PreserveRawStrings(v bool) Options { + if v { + return jsonflags.PreserveRawStrings | 1 + } else { + return jsonflags.PreserveRawStrings | 0 + } +} + +// CanonicalizeRawInts specifies that when encoding a raw JSON +// integer number (i.e., a number without a fraction and exponent) in a +// [Token] or [Value], the number is canonicalized +// according to RFC 8785, section 3.2.2.3. As a special case, +// the number -0 is canonicalized as 0. +// +// JSON numbers are treated as IEEE 754 double precision numbers. +// Any numbers with precision beyond what is representable by that form +// will lose their precision when canonicalized. For example, +// integer values beyond ±2⁵³ will lose their precision. +// For example, 1234567890123456789 is formatted as 1234567890123456800. +// +// This only affects encoding and is ignored when decoding. +func CanonicalizeRawInts(v bool) Options { + if v { + return jsonflags.CanonicalizeRawInts | 1 + } else { + return jsonflags.CanonicalizeRawInts | 0 + } +} + +// CanonicalizeRawFloats specifies that when encoding a raw JSON +// floating-point number (i.e., a number with a fraction or exponent) in a +// [Token] or [Value], the number is canonicalized +// according to RFC 8785, section 3.2.2.3. As a special case, +// the number -0 is canonicalized as 0. +// +// JSON numbers are treated as IEEE 754 double precision numbers. +// It is safe to canonicalize a serialized single precision number and +// parse it back as a single precision number and expect the same value. +// If a number exceeds ±1.7976931348623157e+308, which is the maximum +// finite number, then it saturated at that value and formatted as such. +// +// This only affects encoding and is ignored when decoding. +func CanonicalizeRawFloats(v bool) Options { + if v { + return jsonflags.CanonicalizeRawFloats | 1 + } else { + return jsonflags.CanonicalizeRawFloats | 0 + } +} + +// ReorderRawObjects specifies that when encoding a raw JSON object in a +// [Value], the object members are reordered according to +// RFC 8785, section 3.2.3. 
+// +// This only affects encoding and is ignored when decoding. +func ReorderRawObjects(v bool) Options { + if v { + return jsonflags.ReorderRawObjects | 1 + } else { + return jsonflags.ReorderRawObjects | 0 + } +} + +// SpaceAfterColon specifies that the JSON output should emit a space character +// after each colon separator following a JSON object name. +// If false, then no space character appears after the colon separator. +// +// This only affects encoding and is ignored when decoding. +func SpaceAfterColon(v bool) Options { + if v { + return jsonflags.SpaceAfterColon | 1 + } else { + return jsonflags.SpaceAfterColon | 0 + } +} + +// SpaceAfterComma specifies that the JSON output should emit a space character +// after each comma separator following a JSON object value or array element. +// If false, then no space character appears after the comma separator. +// +// This only affects encoding and is ignored when decoding. +func SpaceAfterComma(v bool) Options { + if v { + return jsonflags.SpaceAfterComma | 1 + } else { + return jsonflags.SpaceAfterComma | 0 + } +} + +// Multiline specifies that the JSON output should expand to multiple lines, +// where every JSON object member or JSON array element appears on +// a new, indented line according to the nesting depth. +// +// If [SpaceAfterColon] is not specified, then the default is true. +// If [SpaceAfterComma] is not specified, then the default is false. +// If [WithIndent] is not specified, then the default is "\t". +// +// If set to false, then the output is a single-line, +// where the only whitespace emitted is determined by the current +// values of [SpaceAfterColon] and [SpaceAfterComma]. +// +// This only affects encoding and is ignored when decoding. +func Multiline(v bool) Options { + if v { + return jsonflags.Multiline | 1 + } else { + return jsonflags.Multiline | 0 + } +} + +// WithIndent specifies that the encoder should emit multiline output +// where each element in a JSON object or array begins on a new, indented line +// beginning with the indent prefix (see [WithIndentPrefix]) +// followed by one or more copies of indent according to the nesting depth. +// The indent must only be composed of space or tab characters. +// +// If the intent to emit indented output without a preference for +// the particular indent string, then use [Multiline] instead. +// +// This only affects encoding and is ignored when decoding. +// Use of this option implies [Multiline] being set to true. +func WithIndent(indent string) Options { + // Fast-path: Return a constant for common indents, which avoids allocating. + // These are derived from analyzing the Go module proxy on 2023-07-01. + switch indent { + case "\t": + return jsonopts.Indent("\t") // ~14k usages + case " ": + return jsonopts.Indent(" ") // ~18k usages + case " ": + return jsonopts.Indent(" ") // ~1.7k usages + case " ": + return jsonopts.Indent(" ") // ~52k usages + case " ": + return jsonopts.Indent(" ") // ~12k usages + case "": + return jsonopts.Indent("") // ~1.5k usages + } + + // Otherwise, allocate for this unique value. 
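+	// Anything other than spaces and tabs in the indent is a programmer
+	// error, so reject it with a panic before allocating.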
+ if s := strings.Trim(indent, " \t"); len(s) > 0 { + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent") + } + return jsonopts.Indent(indent) +} + +// WithIndentPrefix specifies that the encoder should emit multiline output +// where each element in a JSON object or array begins on a new, indented line +// beginning with the indent prefix followed by one or more copies of indent +// (see [WithIndent]) according to the nesting depth. +// The prefix must only be composed of space or tab characters. +// +// This only affects encoding and is ignored when decoding. +// Use of this option implies [Multiline] being set to true. +func WithIndentPrefix(prefix string) Options { + if s := strings.Trim(prefix, " \t"); len(s) > 0 { + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix") + } + return jsonopts.IndentPrefix(prefix) +} + +/* +// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit. +// Remember to also update the "Security Considerations" section. + +// WithByteLimit sets a limit on the number of bytes of input or output bytes +// that may be consumed or produced for each top-level JSON value. +// If a [Decoder] or [Encoder] method call would need to consume/produce +// more than a total of n bytes to make progress on the top-level JSON value, +// then the call will report an error. +// Whitespace before and within the top-level value are counted against the limit. +// Whitespace after a top-level value are counted against the limit +// for the next top-level value. +// +// A non-positive limit is equivalent to no limit at all. +// If unspecified, the default limit is no limit at all. +// This affects either encoding or decoding. +func WithByteLimit(n int64) Options { + return jsonopts.ByteLimit(max(n, 0)) +} + +// WithDepthLimit sets a limit on the maximum depth of JSON nesting +// that may be consumed or produced for each top-level JSON value. +// If a [Decoder] or [Encoder] method call would need to consume or produce +// a depth greater than n to make progress on the top-level JSON value, +// then the call will report an error. +// +// A non-positive limit is equivalent to no limit at all. +// If unspecified, the default limit is 10000. +// This affects either encoding or decoding. +func WithDepthLimit(n int) Options { + return jsonopts.DepthLimit(max(n, 0)) +} +*/ diff --git a/pkg/encoders/json/jsontext/pools.go b/pkg/encoders/json/jsontext/pools.go new file mode 100644 index 0000000..4f9e0ea --- /dev/null +++ b/pkg/encoders/json/jsontext/pools.go @@ -0,0 +1,152 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "bytes" + "io" + "math/bits" + "sync" +) + +// TODO(https://go.dev/issue/47657): Use sync.PoolOf. + +var ( + // This owns the internal buffer since there is no io.Writer to output to. + // Since the buffer can get arbitrarily large in normal usage, + // there is statistical tracking logic to determine whether to recycle + // the internal buffer or not based on a history of utilization. + bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }} + + // This owns the internal buffer, but it is only used to temporarily store + // buffered JSON before flushing it to the underlying io.Writer. + // In a sufficiently efficient streaming mode, we do not expect the buffer + // to grow arbitrarily large. 
Thus, we avoid recycling large buffers. + streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }} + + // This does not own the internal buffer since + // it is taken directly from the provided bytes.Buffer. + bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }} +) + +// bufferStatistics is statistics to track buffer utilization. +// It is used to determine whether to recycle a buffer or not +// to avoid https://go.dev/issue/23199. +type bufferStatistics struct { + strikes int // number of times the buffer was under-utilized + prevLen int // length of previous buffer +} + +func getBufferedEncoder(opts ...Options) *Encoder { + e := bufferedEncoderPool.Get().(*Encoder) + if e.s.Buf == nil { + // Round up to nearest 2ⁿ to make best use of malloc size classes. + // See runtime/sizeclasses.go on Go1.15. + // Logical OR with 63 to ensure 64 as the minimum buffer size. + n := 1 << bits.Len(uint(e.s.bufStats.prevLen|63)) + e.s.Buf = make([]byte, 0, n) + } + e.s.reset(e.s.Buf[:0], nil, opts...) + return e +} +func putBufferedEncoder(e *Encoder) { + // Recycle large buffers only if sufficiently utilized. + // If a buffer is under-utilized enough times sequentially, + // then it is discarded, ensuring that a single large buffer + // won't be kept alive by a continuous stream of small usages. + // + // The worst case utilization is computed as: + // MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES) + // + // For the constants chosen below, this is (25%)/(1+4) ⇒ 5%. + // This may seem low, but it ensures a lower bound on + // the absolute worst-case utilization. Without this check, + // this would be theoretically 0%, which is infinitely worse. + // + // See https://go.dev/issue/27735. + switch { + case cap(e.s.Buf) <= 4<<10: // always recycle buffers smaller than 4KiB + e.s.bufStats.strikes = 0 + case cap(e.s.Buf)/4 <= len(e.s.Buf): // at least 25% utilization + e.s.bufStats.strikes = 0 + case e.s.bufStats.strikes < 4: // at most 4 strikes + e.s.bufStats.strikes++ + default: // discard the buffer; too large and too often under-utilized + e.s.bufStats.strikes = 0 + e.s.bufStats.prevLen = len(e.s.Buf) // heuristic for size to allocate next time + e.s.Buf = nil + } + bufferedEncoderPool.Put(e) +} + +func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder { + if _, ok := w.(*bytes.Buffer); ok { + e := bytesBufferEncoderPool.Get().(*Encoder) + e.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer + return e + } else { + e := streamingEncoderPool.Get().(*Encoder) + e.s.reset(e.s.Buf[:0], w, opts...) // preserve existing buffer + return e + } +} +func putStreamingEncoder(e *Encoder) { + if _, ok := e.s.wr.(*bytes.Buffer); ok { + bytesBufferEncoderPool.Put(e) + } else { + if cap(e.s.Buf) > 64<<10 { + e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory + } + streamingEncoderPool.Put(e) + } +} + +var ( + // This does not own the internal buffer since it is externally provided. + bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }} + + // This owns the internal buffer, but it is only used to temporarily store + // buffered JSON fetched from the underlying io.Reader. + // In a sufficiently efficient streaming mode, we do not expect the buffer + // to grow arbitrarily large. Thus, we avoid recycling large buffers. + streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }} + + // This does not own the internal buffer since + // it is taken directly from the provided bytes.Buffer. 
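+	// Unlike the encoder pools above, this can simply alias the buffered
+	// pool because neither kind of Decoder owns its internal buffer.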
+ bytesBufferDecoderPool = bufferedDecoderPool +) + +func getBufferedDecoder(b []byte, opts ...Options) *Decoder { + d := bufferedDecoderPool.Get().(*Decoder) + d.s.reset(b, nil, opts...) + return d +} +func putBufferedDecoder(d *Decoder) { + bufferedDecoderPool.Put(d) +} + +func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder { + if _, ok := r.(*bytes.Buffer); ok { + d := bytesBufferDecoderPool.Get().(*Decoder) + d.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer + return d + } else { + d := streamingDecoderPool.Get().(*Decoder) + d.s.reset(d.s.buf[:0], r, opts...) // preserve existing buffer + return d + } +} +func putStreamingDecoder(d *Decoder) { + if _, ok := d.s.rd.(*bytes.Buffer); ok { + bytesBufferDecoderPool.Put(d) + } else { + if cap(d.s.buf) > 64<<10 { + d.s.buf = nil // avoid pinning arbitrarily large amounts of memory + } + streamingDecoderPool.Put(d) + } +} diff --git a/pkg/encoders/json/jsontext/quote.go b/pkg/encoders/json/jsontext/quote.go new file mode 100644 index 0000000..5ecfdbc --- /dev/null +++ b/pkg/encoders/json/jsontext/quote.go @@ -0,0 +1,41 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "encoding/json/internal/jsonflags" + "encoding/json/internal/jsonwire" +) + +// AppendQuote appends a double-quoted JSON string literal representing src +// to dst and returns the extended buffer. +// It uses the minimal string representation per RFC 8785, section 3.2.2.2. +// Invalid UTF-8 bytes are replaced with the Unicode replacement character +// and an error is returned at the end indicating the presence of invalid UTF-8. +// The dst must not overlap with the src. +func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { + dst, err := jsonwire.AppendQuote(dst, src, &jsonflags.Flags{}) + if err != nil { + err = &SyntacticError{Err: err} + } + return dst, err +} + +// AppendUnquote appends the decoded interpretation of src as a +// double-quoted JSON string literal to dst and returns the extended buffer. +// The input src must be a JSON string without any surrounding whitespace. +// Invalid UTF-8 bytes are replaced with the Unicode replacement character +// and an error is returned at the end indicating the presence of invalid UTF-8. +// Any trailing bytes after the JSON string literal results in an error. +// The dst must not overlap with the src. +func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { + dst, err := jsonwire.AppendUnquote(dst, src) + if err != nil { + err = &SyntacticError{Err: err} + } + return dst, err +} diff --git a/pkg/encoders/json/jsontext/state.go b/pkg/encoders/json/jsontext/state.go new file mode 100644 index 0000000..d214fd5 --- /dev/null +++ b/pkg/encoders/json/jsontext/state.go @@ -0,0 +1,828 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "errors" + "iter" + "math" + "strconv" + "strings" + "unicode/utf8" + + "encoding/json/internal/jsonwire" +) + +// ErrDuplicateName indicates that a JSON token could not be +// encoded or decoded because it results in a duplicate JSON object name. +// This error is directly wrapped within a [SyntacticError] when produced. 
+// +// The name of a duplicate JSON object member can be extracted as: +// +// err := ... +// var serr jsontext.SyntacticError +// if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName { +// ptr := serr.JSONPointer // JSON pointer to duplicate name +// name := ptr.LastToken() // duplicate name itself +// ... +// } +// +// This error is only returned if [AllowDuplicateNames] is false. +var ErrDuplicateName = errors.New("duplicate object member name") + +// ErrNonStringName indicates that a JSON token could not be +// encoded or decoded because it is not a string, +// as required for JSON object names according to RFC 8259, section 4. +// This error is directly wrapped within a [SyntacticError] when produced. +var ErrNonStringName = errors.New("object member name must be a string") + +var ( + errMissingValue = errors.New("missing value after object name") + errMismatchDelim = errors.New("mismatching structural token for object or array") + errMaxDepth = errors.New("exceeded max depth") + + errInvalidNamespace = errors.New("object namespace is in an invalid state") +) + +// Per RFC 8259, section 9, implementations may enforce a maximum depth. +// Such a limit is necessary to prevent stack overflows. +const maxNestingDepth = 10000 + +type state struct { + // Tokens validates whether the next token kind is valid. + Tokens stateMachine + + // Names is a stack of object names. + Names objectNameStack + + // Namespaces is a stack of object namespaces. + // For performance reasons, Encoder or Decoder may not update this + // if Marshal or Unmarshal is able to track names in a more efficient way. + // See makeMapArshaler and makeStructArshaler. + // Not used if AllowDuplicateNames is true. + Namespaces objectNamespaceStack +} + +// needObjectValue reports whether the next token should be an object value. +// This method is used by [wrapSyntacticError]. +func (s *state) needObjectValue() bool { + return s.Tokens.Last.needObjectValue() +} + +func (s *state) reset() { + s.Tokens.reset() + s.Names.reset() + s.Namespaces.reset() +} + +// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value +// relative to the root of the top-level JSON value. +// +// A Pointer is a slash-separated list of tokens, where each token is +// either a JSON object name or an index to a JSON array element +// encoded as a base-10 integer value. +// It is impossible to distinguish between an array index and an object name +// (that happens to be an base-10 encoded integer) without also knowing +// the structure of the top-level JSON value that the pointer refers to. +// +// There is exactly one representation of a pointer to a particular value, +// so comparability of Pointer values is equivalent to checking whether +// they both point to the exact same value. +type Pointer string + +// IsValid reports whether p is a valid JSON Pointer according to RFC 6901. +// Note that the concatenation of two valid pointers produces a valid pointer. +func (p Pointer) IsValid() bool { + for i, r := range p { + switch { + case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')): + return false // invalid escape + case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"): + return false // invalid UTF-8 + } + } + return len(p) == 0 || p[0] == '/' +} + +// Contains reports whether the JSON value that p points to +// is equal to or contains the JSON value that pc points to. 
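+// For example, Pointer("/foo") contains "/foo" and "/foo/bar",
+// but not "/foobar".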
+func (p Pointer) Contains(pc Pointer) bool { + // Invariant: len(p) <= len(pc) if p.Contains(pc) + suffix, ok := strings.CutPrefix(string(pc), string(p)) + return ok && (suffix == "" || suffix[0] == '/') +} + +// Parent strips off the last token and returns the remaining pointer. +// The parent of an empty p is an empty string. +func (p Pointer) Parent() Pointer { + return p[:max(strings.LastIndexByte(string(p), '/'), 0)] +} + +// LastToken returns the last token in the pointer. +// The last token of an empty p is an empty string. +func (p Pointer) LastToken() string { + last := p[max(strings.LastIndexByte(string(p), '/'), 0):] + return unescapePointerToken(strings.TrimPrefix(string(last), "/")) +} + +// AppendToken appends a token to the end of p and returns the full pointer. +func (p Pointer) AppendToken(tok string) Pointer { + return Pointer(appendEscapePointerName([]byte(p+"/"), tok)) +} + +// TODO: Add Pointer.AppendTokens, +// but should this take in a ...string or an iter.Seq[string]? + +// Tokens returns an iterator over the reference tokens in the JSON pointer, +// starting from the first token until the last token (unless stopped early). +func (p Pointer) Tokens() iter.Seq[string] { + return func(yield func(string) bool) { + for len(p) > 0 { + p = Pointer(strings.TrimPrefix(string(p), "/")) + i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p))) + if !yield(unescapePointerToken(string(p)[:i])) { + return + } + p = p[i:] + } + } +} + +func unescapePointerToken(token string) string { + if strings.Contains(token, "~") { + // Per RFC 6901, section 3, unescape '~' and '/' characters. + token = strings.ReplaceAll(token, "~1", "/") + token = strings.ReplaceAll(token, "~0", "~") + } + return token +} + +// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value. +// +// - If where is -1, then it points to the previously processed token. +// +// - If where is 0, then it points to the parent JSON object or array, +// or an object member if in-between an object member key and value. +// This is useful when the position is ambiguous whether +// we are interested in the previous or next token, or +// when we are uncertain whether the next token +// continues or terminates the current object or array. +// +// - If where is +1, then it points to the next expected value, +// assuming that it continues the current JSON object or array. +// As a special case, if the next token is a JSON object name, +// then it points to the parent JSON object. +// +// Invariant: Must call s.names.copyQuotedBuffer beforehand. +func (s state) appendStackPointer(b []byte, where int) []byte { + var objectDepth int + for i := 1; i < s.Tokens.Depth(); i++ { + e := s.Tokens.index(i) + arrayDelta := -1 // by default point to previous array element + if isLast := i == s.Tokens.Depth()-1; isLast { + switch { + case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName(): + return b + case where > 0 && e.isArray(): + arrayDelta = 0 // point to next array element + } + } + switch { + case e.isObject(): + b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth)) + objectDepth++ + case e.isArray(): + b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10) + } + } + return b +} + +func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte { + for _, r := range string(name) { + // Per RFC 6901, section 3, escape '~' and '/' characters. + switch r { + case '~': + b = append(b, "~0"...) 
+ case '/': + b = append(b, "~1"...) + default: + b = utf8.AppendRune(b, r) + } + } + return b +} + +// stateMachine is a push-down automaton that validates whether +// a sequence of tokens is valid or not according to the JSON grammar. +// It is useful for both encoding and decoding. +// +// It is a stack where each entry represents a nested JSON object or array. +// The stack has a minimum depth of 1 where the first level is a +// virtual JSON array to handle a stream of top-level JSON values. +// The top-level virtual JSON array is special in that it doesn't require commas +// between each JSON value. +// +// For performance, most methods are carefully written to be inlinable. +// The zero value is a valid state machine ready for use. +type stateMachine struct { + Stack []stateEntry + Last stateEntry +} + +// reset resets the state machine. +// The machine always starts with a minimum depth of 1. +func (m *stateMachine) reset() { + m.Stack = m.Stack[:0] + if cap(m.Stack) > 1<<10 { + m.Stack = nil + } + m.Last = stateTypeArray +} + +// Depth is the current nested depth of JSON objects and arrays. +// It is one-indexed (i.e., top-level values have a depth of 1). +func (m stateMachine) Depth() int { + return len(m.Stack) + 1 +} + +// index returns a reference to the ith entry. +// It is only valid until the next push method call. +func (m *stateMachine) index(i int) *stateEntry { + if i == len(m.Stack) { + return &m.Last + } + return &m.Stack[i] +} + +// DepthLength reports the current nested depth and +// the length of the last JSON object or array. +func (m stateMachine) DepthLength() (int, int64) { + return m.Depth(), m.Last.Length() +} + +// appendLiteral appends a JSON literal as the next token in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) appendLiteral() error { + switch { + case m.Last.NeedObjectName(): + return ErrNonStringName + case !m.Last.isValidNamespace(): + return errInvalidNamespace + default: + m.Last.Increment() + return nil + } +} + +// appendString appends a JSON string as the next token in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) appendString() error { + switch { + case !m.Last.isValidNamespace(): + return errInvalidNamespace + default: + m.Last.Increment() + return nil + } +} + +// appendNumber appends a JSON number as the next token in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) appendNumber() error { + return m.appendLiteral() +} + +// pushObject appends a JSON begin object token as next in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) pushObject() error { + switch { + case m.Last.NeedObjectName(): + return ErrNonStringName + case !m.Last.isValidNamespace(): + return errInvalidNamespace + case len(m.Stack) == maxNestingDepth: + return errMaxDepth + default: + m.Last.Increment() + m.Stack = append(m.Stack, m.Last) + m.Last = stateTypeObject + return nil + } +} + +// popObject appends a JSON end object token as next in the sequence. +// If an error is returned, the state is not mutated. 
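+//
+// For example, the following illustrative sequence (using only the methods
+// defined in this file) shows how a member name without a value is rejected:
+//
+//	var m stateMachine
+//	m.reset()
+//	_ = m.pushObject()   // "{"
+//	_ = m.appendString() // object member name
+//	err := m.popObject() // returns errMissingValue: the name has no value yet
+//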
+func (m *stateMachine) popObject() error { + switch { + case !m.Last.isObject(): + return errMismatchDelim + case m.Last.needObjectValue(): + return errMissingValue + case !m.Last.isValidNamespace(): + return errInvalidNamespace + default: + m.Last = m.Stack[len(m.Stack)-1] + m.Stack = m.Stack[:len(m.Stack)-1] + return nil + } +} + +// pushArray appends a JSON begin array token as next in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) pushArray() error { + switch { + case m.Last.NeedObjectName(): + return ErrNonStringName + case !m.Last.isValidNamespace(): + return errInvalidNamespace + case len(m.Stack) == maxNestingDepth: + return errMaxDepth + default: + m.Last.Increment() + m.Stack = append(m.Stack, m.Last) + m.Last = stateTypeArray + return nil + } +} + +// popArray appends a JSON end array token as next in the sequence. +// If an error is returned, the state is not mutated. +func (m *stateMachine) popArray() error { + switch { + case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array + return errMismatchDelim + case !m.Last.isValidNamespace(): + return errInvalidNamespace + default: + m.Last = m.Stack[len(m.Stack)-1] + m.Stack = m.Stack[:len(m.Stack)-1] + return nil + } +} + +// NeedIndent reports whether indent whitespace should be injected. +// A zero value means that no whitespace should be injected. +// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody +// should be appended to the output immediately before the next token. +func (m stateMachine) NeedIndent(next Kind) (n int) { + willEnd := next == '}' || next == ']' + switch { + case m.Depth() == 1: + return 0 // top-level values are never indented + case m.Last.Length() == 0 && willEnd: + return 0 // an empty object or array is never indented + case m.Last.Length() == 0 || m.Last.needImplicitComma(next): + return m.Depth() + case willEnd: + return m.Depth() - 1 + default: + return 0 + } +} + +// MayAppendDelim appends a colon or comma that may precede the next token. +func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte { + switch { + case m.Last.needImplicitColon(): + return append(b, ':') + case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values + return append(b, ',') + default: + return b + } +} + +// needDelim reports whether a colon or comma token should be implicitly emitted +// before the next token of the specified kind. +// A zero value means no delimiter should be emitted. +func (m stateMachine) needDelim(next Kind) (delim byte) { + switch { + case m.Last.needImplicitColon(): + return ':' + case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values + return ',' + default: + return 0 + } +} + +// InvalidateDisabledNamespaces marks all disabled namespaces as invalid. +// +// For efficiency, Marshal and Unmarshal may disable namespaces since there are +// more efficient ways to track duplicate names. However, if an error occurs, +// the namespaces in Encoder or Decoder will be left in an inconsistent state. +// Mark the namespaces as invalid so that future method calls on +// Encoder or Decoder will return an error. 
+func (m *stateMachine) InvalidateDisabledNamespaces() {
+	for i := range m.Depth() {
+		e := m.index(i)
+		if !e.isActiveNamespace() {
+			e.invalidateNamespace()
+		}
+	}
+}
+
+// stateEntry encodes several artifacts within a single unsigned integer:
+//   - whether this represents a JSON object or array,
+//   - whether this object should check for duplicate names, and
+//   - how many elements are in this JSON object or array.
+type stateEntry uint64
+
+const (
+	// The type mask (1 bit) records whether this is a JSON object or array.
+	stateTypeMask   stateEntry = 0x8000_0000_0000_0000
+	stateTypeObject stateEntry = 0x8000_0000_0000_0000
+	stateTypeArray  stateEntry = 0x0000_0000_0000_0000
+
+	// The name check mask (2 bits) records whether to update
+	// the namespaces for the current JSON object and
+	// whether the namespace is valid.
+	stateNamespaceMask    stateEntry = 0x6000_0000_0000_0000
+	stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
+	stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
+
+	// The count mask (61 bits) records the number of elements.
+	stateCountMask    stateEntry = 0x1fff_ffff_ffff_ffff
+	stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
+	stateCountOdd     stateEntry = 0x0000_0000_0000_0001
+	stateCountEven    stateEntry = 0x0000_0000_0000_0000
+)
+
+// Length reports the number of elements in the JSON object or array.
+// Each name and value in an object entry is treated as a separate element.
+func (e stateEntry) Length() int64 {
+	return int64(e & stateCountMask)
+}
+
+// isObject reports whether this is a JSON object.
+func (e stateEntry) isObject() bool {
+	return e&stateTypeMask == stateTypeObject
+}
+
+// isArray reports whether this is a JSON array.
+func (e stateEntry) isArray() bool {
+	return e&stateTypeMask == stateTypeArray
+}
+
+// NeedObjectName reports whether the next token must be a JSON string,
+// which is necessary for JSON object names.
+func (e stateEntry) NeedObjectName() bool {
+	return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
+}
+
+// needImplicitColon reports whether a colon should occur next,
+// which always occurs after JSON object names.
+func (e stateEntry) needImplicitColon() bool {
+	return e.needObjectValue()
+}
+
+// needObjectValue reports whether the next token must be a JSON value,
+// which is necessary after every JSON object name.
+func (e stateEntry) needObjectValue() bool {
+	return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
+}
+
+// needImplicitComma reports whether a comma should occur next,
+// which always occurs after a value in a JSON object or array
+// before the next value (or name).
+func (e stateEntry) needImplicitComma(next Kind) bool {
+	return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
+}
+
+// Increment increments the number of elements for the current object or array.
+// This assumes that overflow won't practically be an issue since
+// 1<<61 is sufficiently large.
+func (e *stateEntry) Increment() {
+	(*e)++
+}
+
+// decrement decrements the number of elements for the current object or array.
+// It is the caller's responsibility to ensure that e.Length() > 0.
+func (e *stateEntry) decrement() {
+	(*e)--
+}
+
+// DisableNamespace disables the JSON object namespace such that the
+// Encoder or Decoder no longer updates the namespace.
+func (e *stateEntry) DisableNamespace() {
+	*e |= stateDisableNamespace
+}
+
+// isActiveNamespace reports whether the JSON object namespace is actively
+// being updated and used for duplicate name checks.
+func (e stateEntry) isActiveNamespace() bool {
+	return e&(stateDisableNamespace) == 0
+}
+
+// invalidateNamespace marks the JSON object namespace as being invalid.
+func (e *stateEntry) invalidateNamespace() { + *e |= stateInvalidNamespace +} + +// isValidNamespace reports whether the JSON object namespace is valid. +func (e stateEntry) isValidNamespace() bool { + return e&(stateInvalidNamespace) == 0 +} + +// objectNameStack is a stack of names when descending into a JSON object. +// In contrast to objectNamespaceStack, this only has to remember a single name +// per JSON object. +// +// This data structure may contain offsets to encodeBuffer or decodeBuffer. +// It violates clean abstraction of layers, but is significantly more efficient. +// This ensures that popping and pushing in the common case is a trivial +// push/pop of an offset integer. +// +// The zero value is an empty names stack ready for use. +type objectNameStack struct { + // offsets is a stack of offsets for each name. + // A non-negative offset is the ending offset into the local names buffer. + // A negative offset is the bit-wise inverse of a starting offset into + // a remote buffer (e.g., encodeBuffer or decodeBuffer). + // A math.MinInt offset at the end implies that the last object is empty. + // Invariant: Positive offsets always occur before negative offsets. + offsets []int + // unquotedNames is a back-to-back concatenation of names. + unquotedNames []byte +} + +func (ns *objectNameStack) reset() { + ns.offsets = ns.offsets[:0] + ns.unquotedNames = ns.unquotedNames[:0] + if cap(ns.offsets) > 1<<6 { + ns.offsets = nil // avoid pinning arbitrarily large amounts of memory + } + if cap(ns.unquotedNames) > 1<<10 { + ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory + } +} + +func (ns *objectNameStack) length() int { + return len(ns.offsets) +} + +// getUnquoted retrieves the ith unquoted name in the stack. +// It returns an empty string if the last object is empty. +// +// Invariant: Must call copyQuotedBuffer beforehand. +func (ns *objectNameStack) getUnquoted(i int) []byte { + ns.ensureCopiedBuffer() + if i == 0 { + return ns.unquotedNames[:ns.offsets[0]] + } else { + return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]] + } +} + +// invalidOffset indicates that the last JSON object currently has no name. +const invalidOffset = math.MinInt + +// push descends into a nested JSON object. +func (ns *objectNameStack) push() { + ns.offsets = append(ns.offsets, invalidOffset) +} + +// ReplaceLastQuotedOffset replaces the last name with the starting offset +// to the quoted name in some remote buffer. All offsets provided must be +// relative to the same buffer until copyQuotedBuffer is called. +func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) { + // Use bit-wise inversion instead of naive multiplication by -1 to avoid + // ambiguity regarding zero (which is a valid offset into the names field). + // Bit-wise inversion is mathematically equivalent to -i-1, + // such that 0 becomes -1, 1 becomes -2, and so forth. + // This ensures that remote offsets are always negative. + ns.offsets[len(ns.offsets)-1] = ^i +} + +// replaceLastUnquotedName replaces the last name with the provided name. +// +// Invariant: Must call copyQuotedBuffer beforehand. +func (ns *objectNameStack) replaceLastUnquotedName(s string) { + ns.ensureCopiedBuffer() + var startOffset int + if len(ns.offsets) > 1 { + startOffset = ns.offsets[len(ns.offsets)-2] + } + ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...) + ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames) +} + +// clearLast removes any name in the last JSON object. 
+// It is semantically equivalent to ns.push followed by ns.pop. +func (ns *objectNameStack) clearLast() { + ns.offsets[len(ns.offsets)-1] = invalidOffset +} + +// pop ascends out of a nested JSON object. +func (ns *objectNameStack) pop() { + ns.offsets = ns.offsets[:len(ns.offsets)-1] +} + +// copyQuotedBuffer copies names from the remote buffer into the local names +// buffer so that there are no more offset references into the remote buffer. +// This allows the remote buffer to change contents without affecting +// the names that this data structure is trying to remember. +func (ns *objectNameStack) copyQuotedBuffer(b []byte) { + // Find the first negative offset. + var i int + for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- { + continue + } + + // Copy each name from the remote buffer into the local buffer. + for i = i + 1; i < len(ns.offsets); i++ { + if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset { + if i == 0 { + ns.offsets[i] = 0 + } else { + ns.offsets[i] = ns.offsets[i-1] + } + break // last JSON object had a push without any names + } + + // As a form of Hyrum proofing, we write an invalid character into the + // buffer to make misuse of Decoder.ReadToken more obvious. + // We need to undo that mutation here. + quotedName := b[^ns.offsets[i]:] + if quotedName[0] == invalidateBufferByte { + quotedName[0] = '"' + } + + // Append the unquoted name to the local buffer. + var startOffset int + if i > 0 { + startOffset = ns.offsets[i-1] + } + if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 { + ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...) + } else { + ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName) + } + ns.offsets[i] = len(ns.unquotedNames) + } +} + +func (ns *objectNameStack) ensureCopiedBuffer() { + if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 { + panic("BUG: copyQuotedBuffer not called beforehand") + } +} + +// objectNamespaceStack is a stack of object namespaces. +// This data structure assists in detecting duplicate names. +type objectNamespaceStack []objectNamespace + +// reset resets the object namespace stack. +func (nss *objectNamespaceStack) reset() { + if cap(*nss) > 1<<10 { + *nss = nil + } + *nss = (*nss)[:0] +} + +// push starts a new namespace for a nested JSON object. +func (nss *objectNamespaceStack) push() { + if cap(*nss) > len(*nss) { + *nss = (*nss)[:len(*nss)+1] + nss.Last().reset() + } else { + *nss = append(*nss, objectNamespace{}) + } +} + +// Last returns a pointer to the last JSON object namespace. +func (nss objectNamespaceStack) Last() *objectNamespace { + return &nss[len(nss)-1] +} + +// pop terminates the namespace for a nested JSON object. +func (nss *objectNamespaceStack) pop() { + *nss = (*nss)[:len(*nss)-1] +} + +// objectNamespace is the namespace for a JSON object. +// In contrast to objectNameStack, this needs to remember a all names +// per JSON object. +// +// The zero value is an empty namespace ready for use. +type objectNamespace struct { + // It relies on a linear search over all the names before switching + // to use a Go map for direct lookup. + + // endOffsets is a list of offsets to the end of each name in buffers. + // The length of offsets is the number of names in the namespace. + endOffsets []uint + // allUnquotedNames is a back-to-back concatenation of every name in the namespace. + allUnquotedNames []byte + // mapNames is a Go map containing every name in the namespace. 
+ // Only valid if non-nil. + mapNames map[string]struct{} +} + +// reset resets the namespace to be empty. +func (ns *objectNamespace) reset() { + ns.endOffsets = ns.endOffsets[:0] + ns.allUnquotedNames = ns.allUnquotedNames[:0] + ns.mapNames = nil + if cap(ns.endOffsets) > 1<<6 { + ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory + } + if cap(ns.allUnquotedNames) > 1<<10 { + ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory + } +} + +// length reports the number of names in the namespace. +func (ns *objectNamespace) length() int { + return len(ns.endOffsets) +} + +// getUnquoted retrieves the ith unquoted name in the namespace. +func (ns *objectNamespace) getUnquoted(i int) []byte { + if i == 0 { + return ns.allUnquotedNames[:ns.endOffsets[0]] + } else { + return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]] + } +} + +// lastUnquoted retrieves the last name in the namespace. +func (ns *objectNamespace) lastUnquoted() []byte { + return ns.getUnquoted(ns.length() - 1) +} + +// insertQuoted inserts a name and reports whether it was inserted, +// which only occurs if name is not already in the namespace. +// The provided name must be a valid JSON string. +func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool { + if isVerbatim { + name = name[len(`"`) : len(name)-len(`"`)] + } + return ns.insert(name, !isVerbatim) +} +func (ns *objectNamespace) InsertUnquoted(name []byte) bool { + return ns.insert(name, false) +} +func (ns *objectNamespace) insert(name []byte, quoted bool) bool { + var allNames []byte + if quoted { + allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name) + } else { + allNames = append(ns.allUnquotedNames, name...) + } + name = allNames[len(ns.allUnquotedNames):] + + // Switch to a map if the buffer is too large for linear search. + // This does not add the current name to the map. + if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) { + ns.mapNames = make(map[string]struct{}) + var startOffset uint + for _, endOffset := range ns.endOffsets { + name := ns.allUnquotedNames[startOffset:endOffset] + ns.mapNames[string(name)] = struct{}{} // allocates a new string + startOffset = endOffset + } + } + + if ns.mapNames == nil { + // Perform linear search over the buffer to find matching names. + // It provides O(n) lookup, but does not require any allocations. + var startOffset uint + for _, endOffset := range ns.endOffsets { + if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) { + return false + } + startOffset = endOffset + } + } else { + // Use the map if it is populated. + // It provides O(1) lookup, but requires a string allocation per name. + if _, ok := ns.mapNames[string(name)]; ok { + return false + } + ns.mapNames[string(name)] = struct{}{} // allocates a new string + } + + ns.allUnquotedNames = allNames + ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames))) + return true +} + +// removeLast removes the last name in the namespace. 
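+// The last name is dropped from the offsets and name buffer,
+// and from mapNames if the map is in use.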
+func (ns *objectNamespace) removeLast() { + if ns.mapNames != nil { + delete(ns.mapNames, string(ns.lastUnquoted())) + } + if ns.length()-1 == 0 { + ns.endOffsets = ns.endOffsets[:0] + ns.allUnquotedNames = ns.allUnquotedNames[:0] + } else { + ns.endOffsets = ns.endOffsets[:ns.length()-1] + ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]] + } +} diff --git a/pkg/encoders/json/jsontext/state_test.go b/pkg/encoders/json/jsontext/state_test.go new file mode 100644 index 0000000..c227600 --- /dev/null +++ b/pkg/encoders/json/jsontext/state_test.go @@ -0,0 +1,396 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "fmt" + "slices" + "strings" + "testing" + "unicode/utf8" +) + +func TestPointer(t *testing.T) { + tests := []struct { + in Pointer + wantParent Pointer + wantLast string + wantTokens []string + wantValid bool + }{ + {"", "", "", nil, true}, + {"a", "", "a", []string{"a"}, false}, + {"~", "", "~", []string{"~"}, false}, + {"/a", "", "a", []string{"a"}, true}, + {"/foo/bar", "/foo", "bar", []string{"foo", "bar"}, true}, + {"///", "//", "", []string{"", "", ""}, true}, + {"/~0~1", "", "~/", []string{"~/"}, true}, + {"/\xde\xad\xbe\xef", "", "\xde\xad\xbe\xef", []string{"\xde\xad\xbe\xef"}, false}, + } + for _, tt := range tests { + if got := tt.in.Parent(); got != tt.wantParent { + t.Errorf("Pointer(%q).Parent = %q, want %q", tt.in, got, tt.wantParent) + } + if got := tt.in.LastToken(); got != tt.wantLast { + t.Errorf("Pointer(%q).Last = %q, want %q", tt.in, got, tt.wantLast) + } + if strings.HasPrefix(string(tt.in), "/") { + wantRoundtrip := tt.in + if !utf8.ValidString(string(wantRoundtrip)) { + // Replace bytes of invalid UTF-8 with Unicode replacement character. + wantRoundtrip = Pointer([]rune(wantRoundtrip)) + } + if got := tt.in.Parent().AppendToken(tt.in.LastToken()); got != wantRoundtrip { + t.Errorf("Pointer(%q).Parent().AppendToken(LastToken()) = %q, want %q", tt.in, got, tt.in) + } + in := tt.in + for { + if (in + "x").Contains(tt.in) { + t.Errorf("Pointer(%q).Contains(%q) = true, want false", in+"x", tt.in) + } + if !in.Contains(tt.in) { + t.Errorf("Pointer(%q).Contains(%q) = false, want true", in, tt.in) + } + if in == in.Parent() { + break + } + in = in.Parent() + } + } + if got := slices.Collect(tt.in.Tokens()); !slices.Equal(got, tt.wantTokens) { + t.Errorf("Pointer(%q).Tokens = %q, want %q", tt.in, got, tt.wantTokens) + } + if got := tt.in.IsValid(); got != tt.wantValid { + t.Errorf("Pointer(%q).IsValid = %v, want %v", tt.in, got, tt.wantValid) + } + } +} + +func TestStateMachine(t *testing.T) { + // To test a state machine, we pass an ordered sequence of operations and + // check whether the current state is as expected. + // The operation type is a union type of various possible operations, + // which either call mutating methods on the state machine or + // call accessor methods on state machine and verify the results. + type operation any + type ( + // stackLengths checks the results of stateEntry.length accessors. + stackLengths []int64 + + // appendTokens is sequence of token kinds to append where + // none of them are expected to fail. 
+ // + // For example: `[nft]` is equivalent to the following sequence: + // + // pushArray() + // appendLiteral() + // appendString() + // appendNumber() + // popArray() + // + appendTokens string + + // appendToken is a single token kind to append with the expected error. + appendToken struct { + kind Kind + want error + } + + // needDelim checks the result of the needDelim accessor. + needDelim struct { + next Kind + want byte + } + ) + + // Each entry is a sequence of tokens to pass to the state machine. + tests := []struct { + label string + ops []operation + }{{ + "TopLevelValues", + []operation{ + stackLengths{0}, + needDelim{'n', 0}, + appendTokens(`nft`), + stackLengths{3}, + needDelim{'"', 0}, + appendTokens(`"0[]{}`), + stackLengths{7}, + }, + }, { + "ArrayValues", + []operation{ + stackLengths{0}, + needDelim{'[', 0}, + appendTokens(`[`), + stackLengths{1, 0}, + needDelim{'n', 0}, + appendTokens(`nft`), + stackLengths{1, 3}, + needDelim{'"', ','}, + appendTokens(`"0[]{}`), + stackLengths{1, 7}, + needDelim{']', 0}, + appendTokens(`]`), + stackLengths{1}, + }, + }, { + "ObjectValues", + []operation{ + stackLengths{0}, + needDelim{'{', 0}, + appendTokens(`{`), + stackLengths{1, 0}, + needDelim{'"', 0}, + appendTokens(`"`), + stackLengths{1, 1}, + needDelim{'n', ':'}, + appendTokens(`n`), + stackLengths{1, 2}, + needDelim{'"', ','}, + appendTokens(`"f"t`), + stackLengths{1, 6}, + appendTokens(`"""0"[]"{}`), + stackLengths{1, 14}, + needDelim{'}', 0}, + appendTokens(`}`), + stackLengths{1}, + }, + }, { + "ObjectCardinality", + []operation{ + appendTokens(`{`), + + // Appending any kind other than string for object name is an error. + appendToken{'n', ErrNonStringName}, + appendToken{'f', ErrNonStringName}, + appendToken{'t', ErrNonStringName}, + appendToken{'0', ErrNonStringName}, + appendToken{'{', ErrNonStringName}, + appendToken{'[', ErrNonStringName}, + appendTokens(`"`), + + // Appending '}' without first appending any value is an error. + appendToken{'}', errMissingValue}, + appendTokens(`"`), + + appendTokens(`}`), + }, + }, { + "MismatchingDelims", + []operation{ + appendToken{'}', errMismatchDelim}, // appending '}' without preceding '{' + appendTokens(`[[{`), + appendToken{']', errMismatchDelim}, // appending ']' that mismatches preceding '{' + appendTokens(`}]`), + appendToken{'}', errMismatchDelim}, // appending '}' that mismatches preceding '[' + appendTokens(`]`), + appendToken{']', errMismatchDelim}, // appending ']' without preceding '[' + }, + }} + + for _, tt := range tests { + t.Run(tt.label, func(t *testing.T) { + // Flatten appendTokens to sequence of appendToken entries. + var ops []operation + for _, op := range tt.ops { + if toks, ok := op.(appendTokens); ok { + for _, k := range []byte(toks) { + ops = append(ops, appendToken{Kind(k), nil}) + } + continue + } + ops = append(ops, op) + } + + // Append each token to the state machine and check the output. 
+ var state stateMachine + state.reset() + var sequence []Kind + for _, op := range ops { + switch op := op.(type) { + case stackLengths: + var got []int64 + for i := range state.Depth() { + e := state.index(i) + got = append(got, e.Length()) + } + want := []int64(op) + if !slices.Equal(got, want) { + t.Fatalf("%s: stack lengths mismatch:\ngot %v\nwant %v", sequence, got, want) + } + case appendToken: + got := state.append(op.kind) + if !equalError(got, op.want) { + t.Fatalf("%s: append('%c') = %v, want %v", sequence, op.kind, got, op.want) + } + if got == nil { + sequence = append(sequence, op.kind) + } + case needDelim: + if got := state.needDelim(op.next); got != op.want { + t.Fatalf("%s: needDelim('%c') = '%c', want '%c'", sequence, op.next, got, op.want) + } + default: + panic(fmt.Sprintf("unknown operation: %T", op)) + } + } + }) + } +} + +// append is a thin wrapper over the other append, pop, or push methods +// based on the token kind. +func (s *stateMachine) append(k Kind) error { + switch k { + case 'n', 'f', 't': + return s.appendLiteral() + case '"': + return s.appendString() + case '0': + return s.appendNumber() + case '{': + return s.pushObject() + case '}': + return s.popObject() + case '[': + return s.pushArray() + case ']': + return s.popArray() + default: + panic(fmt.Sprintf("invalid token kind: '%c'", k)) + } +} + +func TestObjectNamespace(t *testing.T) { + type operation any + type ( + insert struct { + name string + wantInserted bool + } + removeLast struct{} + ) + + // Sequence of insert operations to perform (order matters). + ops := []operation{ + insert{`""`, true}, + removeLast{}, + insert{`""`, true}, + insert{`""`, false}, + + // Test insertion of the same name with different formatting. + insert{`"alpha"`, true}, + insert{`"ALPHA"`, true}, // case-sensitive matching + insert{`"alpha"`, false}, + insert{`"\u0061\u006c\u0070\u0068\u0061"`, false}, // unescapes to "alpha" + removeLast{}, // removes "ALPHA" + insert{`"alpha"`, false}, + removeLast{}, // removes "alpha" + insert{`"alpha"`, true}, + removeLast{}, + + // Bulk insert simple names. + insert{`"alpha"`, true}, + insert{`"bravo"`, true}, + insert{`"charlie"`, true}, + insert{`"delta"`, true}, + insert{`"echo"`, true}, + insert{`"foxtrot"`, true}, + insert{`"golf"`, true}, + insert{`"hotel"`, true}, + insert{`"india"`, true}, + insert{`"juliet"`, true}, + insert{`"kilo"`, true}, + insert{`"lima"`, true}, + insert{`"mike"`, true}, + insert{`"november"`, true}, + insert{`"oscar"`, true}, + insert{`"papa"`, true}, + insert{`"quebec"`, true}, + insert{`"romeo"`, true}, + insert{`"sierra"`, true}, + insert{`"tango"`, true}, + insert{`"uniform"`, true}, + insert{`"victor"`, true}, + insert{`"whiskey"`, true}, + insert{`"xray"`, true}, + insert{`"yankee"`, true}, + insert{`"zulu"`, true}, + + // Test insertion of invalid UTF-8. + insert{`"` + "\ufffd" + `"`, true}, + insert{`"` + "\ufffd" + `"`, false}, + insert{`"\ufffd"`, false}, // unescapes to Unicode replacement character + insert{`"\uFFFD"`, false}, // unescapes to Unicode replacement character + insert{`"` + "\xff" + `"`, false}, // mangles as Unicode replacement character + removeLast{}, + insert{`"` + "\ufffd" + `"`, true}, + + // Test insertion of unicode characters. + insert{`"☺☻☹"`, true}, + insert{`"☺☻☹"`, false}, + removeLast{}, + insert{`"☺☻☹"`, true}, + } + + // Execute the sequence of operations twice: + // 1) on a fresh namespace and 2) on a namespace that has been reset. 
+ var ns objectNamespace + wantNames := []string{} + for _, reset := range []bool{false, true} { + if reset { + ns.reset() + wantNames = nil + } + + // Execute the operations and ensure the state is consistent. + for i, op := range ops { + switch op := op.(type) { + case insert: + gotInserted := ns.insertQuoted([]byte(op.name), false) + if gotInserted != op.wantInserted { + t.Fatalf("%d: objectNamespace{%v}.insert(%v) = %v, want %v", i, strings.Join(wantNames, " "), op.name, gotInserted, op.wantInserted) + } + if gotInserted { + b, _ := AppendUnquote(nil, []byte(op.name)) + wantNames = append(wantNames, string(b)) + } + case removeLast: + ns.removeLast() + wantNames = wantNames[:len(wantNames)-1] + default: + panic(fmt.Sprintf("unknown operation: %T", op)) + } + + // Check that the namespace is consistent. + gotNames := []string{} + for i := range ns.length() { + gotNames = append(gotNames, string(ns.getUnquoted(i))) + } + if !slices.Equal(gotNames, wantNames) { + t.Fatalf("%d: objectNamespace = {%v}, want {%v}", i, strings.Join(gotNames, " "), strings.Join(wantNames, " ")) + } + } + + // Verify that we have not switched to using a Go map. + if ns.mapNames != nil { + t.Errorf("objectNamespace.mapNames = non-nil, want nil") + } + + // Insert a large number of names. + for i := range 64 { + ns.InsertUnquoted([]byte(fmt.Sprintf(`name%d`, i))) + } + + // Verify that we did switch to using a Go map. + if ns.mapNames == nil { + t.Errorf("objectNamespace.mapNames = nil, want non-nil") + } + } +} diff --git a/pkg/encoders/json/jsontext/token.go b/pkg/encoders/json/jsontext/token.go new file mode 100644 index 0000000..e78c3f8 --- /dev/null +++ b/pkg/encoders/json/jsontext/token.go @@ -0,0 +1,527 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "bytes" + "errors" + "math" + "strconv" + + "encoding/json/internal/jsonflags" + "encoding/json/internal/jsonwire" +) + +// NOTE: Token is analogous to v1 json.Token. + +const ( + maxInt64 = math.MaxInt64 + minInt64 = math.MinInt64 + maxUint64 = math.MaxUint64 + minUint64 = 0 // for consistency and readability purposes + + invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call" +) + +var errInvalidToken = errors.New("invalid jsontext.Token") + +// Token represents a lexical JSON token, which may be one of the following: +// - a JSON literal (i.e., null, true, or false) +// - a JSON string (e.g., "hello, world!") +// - a JSON number (e.g., 123.456) +// - a begin or end delimiter for a JSON object (i.e., { or } ) +// - a begin or end delimiter for a JSON array (i.e., [ or ] ) +// +// A Token cannot represent entire array or object values, while a [Value] can. +// There is no Token to represent commas and colons since +// these structural tokens can be inferred from the surrounding context. +type Token struct { + nonComparable + + // Tokens can exist in either a "raw" or an "exact" form. + // Tokens produced by the Decoder are in the "raw" form. + // Tokens returned by constructors are usually in the "exact" form. + // The Encoder accepts Tokens in either the "raw" or "exact" form. 
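+	// For example (illustrative): String("hi") yields a Token in the "exact"
+	// form, whereas Decoder.ReadToken returns Tokens in the "raw" form.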
+ // + // The following chart shows the possible values for each Token type: + // ╔═════════════════╦════════════╤════════════╤════════════╗ + // ║ Token type ║ raw field │ str field │ num field ║ + // ╠═════════════════╬════════════╪════════════╪════════════╣ + // ║ null (raw) ║ "null" │ "" │ 0 ║ + // ║ false (raw) ║ "false" │ "" │ 0 ║ + // ║ true (raw) ║ "true" │ "" │ 0 ║ + // ║ string (raw) ║ non-empty │ "" │ offset ║ + // ║ string (string) ║ nil │ non-empty │ 0 ║ + // ║ number (raw) ║ non-empty │ "" │ offset ║ + // ║ number (float) ║ nil │ "f" │ non-zero ║ + // ║ number (int64) ║ nil │ "i" │ non-zero ║ + // ║ number (uint64) ║ nil │ "u" │ non-zero ║ + // ║ object (delim) ║ "{" or "}" │ "" │ 0 ║ + // ║ array (delim) ║ "[" or "]" │ "" │ 0 ║ + // ╚═════════════════╩════════════╧════════════╧════════════╝ + // + // Notes: + // - For tokens stored in "raw" form, the num field contains the + // absolute offset determined by raw.previousOffsetStart(). + // The buffer itself is stored in raw.previousBuffer(). + // - JSON literals and structural characters are always in the "raw" form. + // - JSON strings and numbers can be in either "raw" or "exact" forms. + // - The exact zero value of JSON strings and numbers in the "exact" forms + // have ambiguous representation. Thus, they are always represented + // in the "raw" form. + + // raw contains a reference to the raw decode buffer. + // If non-nil, then its value takes precedence over str and num. + // It is only valid if num == raw.previousOffsetStart(). + raw *decodeBuffer + + // str is the unescaped JSON string if num is zero. + // Otherwise, it is "f", "i", or "u" if num should be interpreted + // as a float64, int64, or uint64, respectively. + str string + + // num is a float64, int64, or uint64 stored as a uint64 value. + // It is non-zero for any JSON number in the "exact" form. + num uint64 +} + +// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues? + +var ( + Null Token = rawToken("null") + False Token = rawToken("false") + True Token = rawToken("true") + + BeginObject Token = rawToken("{") + EndObject Token = rawToken("}") + BeginArray Token = rawToken("[") + EndArray Token = rawToken("]") + + zeroString Token = rawToken(`""`) + zeroNumber Token = rawToken(`0`) + + nanString Token = String("NaN") + pinfString Token = String("Infinity") + ninfString Token = String("-Infinity") +) + +func rawToken(s string) Token { + return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}} +} + +// Bool constructs a Token representing a JSON boolean. +func Bool(b bool) Token { + if b { + return True + } + return False +} + +// String constructs a Token representing a JSON string. +// The provided string should contain valid UTF-8, otherwise invalid characters +// may be mangled as the Unicode replacement character. +func String(s string) Token { + if len(s) == 0 { + return zeroString + } + return Token{str: s} +} + +// Float constructs a Token representing a JSON number. +// The values NaN, +Inf, and -Inf will be represented +// as a JSON string with the values "NaN", "Infinity", and "-Infinity". +func Float(n float64) Token { + switch { + case math.Float64bits(n) == 0: + return zeroNumber + case math.IsNaN(n): + return nanString + case math.IsInf(n, +1): + return pinfString + case math.IsInf(n, -1): + return ninfString + } + return Token{str: "f", num: math.Float64bits(n)} +} + +// Int constructs a Token representing a JSON number from an int64. 
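+//
+// For example (illustrative):
+//
+//	Int(0)  // equivalent to the shared zero-number token
+//	Int(-3) // encodes as the JSON number -3
+//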
+func Int(n int64) Token { + if n == 0 { + return zeroNumber + } + return Token{str: "i", num: uint64(n)} +} + +// Uint constructs a Token representing a JSON number from a uint64. +func Uint(n uint64) Token { + if n == 0 { + return zeroNumber + } + return Token{str: "u", num: uint64(n)} +} + +// Clone makes a copy of the Token such that its value remains valid +// even after a subsequent [Decoder.Read] call. +func (t Token) Clone() Token { + // TODO: Allow caller to avoid any allocations? + if raw := t.raw; raw != nil { + // Avoid copying globals. + if t.raw.prevStart == 0 { + switch t.raw { + case Null.raw: + return Null + case False.raw: + return False + case True.raw: + return True + case BeginObject.raw: + return BeginObject + case EndObject.raw: + return EndObject + case BeginArray.raw: + return BeginArray + case EndArray.raw: + return EndArray + } + } + + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + buf := bytes.Clone(raw.previousBuffer()) + return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}} + } + return t +} + +// Bool returns the value for a JSON boolean. +// It panics if the token kind is not a JSON boolean. +func (t Token) Bool() bool { + switch t.raw { + case True.raw: + return true + case False.raw: + return false + default: + panic("invalid JSON token kind: " + t.Kind().String()) + } +} + +// appendString appends a JSON string to dst and returns it. +// It panics if t is not a JSON string. +func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) { + if raw := t.raw; raw != nil { + // Handle raw string value. + buf := raw.previousBuffer() + if Kind(buf[0]) == '"' { + if jsonwire.ConsumeSimpleString(buf) == len(buf) { + return append(dst, buf...), nil + } + dst, _, err := jsonwire.ReformatString(dst, buf, flags) + return dst, err + } + } else if len(t.str) != 0 && t.num == 0 { + // Handle exact string value. + return jsonwire.AppendQuote(dst, t.str, flags) + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// String returns the unescaped string value for a JSON string. +// For other JSON kinds, this returns the raw JSON representation. +func (t Token) String() string { + // This is inlinable to take advantage of "function outlining". + // This avoids an allocation for the string(b) conversion + // if the caller does not use the string in an escaping manner. + // See https://blog.filippo.io/efficient-go-apis-with-the-inliner/ + s, b := t.string() + if len(b) > 0 { + return string(b) + } + return s +} +func (t Token) string() (string, []byte) { + if raw := t.raw; raw != nil { + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + buf := raw.previousBuffer() + if buf[0] == '"' { + // TODO: Preserve ValueFlags in Token? + isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf) + return "", jsonwire.UnquoteMayCopy(buf, isVerbatim) + } + // Handle tokens that are not JSON strings for fmt.Stringer. + return "", buf + } + if len(t.str) != 0 && t.num == 0 { + return t.str, nil + } + // Handle tokens that are not JSON strings for fmt.Stringer. + if t.num > 0 { + switch t.str[0] { + case 'f': + return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil + case 'i': + return strconv.FormatInt(int64(t.num), 10), nil + case 'u': + return strconv.FormatUint(uint64(t.num), 10), nil + } + } + return "", nil +} + +// appendNumber appends a JSON number to dst and returns it. +// It panics if t is not a JSON number. 
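+//
+// For example (illustrative): for the exact-form token Float(1.5) it appends
+// the bytes "1.5", while for a raw-form number token it reformats the
+// originally read digits according to flags.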
+func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) { + if raw := t.raw; raw != nil { + // Handle raw number value. + buf := raw.previousBuffer() + if Kind(buf[0]).normalize() == '0' { + dst, _, err := jsonwire.ReformatNumber(dst, buf, flags) + return dst, err + } + } else if t.num != 0 { + // Handle exact number value. + switch t.str[0] { + case 'f': + return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil + case 'i': + return strconv.AppendInt(dst, int64(t.num), 10), nil + case 'u': + return strconv.AppendUint(dst, uint64(t.num), 10), nil + } + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// Float returns the floating-point value for a JSON number. +// It returns a NaN, +Inf, or -Inf value for any JSON string +// with the values "NaN", "Infinity", or "-Infinity". +// It panics for all other cases. +func (t Token) Float() float64 { + if raw := t.raw; raw != nil { + // Handle raw number value. + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + buf := raw.previousBuffer() + if Kind(buf[0]).normalize() == '0' { + fv, _ := jsonwire.ParseFloat(buf, 64) + return fv + } + } else if t.num != 0 { + // Handle exact number value. + switch t.str[0] { + case 'f': + return math.Float64frombits(t.num) + case 'i': + return float64(int64(t.num)) + case 'u': + return float64(uint64(t.num)) + } + } + + // Handle string values with "NaN", "Infinity", or "-Infinity". + if t.Kind() == '"' { + switch t.String() { + case "NaN": + return math.NaN() + case "Infinity": + return math.Inf(+1) + case "-Infinity": + return math.Inf(-1) + } + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// Int returns the signed integer value for a JSON number. +// The fractional component of any number is ignored (truncation toward zero). +// Any number beyond the representation of an int64 will be saturated +// to the closest representable value. +// It panics if the token kind is not a JSON number. +func (t Token) Int() int64 { + if raw := t.raw; raw != nil { + // Handle raw integer value. + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + neg := false + buf := raw.previousBuffer() + if len(buf) > 0 && buf[0] == '-' { + neg, buf = true, buf[1:] + } + if numAbs, ok := jsonwire.ParseUint(buf); ok { + if neg { + if numAbs > -minInt64 { + return minInt64 + } + return -1 * int64(numAbs) + } else { + if numAbs > +maxInt64 { + return maxInt64 + } + return +1 * int64(numAbs) + } + } + } else if t.num != 0 { + // Handle exact integer value. + switch t.str[0] { + case 'i': + return int64(t.num) + case 'u': + if t.num > maxInt64 { + return maxInt64 + } + return int64(t.num) + } + } + + // Handle JSON number that is a floating-point value. + if t.Kind() == '0' { + switch fv := t.Float(); { + case fv >= maxInt64: + return maxInt64 + case fv <= minInt64: + return minInt64 + default: + return int64(fv) // truncation toward zero + } + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// Uint returns the unsigned integer value for a JSON number. +// The fractional component of any number is ignored (truncation toward zero). +// Any number beyond the representation of an uint64 will be saturated +// to the closest representable value. +// It panics if the token kind is not a JSON number. 
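+//
+// For example, the saturating behavior described above (illustrative):
+//
+//	Int(-5).Uint()      // 0: negative values saturate to zero
+//	Float(1e300).Uint() // math.MaxUint64: values above the range saturate upward
+//	Uint(42).Uint()     // 42
+//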
+func (t Token) Uint() uint64 { + // NOTE: This accessor returns 0 for any negative JSON number, + // which might be surprising, but is at least consistent with the behavior + // of saturating out-of-bounds numbers to the closest representable number. + + if raw := t.raw; raw != nil { + // Handle raw integer value. + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + neg := false + buf := raw.previousBuffer() + if len(buf) > 0 && buf[0] == '-' { + neg, buf = true, buf[1:] + } + if num, ok := jsonwire.ParseUint(buf); ok { + if neg { + return minUint64 + } + return num + } + } else if t.num != 0 { + // Handle exact integer value. + switch t.str[0] { + case 'u': + return t.num + case 'i': + if int64(t.num) < minUint64 { + return minUint64 + } + return uint64(int64(t.num)) + } + } + + // Handle JSON number that is a floating-point value. + if t.Kind() == '0' { + switch fv := t.Float(); { + case fv >= maxUint64: + return maxUint64 + case fv <= minUint64: + return minUint64 + default: + return uint64(fv) // truncation toward zero + } + } + + panic("invalid JSON token kind: " + t.Kind().String()) +} + +// Kind returns the token kind. +func (t Token) Kind() Kind { + switch { + case t.raw != nil: + raw := t.raw + if uint64(raw.previousOffsetStart()) != t.num { + panic(invalidTokenPanic) + } + return Kind(t.raw.buf[raw.prevStart]).normalize() + case t.num != 0: + return '0' + case len(t.str) != 0: + return '"' + default: + return invalidKind + } +} + +// Kind represents each possible JSON token kind with a single byte, +// which is conveniently the first byte of that kind's grammar +// with the restriction that numbers always be represented with '0': +// +// - 'n': null +// - 'f': false +// - 't': true +// - '"': string +// - '0': number +// - '{': object begin +// - '}': object end +// - '[': array begin +// - ']': array end +// +// An invalid kind is usually represented using 0, +// but may be non-zero due to invalid JSON data. +type Kind byte + +const invalidKind Kind = 0 + +// String prints the kind in a humanly readable fashion. +func (k Kind) String() string { + switch k { + case 'n': + return "null" + case 'f': + return "false" + case 't': + return "true" + case '"': + return "string" + case '0': + return "number" + case '{': + return "{" + case '}': + return "}" + case '[': + return "[" + case ']': + return "]" + default: + return "" + } +} + +// normalize coalesces all possible starting characters of a number as just '0'. +func (k Kind) normalize() Kind { + if k == '-' || ('0' <= k && k <= '9') { + return '0' + } + return k +} diff --git a/pkg/encoders/json/jsontext/token_test.go b/pkg/encoders/json/jsontext/token_test.go new file mode 100644 index 0000000..ebe324e --- /dev/null +++ b/pkg/encoders/json/jsontext/token_test.go @@ -0,0 +1,168 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "math" + "reflect" + "testing" +) + +func TestTokenStringAllocations(t *testing.T) { + if testing.CoverMode() != "" { + t.Skip("coverage mode breaks the compiler optimization this depends on") + } + + tok := rawToken(`"hello"`) + var m map[string]bool + got := int(testing.AllocsPerRun(10, func() { + // This function uses tok.String() is a non-escaping manner + // (i.e., looking it up in a Go map). It should not allocate. 
+ if m[tok.String()] { + panic("never executed") + } + })) + if got > 0 { + t.Errorf("Token.String allocated %d times, want 0", got) + } +} + +func TestTokenAccessors(t *testing.T) { + type token struct { + Bool bool + String string + Float float64 + Int int64 + Uint uint64 + Kind Kind + } + + tests := []struct { + in Token + want token + }{ + {Token{}, token{String: ""}}, + {Null, token{String: "null", Kind: 'n'}}, + {False, token{Bool: false, String: "false", Kind: 'f'}}, + {True, token{Bool: true, String: "true", Kind: 't'}}, + {Bool(false), token{Bool: false, String: "false", Kind: 'f'}}, + {Bool(true), token{Bool: true, String: "true", Kind: 't'}}, + {BeginObject, token{String: "{", Kind: '{'}}, + {EndObject, token{String: "}", Kind: '}'}}, + {BeginArray, token{String: "[", Kind: '['}}, + {EndArray, token{String: "]", Kind: ']'}}, + {String(""), token{String: "", Kind: '"'}}, + {String("hello, world!"), token{String: "hello, world!", Kind: '"'}}, + {rawToken(`"hello, world!"`), token{String: "hello, world!", Kind: '"'}}, + {Float(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}}, + {Float(math.Copysign(0, -1)), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}}, + {Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '"'}}, + {Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '"'}}, + {Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '"'}}, + {Int(minInt64), token{String: "-9223372036854775808", Float: minInt64, Int: minInt64, Uint: minUint64, Kind: '0'}}, + {Int(minInt64 + 1), token{String: "-9223372036854775807", Float: minInt64 + 1, Int: minInt64 + 1, Uint: minUint64, Kind: '0'}}, + {Int(-1), token{String: "-1", Float: -1, Int: -1, Uint: minUint64, Kind: '0'}}, + {Int(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}}, + {Int(+1), token{String: "1", Float: +1, Int: +1, Uint: +1, Kind: '0'}}, + {Int(maxInt64 - 1), token{String: "9223372036854775806", Float: maxInt64 - 1, Int: maxInt64 - 1, Uint: maxInt64 - 1, Kind: '0'}}, + {Int(maxInt64), token{String: "9223372036854775807", Float: maxInt64, Int: maxInt64, Uint: maxInt64, Kind: '0'}}, + {Uint(minUint64), token{String: "0", Kind: '0'}}, + {Uint(minUint64 + 1), token{String: "1", Float: minUint64 + 1, Int: minUint64 + 1, Uint: minUint64 + 1, Kind: '0'}}, + {Uint(maxUint64 - 1), token{String: "18446744073709551614", Float: maxUint64 - 1, Int: maxInt64, Uint: maxUint64 - 1, Kind: '0'}}, + {Uint(maxUint64), token{String: "18446744073709551615", Float: maxUint64, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, + {rawToken(`-0`), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}}, + {rawToken(`1e1000`), token{String: "1e1000", Float: math.MaxFloat64, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, + {rawToken(`-1e1000`), token{String: "-1e1000", Float: -math.MaxFloat64, Int: minInt64, Uint: minUint64, Kind: '0'}}, + {rawToken(`0.1`), token{String: "0.1", Float: 0.1, Int: 0, Uint: 0, Kind: '0'}}, + {rawToken(`0.5`), token{String: "0.5", Float: 0.5, Int: 0, Uint: 0, Kind: '0'}}, + {rawToken(`0.9`), token{String: "0.9", Float: 0.9, Int: 0, Uint: 0, Kind: '0'}}, + {rawToken(`1.1`), token{String: "1.1", Float: 1.1, Int: 1, Uint: 1, Kind: '0'}}, + {rawToken(`-0.1`), token{String: "-0.1", Float: -0.1, Int: 0, Uint: 0, Kind: '0'}}, + {rawToken(`-0.5`), token{String: "-0.5", Float: -0.5, Int: 0, Uint: 0, Kind: '0'}}, + {rawToken(`-0.9`), token{String: "-0.9", Float: -0.9, Int: 0, Uint: 0, Kind: 
'0'}}, + {rawToken(`-1.1`), token{String: "-1.1", Float: -1.1, Int: -1, Uint: 0, Kind: '0'}}, + {rawToken(`99999999999999999999`), token{String: "99999999999999999999", Float: 1e20 - 1, Int: maxInt64, Uint: maxUint64, Kind: '0'}}, + {rawToken(`-99999999999999999999`), token{String: "-99999999999999999999", Float: -1e20 - 1, Int: minInt64, Uint: minUint64, Kind: '0'}}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + got := token{ + Bool: func() bool { + defer func() { recover() }() + return tt.in.Bool() + }(), + String: tt.in.String(), + Float: func() float64 { + defer func() { recover() }() + return tt.in.Float() + }(), + Int: func() int64 { + defer func() { recover() }() + return tt.in.Int() + }(), + Uint: func() uint64 { + defer func() { recover() }() + return tt.in.Uint() + }(), + Kind: tt.in.Kind(), + } + + if got.Bool != tt.want.Bool { + t.Errorf("Token(%s).Bool() = %v, want %v", tt.in, got.Bool, tt.want.Bool) + } + if got.String != tt.want.String { + t.Errorf("Token(%s).String() = %v, want %v", tt.in, got.String, tt.want.String) + } + if math.Float64bits(got.Float) != math.Float64bits(tt.want.Float) { + t.Errorf("Token(%s).Float() = %v, want %v", tt.in, got.Float, tt.want.Float) + } + if got.Int != tt.want.Int { + t.Errorf("Token(%s).Int() = %v, want %v", tt.in, got.Int, tt.want.Int) + } + if got.Uint != tt.want.Uint { + t.Errorf("Token(%s).Uint() = %v, want %v", tt.in, got.Uint, tt.want.Uint) + } + if got.Kind != tt.want.Kind { + t.Errorf("Token(%s).Kind() = %v, want %v", tt.in, got.Kind, tt.want.Kind) + } + }) + } +} + +func TestTokenClone(t *testing.T) { + tests := []struct { + in Token + wantExactRaw bool + }{ + {Token{}, true}, + {Null, true}, + {False, true}, + {True, true}, + {BeginObject, true}, + {EndObject, true}, + {BeginArray, true}, + {EndArray, true}, + {String("hello, world!"), true}, + {rawToken(`"hello, world!"`), false}, + {Float(3.14159), true}, + {rawToken(`3.14159`), false}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + got := tt.in.Clone() + if !reflect.DeepEqual(got, tt.in) { + t.Errorf("Token(%s) == Token(%s).Clone() = false, want true", tt.in, tt.in) + } + gotExactRaw := got.raw == tt.in.raw + if gotExactRaw != tt.wantExactRaw { + t.Errorf("Token(%s).raw == Token(%s).Clone().raw = %v, want %v", tt.in, tt.in, gotExactRaw, tt.wantExactRaw) + } + }) + } +} diff --git a/pkg/encoders/json/jsontext/value.go b/pkg/encoders/json/jsontext/value.go new file mode 100644 index 0000000..a4b06b2 --- /dev/null +++ b/pkg/encoders/json/jsontext/value.go @@ -0,0 +1,395 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "bytes" + "errors" + "io" + "slices" + "sync" + + "encoding/json/internal/jsonflags" + "encoding/json/internal/jsonwire" +) + +// NOTE: Value is analogous to v1 json.RawMessage. + +// AppendFormat formats the JSON value in src and appends it to dst +// according to the specified options. +// See [Value.Format] for more details about the formatting behavior. +// +// The dst and src may overlap. +// If an error is reported, then the entirety of src is appended to dst. +func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) { + e := getBufferedEncoder(opts...) 
+ defer putBufferedEncoder(e) + e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1) + if err := e.s.WriteValue(src); err != nil { + return append(dst, src...), err + } + return append(dst, e.s.Buf...), nil +} + +// Value represents a single raw JSON value, which may be one of the following: +// - a JSON literal (i.e., null, true, or false) +// - a JSON string (e.g., "hello, world!") +// - a JSON number (e.g., 123.456) +// - an entire JSON object (e.g., {"fizz":"buzz"} ) +// - an entire JSON array (e.g., [1,2,3] ) +// +// Value can represent entire array or object values, while [Token] cannot. +// Value may contain leading and/or trailing whitespace. +type Value []byte + +// Clone returns a copy of v. +func (v Value) Clone() Value { + return bytes.Clone(v) +} + +// String returns the string formatting of v. +func (v Value) String() string { + if v == nil { + return "null" + } + return string(v) +} + +// IsValid reports whether the raw JSON value is syntactically valid +// according to the specified options. +// +// By default (if no options are specified), it validates according to RFC 7493. +// It verifies whether the input is properly encoded as UTF-8, +// that escape sequences within strings decode to valid Unicode codepoints, and +// that all names in each object are unique. +// It does not verify whether numbers are representable within the limits +// of any common numeric type (e.g., float64, int64, or uint64). +// +// Relevant options include: +// - [AllowDuplicateNames] +// - [AllowInvalidUTF8] +// +// All other options are ignored. +func (v Value) IsValid(opts ...Options) bool { + // TODO: Document support for [WithByteLimit] and [WithDepthLimit]. + d := getBufferedDecoder(v, opts...) + defer putBufferedDecoder(d) + _, errVal := d.ReadValue() + _, errEOF := d.ReadToken() + return errVal == nil && errEOF == io.EOF +} + +// Format formats the raw JSON value in place. +// +// By default (if no options are specified), it validates according to RFC 7493 +// and produces the minimal JSON representation, where +// all whitespace is elided and JSON strings use the shortest encoding. +// +// Relevant options include: +// - [AllowDuplicateNames] +// - [AllowInvalidUTF8] +// - [EscapeForHTML] +// - [EscapeForJS] +// - [PreserveRawStrings] +// - [CanonicalizeRawInts] +// - [CanonicalizeRawFloats] +// - [ReorderRawObjects] +// - [SpaceAfterColon] +// - [SpaceAfterComma] +// - [Multiline] +// - [WithIndent] +// - [WithIndentPrefix] +// +// All other options are ignored. +// +// It is guaranteed to succeed if the value is valid according to the same options. +// If the value is already formatted, then the buffer is not mutated. +func (v *Value) Format(opts ...Options) error { + // TODO: Document support for [WithByteLimit] and [WithDepthLimit]. + return v.format(opts, nil) +} + +// format accepts two []Options to avoid the allocation appending them together. +// It is equivalent to v.Format(append(opts1, opts2...)...). +func (v *Value) format(opts1, opts2 []Options) error { + e := getBufferedEncoder(opts1...) + defer putBufferedEncoder(e) + e.s.Join(opts2...) + e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1) + if err := e.s.WriteValue(*v); err != nil { + return err + } + if !bytes.Equal(*v, e.s.Buf) { + *v = append((*v)[:0], e.s.Buf...) + } + return nil +} + +// Compact removes all whitespace from the raw JSON value. +// +// It does not reformat JSON strings or numbers to use any other representation. 
+// To maximize the set of JSON values that can be formatted, +// this permits values with duplicate names and invalid UTF-8. +// +// Compact is equivalent to calling [Value.Format] with the following options: +// - [AllowDuplicateNames](true) +// - [AllowInvalidUTF8](true) +// - [PreserveRawStrings](true) +// +// Any options specified by the caller are applied after the initial set +// and may deliberately override prior options. +func (v *Value) Compact(opts ...Options) error { + return v.format([]Options{ + AllowDuplicateNames(true), + AllowInvalidUTF8(true), + PreserveRawStrings(true), + }, opts) +} + +// Indent reformats the whitespace in the raw JSON value so that each element +// in a JSON object or array begins on a indented line according to the nesting. +// +// It does not reformat JSON strings or numbers to use any other representation. +// To maximize the set of JSON values that can be formatted, +// this permits values with duplicate names and invalid UTF-8. +// +// Indent is equivalent to calling [Value.Format] with the following options: +// - [AllowDuplicateNames](true) +// - [AllowInvalidUTF8](true) +// - [PreserveRawStrings](true) +// - [Multiline](true) +// +// Any options specified by the caller are applied after the initial set +// and may deliberately override prior options. +func (v *Value) Indent(opts ...Options) error { + return v.format([]Options{ + AllowDuplicateNames(true), + AllowInvalidUTF8(true), + PreserveRawStrings(true), + Multiline(true), + }, opts) +} + +// Canonicalize canonicalizes the raw JSON value according to the +// JSON Canonicalization Scheme (JCS) as defined by RFC 8785 +// where it produces a stable representation of a JSON value. +// +// JSON strings are formatted to use their minimal representation, +// JSON numbers are formatted as double precision numbers according +// to some stable serialization algorithm. +// JSON object members are sorted in ascending order by name. +// All whitespace is removed. +// +// The output stability is dependent on the stability of the application data +// (see RFC 8785, Appendix E). It cannot produce stable output from +// fundamentally unstable input. For example, if the JSON value +// contains ephemeral data (e.g., a frequently changing timestamp), +// then the value is still unstable regardless of whether this is called. +// +// Canonicalize is equivalent to calling [Value.Format] with the following options: +// - [CanonicalizeRawInts](true) +// - [CanonicalizeRawFloats](true) +// - [ReorderRawObjects](true) +// +// Any options specified by the caller are applied after the initial set +// and may deliberately override prior options. +// +// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers. +// Any numbers with precision beyond what is representable by that form +// will lose their precision when canonicalized. For example, integer values +// beyond ±2⁵³ will lose their precision. To preserve the original representation +// of JSON integers, additionally set [CanonicalizeRawInts] to false: +// +// v.Canonicalize(jsontext.CanonicalizeRawInts(false)) +func (v *Value) Canonicalize(opts ...Options) error { + return v.format([]Options{ + CanonicalizeRawInts(true), + CanonicalizeRawFloats(true), + ReorderRawObjects(true), + }, opts) +} + +// MarshalJSON returns v as the JSON encoding of v. +// It returns the stored value as the raw JSON output without any validation. +// If v is nil, then this returns a JSON null. 
+func (v Value) MarshalJSON() ([]byte, error) { + // NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON. + if v == nil { + return []byte("null"), nil + } + return v, nil +} + +// UnmarshalJSON sets v as the JSON encoding of b. +// It stores a copy of the provided raw JSON input without any validation. +func (v *Value) UnmarshalJSON(b []byte) error { + // NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON. + if v == nil { + return errors.New("jsontext.Value: UnmarshalJSON on nil pointer") + } + *v = append((*v)[:0], b...) + return nil +} + +// Kind returns the starting token kind. +// For a valid value, this will never include '}' or ']'. +func (v Value) Kind() Kind { + if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 { + return Kind(v[0]).normalize() + } + return invalidKind +} + +const commaAndWhitespace = ", \n\r\t" + +type objectMember struct { + // name is the unquoted name. + name []byte // e.g., "name" + // buffer is the entirety of the raw JSON object member + // starting from right after the previous member (or opening '{') + // until right after the member value. + buffer []byte // e.g., `, \n\r\t"name": "value"` +} + +func (x objectMember) Compare(y objectMember) int { + if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 { + return c + } + // With [AllowDuplicateNames] or [AllowInvalidUTF8], + // names could be identical, so also sort using the member value. + return jsonwire.CompareUTF16( + bytes.TrimLeft(x.buffer, commaAndWhitespace), + bytes.TrimLeft(y.buffer, commaAndWhitespace)) +} + +var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }} + +func getObjectMembers() *[]objectMember { + ns := objectMemberPool.Get().(*[]objectMember) + *ns = (*ns)[:0] + return ns +} +func putObjectMembers(ns *[]objectMember) { + if cap(*ns) < 1<<10 { + clear(*ns) // avoid pinning name and buffer + objectMemberPool.Put(ns) + } +} + +// mustReorderObjects reorders in-place all object members in a JSON value, +// which must be valid otherwise it panics. +func mustReorderObjects(b []byte) { + // Obtain a buffered encoder just to use its internal buffer as + // a scratch buffer for reordering object members. + e2 := getBufferedEncoder() + defer putBufferedEncoder(e2) + + // Disable unnecessary checks to syntactically parse the JSON value. + d := getBufferedDecoder(b) + defer putBufferedDecoder(d) + d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) + mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3 +} + +// mustReorderObjectsFromDecoder recursively reorders all object members in place +// according to the ordering specified in RFC 8785, section 3.2.3. +// +// Pre-conditions: +// - The value is valid (i.e., no decoder errors should ever occur). +// - Initial call is provided a Decoder reading from the start of v. +// +// Post-conditions: +// - Exactly one JSON value is read from the Decoder. +// - All fully-parsed JSON objects are reordered by directly moving +// the members in the value buffer. +// +// The runtime is approximately O(n·log(n)) + O(m·log(m)), +// where n is len(v) and m is the total number of object members. +func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) { + switch tok, err := d.ReadToken(); tok.Kind() { + case '{': + // Iterate and collect the name and offsets for every object member. 
+ members := getObjectMembers() + defer putObjectMembers(members) + var prevMember objectMember + isSorted := true + + beforeBody := d.InputOffset() // offset after '{' + for d.PeekKind() != '}' { + beforeName := d.InputOffset() + var flags jsonwire.ValueFlags + name, _ := d.s.ReadValue(&flags) + name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim()) + mustReorderObjectsFromDecoder(d, scratch) + afterValue := d.InputOffset() + + currMember := objectMember{name, d.s.buf[beforeName:afterValue]} + if isSorted && len(*members) > 0 { + isSorted = objectMember.Compare(prevMember, currMember) < 0 + } + *members = append(*members, currMember) + prevMember = currMember + } + afterBody := d.InputOffset() // offset before '}' + d.ReadToken() + + // Sort the members; return early if it's already sorted. + if isSorted { + return + } + firstBufferBeforeSorting := (*members)[0].buffer + slices.SortFunc(*members, objectMember.Compare) + firstBufferAfterSorting := (*members)[0].buffer + + // Append the reordered members to a new buffer, + // then copy the reordered members back over the original members. + // Avoid swapping in place since each member may be a different size + // where moving a member over a smaller member may corrupt the data + // for subsequent members before they have been moved. + // + // The following invariant must hold: + // sum([m.after-m.before for m in members]) == afterBody-beforeBody + commaAndWhitespacePrefix := func(b []byte) []byte { + return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))] + } + sorted := (*scratch)[:0] + for i, member := range *members { + switch { + case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]: + // First member after sorting is not the first member before sorting, + // so use the prefix of the first member before sorting. + sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...) + sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...) + case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]: + // Later member after sorting is the first member before sorting, + // so use the prefix of the first member after sorting. + sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...) + sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...) + default: + sorted = append(sorted, member.buffer...) + } + } + if int(afterBody-beforeBody) != len(sorted) { + panic("BUG: length invariant violated") + } + copy(d.s.buf[beforeBody:afterBody], sorted) + + // Update scratch buffer to the largest amount ever used. + if len(sorted) > len(*scratch) { + *scratch = sorted + } + case '[': + for d.PeekKind() != ']' { + mustReorderObjectsFromDecoder(d, scratch) + } + d.ReadToken() + default: + if err != nil { + panic("BUG: " + err.Error()) + } + } +} diff --git a/pkg/encoders/json/jsontext/value_test.go b/pkg/encoders/json/jsontext/value_test.go new file mode 100644 index 0000000..184a27d --- /dev/null +++ b/pkg/encoders/json/jsontext/value_test.go @@ -0,0 +1,200 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
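For orientation while reading the value tests that follow, here is a minimal usage sketch of the Value helpers defined in value.go above (IsValid, Compact, Canonicalize). It is illustrative only, not part of the patch, and assumes the upstream import path "encoding/json/jsontext" under GOEXPERIMENT=jsonv2; in this repository the forked copy lives under pkg/encoders/json/jsontext instead.

// Illustrative sketch only; import path is an assumption (see note above).
package main

import (
	"encoding/json/jsontext"
	"fmt"
)

func main() {
	v := jsontext.Value(` { "b" : 2 , "a" : 1 } `)
	fmt.Println(v.IsValid()) // true: valid per RFC 7493, unique names, clean UTF-8

	if err := v.Compact(); err != nil { // strip whitespace, keep member order
		panic(err)
	}
	fmt.Println(string(v)) // {"b":2,"a":1}

	if err := v.Canonicalize(); err != nil { // RFC 8785: sorted names, minimal numbers
		panic(err)
	}
	fmt.Println(string(v)) // {"a":1,"b":2}
}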
+ +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "io" + "strings" + "testing" + + "encoding/json/internal/jsontest" + "encoding/json/internal/jsonwire" +) + +type valueTestdataEntry struct { + name jsontest.CaseName + in string + wantValid bool + wantCompacted string + wantCompactErr error // implies wantCompacted is in + wantIndented string // wantCompacted if empty; uses "\t" for indent prefix and " " for indent + wantIndentErr error // implies wantCompacted is in + wantCanonicalized string // wantCompacted if empty + wantCanonicalizeErr error // implies wantCompacted is in +} + +var valueTestdata = append(func() (out []valueTestdataEntry) { + // Initialize valueTestdata from coderTestdata. + for _, td := range coderTestdata { + // NOTE: The Compact method preserves the raw formatting of strings, + // while the Encoder (by default) does not. + if td.name.Name == "ComplicatedString" { + td.outCompacted = strings.TrimSpace(td.in) + } + out = append(out, valueTestdataEntry{ + name: td.name, + in: td.in, + wantValid: true, + wantCompacted: td.outCompacted, + wantIndented: td.outIndented, + wantCanonicalized: td.outCanonicalized, + }) + } + return out +}(), []valueTestdataEntry{{ + name: jsontest.Name("RFC8785/Primitives"), + in: `{ + "numbers": [333333333.33333329, 1E30, 4.50, + 2e-3, 0.000000000000000000000000001, -0], + "string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/", + "literals": [null, true, false] + }`, + wantValid: true, + wantCompacted: `{"numbers":[333333333.33333329,1E30,4.50,2e-3,0.000000000000000000000000001,-0],"string":"\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/","literals":[null,true,false]}`, + wantIndented: `{ + "numbers": [ + 333333333.33333329, + 1E30, + 4.50, + 2e-3, + 0.000000000000000000000000001, + -0 + ], + "string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/", + "literals": [ + null, + true, + false + ] + }`, + wantCanonicalized: `{"literals":[null,true,false],"numbers":[333333333.3333333,1e+30,4.5,0.002,1e-27,0],"string":"€$\u000f\nA'B\"\\\\\"/"}`, +}, { + name: jsontest.Name("RFC8785/ObjectOrdering"), + in: `{ + "\u20ac": "Euro Sign", + "\r": "Carriage Return", + "\ufb33": "Hebrew Letter Dalet With Dagesh", + "1": "One", + "\ud83d\ude00": "Emoji: Grinning Face", + "\u0080": "Control", + "\u00f6": "Latin Small Letter O With Diaeresis" + }`, + wantValid: true, + wantCompacted: `{"\u20ac":"Euro Sign","\r":"Carriage Return","\ufb33":"Hebrew Letter Dalet With Dagesh","1":"One","\ud83d\ude00":"Emoji: Grinning Face","\u0080":"Control","\u00f6":"Latin Small Letter O With Diaeresis"}`, + wantIndented: `{ + "\u20ac": "Euro Sign", + "\r": "Carriage Return", + "\ufb33": "Hebrew Letter Dalet With Dagesh", + "1": "One", + "\ud83d\ude00": "Emoji: Grinning Face", + "\u0080": "Control", + "\u00f6": "Latin Small Letter O With Diaeresis" + }`, + wantCanonicalized: `{"\r":"Carriage Return","1":"One","€":"Control","ö":"Latin Small Letter O With Diaeresis","€":"Euro Sign","😀":"Emoji: Grinning Face","דּ":"Hebrew Letter Dalet With Dagesh"}`, +}, { + name: jsontest.Name("LargeIntegers"), + in: ` [ -9223372036854775808 , 9223372036854775807 ] `, + wantValid: true, + wantCompacted: `[-9223372036854775808,9223372036854775807]`, + wantIndented: `[ + -9223372036854775808, + 9223372036854775807 + ]`, + wantCanonicalized: `[-9223372036854776000,9223372036854776000]`, // NOTE: Loss of precision due to numbers being treated as floats. 
+}, { + name: jsontest.Name("InvalidUTF8"), + in: ` "living` + "\xde\xad\xbe\xef" + `\ufffd�" `, + wantValid: false, // uses RFC 7493 as the definition; which validates UTF-8 + wantCompacted: `"living` + "\xde\xad\xbe\xef" + `\ufffd�"`, + wantCanonicalizeErr: E(jsonwire.ErrInvalidUTF8).withPos(` "living`+"\xde\xad", ""), +}, { + name: jsontest.Name("InvalidUTF8/SurrogateHalf"), + in: `"\ud800"`, + wantValid: false, // uses RFC 7493 as the definition; which validates UTF-8 + wantCompacted: `"\ud800"`, + wantCanonicalizeErr: newInvalidEscapeSequenceError(`\ud800"`).withPos(`"`, ""), +}, { + name: jsontest.Name("UppercaseEscaped"), + in: `"\u000B"`, + wantValid: true, + wantCompacted: `"\u000B"`, + wantCanonicalized: `"\u000b"`, +}, { + name: jsontest.Name("DuplicateNames"), + in: ` { "0" : 0 , "1" : 1 , "0" : 0 }`, + wantValid: false, // uses RFC 7493 as the definition; which does check for object uniqueness + wantCompacted: `{"0":0,"1":1,"0":0}`, + wantIndented: `{ + "0": 0, + "1": 1, + "0": 0 + }`, + wantCanonicalizeErr: E(ErrDuplicateName).withPos(` { "0" : 0 , "1" : 1 , `, "/0"), +}, { + name: jsontest.Name("Whitespace"), + in: " \n\r\t", + wantValid: false, + wantCompacted: " \n\r\t", + wantCompactErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""), + wantIndentErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""), + wantCanonicalizeErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""), +}}...) + +func TestValueMethods(t *testing.T) { + for _, td := range valueTestdata { + t.Run(td.name.Name, func(t *testing.T) { + if td.wantIndented == "" { + td.wantIndented = td.wantCompacted + } + if td.wantCanonicalized == "" { + td.wantCanonicalized = td.wantCompacted + } + if td.wantCompactErr != nil { + td.wantCompacted = td.in + } + if td.wantIndentErr != nil { + td.wantIndented = td.in + } + if td.wantCanonicalizeErr != nil { + td.wantCanonicalized = td.in + } + + v := Value(td.in) + gotValid := v.IsValid() + if gotValid != td.wantValid { + t.Errorf("%s: Value.IsValid = %v, want %v", td.name.Where, gotValid, td.wantValid) + } + + gotCompacted := Value(td.in) + gotCompactErr := gotCompacted.Compact() + if string(gotCompacted) != td.wantCompacted { + t.Errorf("%s: Value.Compact = %s, want %s", td.name.Where, gotCompacted, td.wantCompacted) + } + if !equalError(gotCompactErr, td.wantCompactErr) { + t.Errorf("%s: Value.Compact error mismatch:\ngot %v\nwant %v", td.name.Where, gotCompactErr, td.wantCompactErr) + } + + gotIndented := Value(td.in) + gotIndentErr := gotIndented.Indent(WithIndentPrefix("\t"), WithIndent(" ")) + if string(gotIndented) != td.wantIndented { + t.Errorf("%s: Value.Indent = %s, want %s", td.name.Where, gotIndented, td.wantIndented) + } + if !equalError(gotIndentErr, td.wantIndentErr) { + t.Errorf("%s: Value.Indent error mismatch:\ngot %v\nwant %v", td.name.Where, gotIndentErr, td.wantIndentErr) + } + + gotCanonicalized := Value(td.in) + gotCanonicalizeErr := gotCanonicalized.Canonicalize() + if string(gotCanonicalized) != td.wantCanonicalized { + t.Errorf("%s: Value.Canonicalize = %s, want %s", td.name.Where, gotCanonicalized, td.wantCanonicalized) + } + if !equalError(gotCanonicalizeErr, td.wantCanonicalizeErr) { + t.Errorf("%s: Value.Canonicalize error mismatch:\ngot %v\nwant %v", td.name.Where, gotCanonicalizeErr, td.wantCanonicalizeErr) + } + }) + } +} diff --git a/pkg/encoders/json/number_test.go b/pkg/encoders/json/number_test.go new file mode 100644 index 0000000..69eccaa --- /dev/null +++ b/pkg/encoders/json/number_test.go @@ -0,0 +1,120 @@ +// Copyright 2011 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !goexperiment.jsonv2 + +package json + +import ( + "regexp" + "testing" +) + +func TestNumberIsValid(t *testing.T) { + // From: https://stackoverflow.com/a/13340826 + var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) + + validTests := []string{ + "0", + "-0", + "1", + "-1", + "0.1", + "-0.1", + "1234", + "-1234", + "12.34", + "-12.34", + "12E0", + "12E1", + "12e34", + "12E-0", + "12e+1", + "12e-34", + "-12E0", + "-12E1", + "-12e34", + "-12E-0", + "-12e+1", + "-12e-34", + "1.2E0", + "1.2E1", + "1.2e34", + "1.2E-0", + "1.2e+1", + "1.2e-34", + "-1.2E0", + "-1.2E1", + "-1.2e34", + "-1.2E-0", + "-1.2e+1", + "-1.2e-34", + "0E0", + "0E1", + "0e34", + "0E-0", + "0e+1", + "0e-34", + "-0E0", + "-0E1", + "-0e34", + "-0E-0", + "-0e+1", + "-0e-34", + } + + for _, test := range validTests { + if !isValidNumber(test) { + t.Errorf("%s should be valid", test) + } + + var f float64 + if err := Unmarshal([]byte(test), &f); err != nil { + t.Errorf("%s should be valid but Unmarshal failed: %v", test, err) + } + + if !jsonNumberRegexp.MatchString(test) { + t.Errorf("%s should be valid but regexp does not match", test) + } + } + + invalidTests := []string{ + "", + "invalid", + "1.0.1", + "1..1", + "-1-2", + "012a42", + "01.2", + "012", + "12E12.12", + "1e2e3", + "1e+-2", + "1e--23", + "1e", + "e1", + "1e+", + "1ea", + "1a", + "1.a", + "1.", + "01", + "1.e1", + } + + for _, test := range invalidTests { + if isValidNumber(test) { + t.Errorf("%s should be invalid", test) + } + + var f float64 + if err := Unmarshal([]byte(test), &f); err == nil { + t.Errorf("%s should be invalid but unmarshal wrote %v", test, f) + } + + if jsonNumberRegexp.MatchString(test) { + t.Errorf("%s should be invalid but matches regexp", test) + } + } +} diff --git a/pkg/encoders/json/scanner.go b/pkg/encoders/json/scanner.go new file mode 100644 index 0000000..f408618 --- /dev/null +++ b/pkg/encoders/json/scanner.go @@ -0,0 +1,612 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !goexperiment.jsonv2 + +package json + +// JSON value parser state machine. +// Just about at the limit of what is reasonable to write by hand. +// Some parts are a bit tedious, but overall it nicely factors out the +// otherwise common code from the multiple scanning functions +// in this package (Compact, Indent, checkValid, etc). +// +// This file starts with two simple examples using the scanner +// before diving into the scanner itself. + +import ( + "strconv" + "sync" +) + +// Valid reports whether data is a valid JSON encoding. +func Valid(data []byte) bool { + scan := newScanner() + defer freeScanner(scan) + return checkValid(data, scan) == nil +} + +// checkValid verifies that data is valid JSON-encoded data. +// scan is passed in for use by checkValid to avoid an allocation. +// checkValid returns nil or a SyntaxError. +func checkValid(data []byte, scan *scanner) error { + scan.reset() + for _, c := range data { + scan.bytes++ + if scan.step(scan, c) == scanError { + return scan.err + } + } + if scan.eof() == scanError { + return scan.err + } + return nil +} + +// A SyntaxError is a description of a JSON syntax error. +// [Unmarshal] will return a SyntaxError if the JSON can't be parsed. 
+type SyntaxError struct { + msg string // description of error + Offset int64 // error occurred after reading Offset bytes +} + +func (e *SyntaxError) Error() string { return e.msg } + +// A scanner is a JSON scanning state machine. +// Callers call scan.reset and then pass bytes in one at a time +// by calling scan.step(&scan, c) for each byte. +// The return value, referred to as an opcode, tells the +// caller about significant parsing events like beginning +// and ending literals, objects, and arrays, so that the +// caller can follow along if it wishes. +// The return value scanEnd indicates that a single top-level +// JSON value has been completed, *before* the byte that +// just got passed in. (The indication must be delayed in order +// to recognize the end of numbers: is 123 a whole value or +// the beginning of 12345e+6?). +type scanner struct { + // The step is a func to be called to execute the next transition. + // Also tried using an integer constant and a single func + // with a switch, but using the func directly was 10% faster + // on a 64-bit Mac Mini, and it's nicer to read. + step func(*scanner, byte) int + + // Reached end of top-level value. + endTop bool + + // Stack of what we're in the middle of - array values, object keys, object values. + parseState []int + + // Error that happened, if any. + err error + + // total bytes consumed, updated by decoder.Decode (and deliberately + // not set to zero by scan.reset) + bytes int64 +} + +var scannerPool = sync.Pool{ + New: func() any { + return &scanner{} + }, +} + +func newScanner() *scanner { + scan := scannerPool.Get().(*scanner) + // scan.reset by design doesn't set bytes to zero + scan.bytes = 0 + scan.reset() + return scan +} + +func freeScanner(scan *scanner) { + // Avoid hanging on to too much memory in extreme cases. + if len(scan.parseState) > 1024 { + scan.parseState = nil + } + scannerPool.Put(scan) +} + +// These values are returned by the state transition functions +// assigned to scanner.state and the method scanner.eof. +// They give details about the current state of the scan that +// callers might be interested to know about. +// It is okay to ignore the return value of any particular +// call to scanner.state: if one call returns scanError, +// every subsequent call will return scanError too. +const ( + // Continue. + scanContinue = iota // uninteresting byte + scanBeginLiteral // end implied by next result != scanContinue + scanBeginObject // begin object + scanObjectKey // just finished object key (string) + scanObjectValue // just finished non-last object value + scanEndObject // end object (implies scanObjectValue if possible) + scanBeginArray // begin array + scanArrayValue // just finished array value + scanEndArray // end array (implies scanArrayValue if possible) + scanSkipSpace // space byte; can skip; known to be last "continue" result + + // Stop. + scanEnd // top-level value ended *before* this byte; known to be first "stop" result + scanError // hit an error, scanner.err. +) + +// These values are stored in the parseState stack. +// They give the current state of a composite value +// being scanned. If the parser is inside a nested value +// the parseState describes the nested state, outermost at entry 0. +const ( + parseObjectKey = iota // parsing object key (before colon) + parseObjectValue // parsing object value (after colon) + parseArrayValue // parsing array value +) + +// This limits the max nesting depth to prevent stack overflow. 
+// This is permitted by https://tools.ietf.org/html/rfc7159#section-9 +const maxNestingDepth = 10000 + +// reset prepares the scanner for use. +// It must be called before calling s.step. +func (s *scanner) reset() { + s.step = stateBeginValue + s.parseState = s.parseState[0:0] + s.err = nil + s.endTop = false +} + +// eof tells the scanner that the end of input has been reached. +// It returns a scan status just as s.step does. +func (s *scanner) eof() int { + if s.err != nil { + return scanError + } + if s.endTop { + return scanEnd + } + s.step(s, ' ') + if s.endTop { + return scanEnd + } + if s.err == nil { + s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} + } + return scanError +} + +// pushParseState pushes a new parse state newParseState onto the parse stack. +// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned. +func (s *scanner) pushParseState(c byte, newParseState int, successState int) int { + s.parseState = append(s.parseState, newParseState) + if len(s.parseState) <= maxNestingDepth { + return successState + } + return s.error(c, "exceeded max depth") +} + +// popParseState pops a parse state (already obtained) off the stack +// and updates s.step accordingly. +func (s *scanner) popParseState() { + n := len(s.parseState) - 1 + s.parseState = s.parseState[0:n] + if n == 0 { + s.step = stateEndTop + s.endTop = true + } else { + s.step = stateEndValue + } +} + +func isSpace(c byte) bool { + return c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') +} + +// stateBeginValueOrEmpty is the state after reading `[`. +func stateBeginValueOrEmpty(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == ']' { + return stateEndValue(s, c) + } + return stateBeginValue(s, c) +} + +// stateBeginValue is the state at the beginning of the input. +func stateBeginValue(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + switch c { + case '{': + s.step = stateBeginStringOrEmpty + return s.pushParseState(c, parseObjectKey, scanBeginObject) + case '[': + s.step = stateBeginValueOrEmpty + return s.pushParseState(c, parseArrayValue, scanBeginArray) + case '"': + s.step = stateInString + return scanBeginLiteral + case '-': + s.step = stateNeg + return scanBeginLiteral + case '0': // beginning of 0.123 + s.step = state0 + return scanBeginLiteral + case 't': // beginning of true + s.step = stateT + return scanBeginLiteral + case 'f': // beginning of false + s.step = stateF + return scanBeginLiteral + case 'n': // beginning of null + s.step = stateN + return scanBeginLiteral + } + if '1' <= c && c <= '9' { // beginning of 1234.5 + s.step = state1 + return scanBeginLiteral + } + return s.error(c, "looking for beginning of value") +} + +// stateBeginStringOrEmpty is the state after reading `{`. +func stateBeginStringOrEmpty(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == '}' { + n := len(s.parseState) + s.parseState[n-1] = parseObjectValue + return stateEndValue(s, c) + } + return stateBeginString(s, c) +} + +// stateBeginString is the state after reading `{"key": value,`. +func stateBeginString(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == '"' { + s.step = stateInString + return scanBeginLiteral + } + return s.error(c, "looking for beginning of object key string") +} + +// stateEndValue is the state after completing a value, +// such as after reading `{}` or `true` or `["x"`. 
+func stateEndValue(s *scanner, c byte) int { + n := len(s.parseState) + if n == 0 { + // Completed top-level before the current byte. + s.step = stateEndTop + s.endTop = true + return stateEndTop(s, c) + } + if isSpace(c) { + s.step = stateEndValue + return scanSkipSpace + } + ps := s.parseState[n-1] + switch ps { + case parseObjectKey: + if c == ':' { + s.parseState[n-1] = parseObjectValue + s.step = stateBeginValue + return scanObjectKey + } + return s.error(c, "after object key") + case parseObjectValue: + if c == ',' { + s.parseState[n-1] = parseObjectKey + s.step = stateBeginString + return scanObjectValue + } + if c == '}' { + s.popParseState() + return scanEndObject + } + return s.error(c, "after object key:value pair") + case parseArrayValue: + if c == ',' { + s.step = stateBeginValue + return scanArrayValue + } + if c == ']' { + s.popParseState() + return scanEndArray + } + return s.error(c, "after array element") + } + return s.error(c, "") +} + +// stateEndTop is the state after finishing the top-level value, +// such as after reading `{}` or `[1,2,3]`. +// Only space characters should be seen now. +func stateEndTop(s *scanner, c byte) int { + if !isSpace(c) { + // Complain about non-space byte on next call. + s.error(c, "after top-level value") + } + return scanEnd +} + +// stateInString is the state after reading `"`. +func stateInString(s *scanner, c byte) int { + if c == '"' { + s.step = stateEndValue + return scanContinue + } + if c == '\\' { + s.step = stateInStringEsc + return scanContinue + } + if c < 0x20 { + return s.error(c, "in string literal") + } + return scanContinue +} + +// stateInStringEsc is the state after reading `"\` during a quoted string. +func stateInStringEsc(s *scanner, c byte) int { + switch c { + case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': + s.step = stateInString + return scanContinue + case 'u': + s.step = stateInStringEscU + return scanContinue + } + return s.error(c, "in string escape code") +} + +// stateInStringEscU is the state after reading `"\u` during a quoted string. +func stateInStringEscU(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU1 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. +func stateInStringEscU1(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU12 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. +func stateInStringEscU12(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU123 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. +func stateInStringEscU123(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInString + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateNeg is the state after reading `-` during a number. 
+func stateNeg(s *scanner, c byte) int { + if c == '0' { + s.step = state0 + return scanContinue + } + if '1' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return s.error(c, "in numeric literal") +} + +// state1 is the state after reading a non-zero integer during a number, +// such as after reading `1` or `100` but not `0`. +func state1(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return state0(s, c) +} + +// state0 is the state after reading `0` during a number. +func state0(s *scanner, c byte) int { + if c == '.' { + s.step = stateDot + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateDot is the state after reading the integer and decimal point in a number, +// such as after reading `1.`. +func stateDot(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = stateDot0 + return scanContinue + } + return s.error(c, "after decimal point in numeric literal") +} + +// stateDot0 is the state after reading the integer, decimal point, and subsequent +// digits of a number, such as after reading `3.14`. +func stateDot0(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateE is the state after reading the mantissa and e in a number, +// such as after reading `314e` or `0.314e`. +func stateE(s *scanner, c byte) int { + if c == '+' || c == '-' { + s.step = stateESign + return scanContinue + } + return stateESign(s, c) +} + +// stateESign is the state after reading the mantissa, e, and sign in a number, +// such as after reading `314e-` or `0.314e+`. +func stateESign(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = stateE0 + return scanContinue + } + return s.error(c, "in exponent of numeric literal") +} + +// stateE0 is the state after reading the mantissa, e, optional sign, +// and at least one digit of the exponent in a number, +// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. +func stateE0(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + return scanContinue + } + return stateEndValue(s, c) +} + +// stateT is the state after reading `t`. +func stateT(s *scanner, c byte) int { + if c == 'r' { + s.step = stateTr + return scanContinue + } + return s.error(c, "in literal true (expecting 'r')") +} + +// stateTr is the state after reading `tr`. +func stateTr(s *scanner, c byte) int { + if c == 'u' { + s.step = stateTru + return scanContinue + } + return s.error(c, "in literal true (expecting 'u')") +} + +// stateTru is the state after reading `tru`. +func stateTru(s *scanner, c byte) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal true (expecting 'e')") +} + +// stateF is the state after reading `f`. +func stateF(s *scanner, c byte) int { + if c == 'a' { + s.step = stateFa + return scanContinue + } + return s.error(c, "in literal false (expecting 'a')") +} + +// stateFa is the state after reading `fa`. +func stateFa(s *scanner, c byte) int { + if c == 'l' { + s.step = stateFal + return scanContinue + } + return s.error(c, "in literal false (expecting 'l')") +} + +// stateFal is the state after reading `fal`. 
+func stateFal(s *scanner, c byte) int { + if c == 's' { + s.step = stateFals + return scanContinue + } + return s.error(c, "in literal false (expecting 's')") +} + +// stateFals is the state after reading `fals`. +func stateFals(s *scanner, c byte) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal false (expecting 'e')") +} + +// stateN is the state after reading `n`. +func stateN(s *scanner, c byte) int { + if c == 'u' { + s.step = stateNu + return scanContinue + } + return s.error(c, "in literal null (expecting 'u')") +} + +// stateNu is the state after reading `nu`. +func stateNu(s *scanner, c byte) int { + if c == 'l' { + s.step = stateNul + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateNul is the state after reading `nul`. +func stateNul(s *scanner, c byte) int { + if c == 'l' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateError is the state after reaching a syntax error, +// such as after reading `[1}` or `5.1.2`. +func stateError(s *scanner, c byte) int { + return scanError +} + +// error records an error and switches to the error state. +func (s *scanner) error(c byte, context string) int { + s.step = stateError + s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} + return scanError +} + +// quoteChar formats c as a quoted character literal. +func quoteChar(c byte) string { + // special cases - different from quoted strings + if c == '\'' { + return `'\''` + } + if c == '"' { + return `'"'` + } + + // use quoted string with different quotation marks + s := strconv.Quote(string(c)) + return "'" + s[1:len(s)-1] + "'" +} diff --git a/pkg/encoders/json/scanner_test.go b/pkg/encoders/json/scanner_test.go new file mode 100644 index 0000000..fb64463 --- /dev/null +++ b/pkg/encoders/json/scanner_test.go @@ -0,0 +1,306 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
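Before the scanner tests, a minimal usage sketch of the entry points the scanner above backs: Valid, plus SyntaxError as surfaced through Unmarshal. Illustrative only, not part of the patch; it assumes the standard "encoding/json" import path stands in for this repository's fork under pkg/encoders/json.

// Illustrative sketch only; import path is an assumption (see note above).
package main

import (
	"encoding/json"
	"errors"
	"fmt"
)

func main() {
	fmt.Println(json.Valid([]byte(`{"fizz":["buzz",1,true,null]}`))) // true
	fmt.Println(json.Valid([]byte(`{"fizz":}`)))                     // false

	var v any
	err := json.Unmarshal([]byte(`{"fizz":}`), &v)
	var se *json.SyntaxError
	if errors.As(err, &se) {
		// Offset is the byte count consumed when the scanner hit the bad character.
		fmt.Println(se.Offset, se.Error())
	}
}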
+ +//go:build !goexperiment.jsonv2 + +package json + +import ( + "bytes" + "math" + "math/rand" + "reflect" + "strings" + "testing" +) + +func indentNewlines(s string) string { + return strings.Join(strings.Split(s, "\n"), "\n\t") +} + +func stripWhitespace(s string) string { + return strings.Map(func(r rune) rune { + if r == ' ' || r == '\n' || r == '\r' || r == '\t' { + return -1 + } + return r + }, s) +} + +func TestValid(t *testing.T) { + tests := []struct { + CaseName + data string + ok bool + }{ + {Name(""), `foo`, false}, + {Name(""), `}{`, false}, + {Name(""), `{]`, false}, + {Name(""), `{}`, true}, + {Name(""), `{"foo":"bar"}`, true}, + {Name(""), `{"foo":"bar","bar":{"baz":["qux"]}}`, true}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + if ok := Valid([]byte(tt.data)); ok != tt.ok { + t.Errorf("%s: Valid(`%s`) = %v, want %v", tt.Where, tt.data, ok, tt.ok) + } + }) + } +} + +func TestCompactAndIndent(t *testing.T) { + tests := []struct { + CaseName + compact string + indent string + }{ + {Name(""), `1`, `1`}, + {Name(""), `{}`, `{}`}, + {Name(""), `[]`, `[]`}, + {Name(""), `{"":2}`, "{\n\t\"\": 2\n}"}, + {Name(""), `[3]`, "[\n\t3\n]"}, + {Name(""), `[1,2,3]`, "[\n\t1,\n\t2,\n\t3\n]"}, + {Name(""), `{"x":1}`, "{\n\t\"x\": 1\n}"}, + {Name(""), `[true,false,null,"x",1,1.5,0,-5e+2]`, `[ + true, + false, + null, + "x", + 1, + 1.5, + 0, + -5e+2 +]`}, + {Name(""), "{\"\":\"<>&\u2028\u2029\"}", "{\n\t\"\": \"<>&\u2028\u2029\"\n}"}, // See golang.org/issue/34070 + } + var buf bytes.Buffer + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + buf.Reset() + if err := Compact(&buf, []byte(tt.compact)); err != nil { + t.Errorf("%s: Compact error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.compact { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact)) + } + + buf.Reset() + if err := Compact(&buf, []byte(tt.indent)); err != nil { + t.Errorf("%s: Compact error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.compact { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact)) + } + + buf.Reset() + if err := Indent(&buf, []byte(tt.indent), "", "\t"); err != nil { + t.Errorf("%s: Indent error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.indent { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.indent)) + } + + buf.Reset() + if err := Indent(&buf, []byte(tt.compact), "", "\t"); err != nil { + t.Errorf("%s: Indent error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.indent { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.indent)) + } + }) + } +} + +func TestCompactSeparators(t *testing.T) { + // U+2028 and U+2029 should be escaped inside strings. + // They should not appear outside strings. + tests := []struct { + CaseName + in, compact string + }{ + {Name(""), "{\"\u2028\": 1}", "{\"\u2028\":1}"}, + {Name(""), "{\"\u2029\" :2}", "{\"\u2029\":2}"}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + var buf bytes.Buffer + if err := Compact(&buf, []byte(tt.in)); err != nil { + t.Errorf("%s: Compact error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.compact { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact)) + } + }) + } +} + +// Tests of a large random structure. 
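The large random-structure tests below verify at scale that Indent is idempotent and that Compact(Indent(x)) reproduces x; a small hand-checkable sketch of the same round trip (same import-path assumption as above, not part of the patch):

// Illustrative sketch only; import path is an assumption.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

func main() {
	src := []byte(`{"x":[1,2,3]}`)

	var indented bytes.Buffer
	if err := json.Indent(&indented, src, "", "\t"); err != nil {
		panic(err)
	}

	var compacted bytes.Buffer
	if err := json.Compact(&compacted, indented.Bytes()); err != nil {
		panic(err)
	}

	fmt.Println(bytes.Equal(compacted.Bytes(), src)) // true: round trip is lossless
	fmt.Println(indented.String())
}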
+ +func TestCompactBig(t *testing.T) { + initBig() + var buf bytes.Buffer + if err := Compact(&buf, jsonBig); err != nil { + t.Fatalf("Compact error: %v", err) + } + b := buf.Bytes() + if !bytes.Equal(b, jsonBig) { + t.Error("Compact:") + diff(t, b, jsonBig) + return + } +} + +func TestIndentBig(t *testing.T) { + t.Parallel() + initBig() + var buf bytes.Buffer + if err := Indent(&buf, jsonBig, "", "\t"); err != nil { + t.Fatalf("Indent error: %v", err) + } + b := buf.Bytes() + if len(b) == len(jsonBig) { + // jsonBig is compact (no unnecessary spaces); + // indenting should make it bigger + t.Fatalf("Indent did not expand the input") + } + + // should be idempotent + var buf1 bytes.Buffer + if err := Indent(&buf1, b, "", "\t"); err != nil { + t.Fatalf("Indent error: %v", err) + } + b1 := buf1.Bytes() + if !bytes.Equal(b1, b) { + t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig):") + diff(t, b1, b) + return + } + + // should get back to original + buf1.Reset() + if err := Compact(&buf1, b); err != nil { + t.Fatalf("Compact error: %v", err) + } + b1 = buf1.Bytes() + if !bytes.Equal(b1, jsonBig) { + t.Error("Compact(Indent(jsonBig)) != jsonBig:") + diff(t, b1, jsonBig) + return + } +} + +func TestIndentErrors(t *testing.T) { + tests := []struct { + CaseName + in string + err error + }{ + {Name(""), `{"X": "foo", "Y"}`, &SyntaxError{"invalid character '}' after object key", 17}}, + {Name(""), `{"X": "foo" "Y": "bar"}`, &SyntaxError{"invalid character '\"' after object key:value pair", 13}}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + slice := make([]uint8, 0) + buf := bytes.NewBuffer(slice) + if err := Indent(buf, []uint8(tt.in), "", ""); err != nil { + if !reflect.DeepEqual(err, tt.err) { + t.Fatalf("%s: Indent error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) + } + } + }) + } +} + +func diff(t *testing.T, a, b []byte) { + t.Helper() + for i := 0; ; i++ { + if i >= len(a) || i >= len(b) || a[i] != b[i] { + j := i - 10 + if j < 0 { + j = 0 + } + t.Errorf("diverge at %d: «%s» vs «%s»", i, trim(a[j:]), trim(b[j:])) + return + } + } +} + +func trim(b []byte) []byte { + return b[:min(len(b), 20)] +} + +// Generate a random JSON object. 
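A deterministic miniature of what initBig and genValue below do at scale: marshal a composite Go value and confirm the scanner accepts the encoding (same import-path assumption as above, not part of the patch):

// Illustrative sketch only; import path is an assumption.
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	v := map[string]any{
		"nums":  []any{1.5, -0.25},
		"flags": []any{true, false, nil},
		"s":     "long s: ß",
	}
	b, err := json.Marshal(v)
	if err != nil {
		panic(err)
	}
	fmt.Println(json.Valid(b), len(b) > 0) // true true
}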
+ +var jsonBig []byte + +func initBig() { + n := 10000 + if testing.Short() { + n = 100 + } + b, err := Marshal(genValue(n)) + if err != nil { + panic(err) + } + jsonBig = b +} + +func genValue(n int) any { + if n > 1 { + switch rand.Intn(2) { + case 0: + return genArray(n) + case 1: + return genMap(n) + } + } + switch rand.Intn(3) { + case 0: + return rand.Intn(2) == 0 + case 1: + return rand.NormFloat64() + case 2: + return genString(30) + } + panic("unreachable") +} + +func genString(stddev float64) string { + n := int(math.Abs(rand.NormFloat64()*stddev + stddev/2)) + c := make([]rune, n) + for i := range c { + f := math.Abs(rand.NormFloat64()*64 + 32) + if f > 0x10ffff { + f = 0x10ffff + } + c[i] = rune(f) + } + return string(c) +} + +func genArray(n int) []any { + f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) + if f > n { + f = n + } + if f < 1 { + f = 1 + } + x := make([]any, f) + for i := range x { + x[i] = genValue(((i+1)*n)/f - (i*n)/f) + } + return x +} + +func genMap(n int) map[string]any { + f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) + if f > n { + f = n + } + if n > 0 && f == 0 { + f = 1 + } + x := make(map[string]any) + for i := 0; i < f; i++ { + x[genString(10)] = genValue(((i+1)*n)/f - (i*n)/f) + } + return x +} diff --git a/pkg/encoders/json/stream.go b/pkg/encoders/json/stream.go new file mode 100644 index 0000000..fc480c9 --- /dev/null +++ b/pkg/encoders/json/stream.go @@ -0,0 +1,514 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !goexperiment.jsonv2 + +package json + +import ( + "bytes" + "errors" + "io" +) + +// A Decoder reads and decodes JSON values from an input stream. +type Decoder struct { + r io.Reader + buf []byte + d decodeState + scanp int // start of unread data in buf + scanned int64 // amount of data already scanned + scan scanner + err error + + tokenState int + tokenStack []int +} + +// NewDecoder returns a new decoder that reads from r. +// +// The decoder introduces its own buffering and may +// read data from r beyond the JSON values requested. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{r: r} +} + +// UseNumber causes the Decoder to unmarshal a number into an +// interface value as a [Number] instead of as a float64. +func (dec *Decoder) UseNumber() { dec.d.useNumber = true } + +// DisallowUnknownFields causes the Decoder to return an error when the destination +// is a struct and the input contains object keys which do not match any +// non-ignored, exported fields in the destination. +func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } + +// Decode reads the next JSON-encoded value from its +// input and stores it in the value pointed to by v. +// +// See the documentation for [Unmarshal] for details about +// the conversion of JSON into a Go value. +func (dec *Decoder) Decode(v any) error { + if dec.err != nil { + return dec.err + } + + if err := dec.tokenPrepareForDecode(); err != nil { + return err + } + + if !dec.tokenValueAllowed() { + return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()} + } + + // Read whole value into buffer. 
+ n, err := dec.readValue() + if err != nil { + return err + } + dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) + dec.scanp += n + + // Don't save err from unmarshal into dec.err: + // the connection is still usable since we read a complete JSON + // object from it before the error happened. + err = dec.d.unmarshal(v) + + // fixup token streaming state + dec.tokenValueEnd() + + return err +} + +// Buffered returns a reader of the data remaining in the Decoder's +// buffer. The reader is valid until the next call to [Decoder.Decode]. +func (dec *Decoder) Buffered() io.Reader { + return bytes.NewReader(dec.buf[dec.scanp:]) +} + +// readValue reads a JSON value into dec.buf. +// It returns the length of the encoding. +func (dec *Decoder) readValue() (int, error) { + dec.scan.reset() + + scanp := dec.scanp + var err error +Input: + // help the compiler see that scanp is never negative, so it can remove + // some bounds checks below. + for scanp >= 0 { + + // Look in the buffer for a new value. + for ; scanp < len(dec.buf); scanp++ { + c := dec.buf[scanp] + dec.scan.bytes++ + switch dec.scan.step(&dec.scan, c) { + case scanEnd: + // scanEnd is delayed one byte so we decrement + // the scanner bytes count by 1 to ensure that + // this value is correct in the next call of Decode. + dec.scan.bytes-- + break Input + case scanEndObject, scanEndArray: + // scanEnd is delayed one byte. + // We might block trying to get that byte from src, + // so instead invent a space byte. + if stateEndValue(&dec.scan, ' ') == scanEnd { + scanp++ + break Input + } + case scanError: + dec.err = dec.scan.err + return 0, dec.scan.err + } + } + + // Did the last read have an error? + // Delayed until now to allow buffer scan. + if err != nil { + if err == io.EOF { + if dec.scan.step(&dec.scan, ' ') == scanEnd { + break Input + } + if nonSpace(dec.buf) { + err = io.ErrUnexpectedEOF + } + } + dec.err = err + return 0, err + } + + n := scanp - dec.scanp + err = dec.refill() + scanp = dec.scanp + n + } + return scanp - dec.scanp, nil +} + +func (dec *Decoder) refill() error { + // Make room to read more into the buffer. + // First slide down data already consumed. + if dec.scanp > 0 { + dec.scanned += int64(dec.scanp) + n := copy(dec.buf, dec.buf[dec.scanp:]) + dec.buf = dec.buf[:n] + dec.scanp = 0 + } + + // Grow buffer if not large enough. + const minRead = 512 + if cap(dec.buf)-len(dec.buf) < minRead { + newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) + copy(newBuf, dec.buf) + dec.buf = newBuf + } + + // Read. Delay error for next iteration (after scan). + n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) + dec.buf = dec.buf[0 : len(dec.buf)+n] + + return err +} + +func nonSpace(b []byte) bool { + for _, c := range b { + if !isSpace(c) { + return true + } + } + return false +} + +// An Encoder writes JSON values to an output stream. +type Encoder struct { + w io.Writer + err error + escapeHTML bool + + indentBuf []byte + indentPrefix string + indentValue string +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{w: w, escapeHTML: true} +} + +// Encode writes the JSON encoding of v to the stream, +// with insignificant space characters elided, +// followed by a newline character. +// +// See the documentation for [Marshal] for details about the +// conversion of Go values to JSON. 
+func (enc *Encoder) Encode(v any) error { + if enc.err != nil { + return enc.err + } + + e := newEncodeState() + defer encodeStatePool.Put(e) + + err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) + if err != nil { + return err + } + + // Terminate each value with a newline. + // This makes the output look a little nicer + // when debugging, and some kind of space + // is required if the encoded value was a number, + // so that the reader knows there aren't more + // digits coming. + e.WriteByte('\n') + + b := e.Bytes() + if enc.indentPrefix != "" || enc.indentValue != "" { + enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue) + if err != nil { + return err + } + b = enc.indentBuf + } + if _, err = enc.w.Write(b); err != nil { + enc.err = err + } + return err +} + +// SetIndent instructs the encoder to format each subsequent encoded +// value as if indented by the package-level function Indent(dst, src, prefix, indent). +// Calling SetIndent("", "") disables indentation. +func (enc *Encoder) SetIndent(prefix, indent string) { + enc.indentPrefix = prefix + enc.indentValue = indent +} + +// SetEscapeHTML specifies whether problematic HTML characters +// should be escaped inside JSON quoted strings. +// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e +// to avoid certain safety problems that can arise when embedding JSON in HTML. +// +// In non-HTML settings where the escaping interferes with the readability +// of the output, SetEscapeHTML(false) disables this behavior. +func (enc *Encoder) SetEscapeHTML(on bool) { + enc.escapeHTML = on +} + +// RawMessage is a raw encoded JSON value. +// It implements [Marshaler] and [Unmarshaler] and can +// be used to delay JSON decoding or precompute a JSON encoding. +type RawMessage []byte + +// MarshalJSON returns m as the JSON encoding of m. +func (m RawMessage) MarshalJSON() ([]byte, error) { + if m == nil { + return []byte("null"), nil + } + return m, nil +} + +// UnmarshalJSON sets *m to a copy of data. +func (m *RawMessage) UnmarshalJSON(data []byte) error { + if m == nil { + return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") + } + *m = append((*m)[0:0], data...) + return nil +} + +var _ Marshaler = (*RawMessage)(nil) +var _ Unmarshaler = (*RawMessage)(nil) + +// A Token holds a value of one of these types: +// +// - [Delim], for the four JSON delimiters [ ] { } +// - bool, for JSON booleans +// - float64, for JSON numbers +// - [Number], for JSON numbers +// - string, for JSON string literals +// - nil, for JSON null +type Token any + +const ( + tokenTopValue = iota + tokenArrayStart + tokenArrayValue + tokenArrayComma + tokenObjectStart + tokenObjectKey + tokenObjectColon + tokenObjectValue + tokenObjectComma +) + +// advance tokenstate from a separator state to a value state +func (dec *Decoder) tokenPrepareForDecode() error { + // Note: Not calling peek before switch, to avoid + // putting peek into the standard Decode path. + // peek is only called when using the Token API. 
+ switch dec.tokenState { + case tokenArrayComma: + c, err := dec.peek() + if err != nil { + return err + } + if c != ',' { + return &SyntaxError{"expected comma after array element", dec.InputOffset()} + } + dec.scanp++ + dec.tokenState = tokenArrayValue + case tokenObjectColon: + c, err := dec.peek() + if err != nil { + return err + } + if c != ':' { + return &SyntaxError{"expected colon after object key", dec.InputOffset()} + } + dec.scanp++ + dec.tokenState = tokenObjectValue + } + return nil +} + +func (dec *Decoder) tokenValueAllowed() bool { + switch dec.tokenState { + case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: + return true + } + return false +} + +func (dec *Decoder) tokenValueEnd() { + switch dec.tokenState { + case tokenArrayStart, tokenArrayValue: + dec.tokenState = tokenArrayComma + case tokenObjectValue: + dec.tokenState = tokenObjectComma + } +} + +// A Delim is a JSON array or object delimiter, one of [ ] { or }. +type Delim rune + +func (d Delim) String() string { + return string(d) +} + +// Token returns the next JSON token in the input stream. +// At the end of the input stream, Token returns nil, [io.EOF]. +// +// Token guarantees that the delimiters [ ] { } it returns are +// properly nested and matched: if Token encounters an unexpected +// delimiter in the input, it will return an error. +// +// The input stream consists of basic JSON values—bool, string, +// number, and null—along with delimiters [ ] { } of type [Delim] +// to mark the start and end of arrays and objects. +// Commas and colons are elided. +func (dec *Decoder) Token() (Token, error) { + for { + c, err := dec.peek() + if err != nil { + return nil, err + } + switch c { + case '[': + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenStack = append(dec.tokenStack, dec.tokenState) + dec.tokenState = tokenArrayStart + return Delim('['), nil + + case ']': + if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] + dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] + dec.tokenValueEnd() + return Delim(']'), nil + + case '{': + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenStack = append(dec.tokenStack, dec.tokenState) + dec.tokenState = tokenObjectStart + return Delim('{'), nil + + case '}': + if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] + dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] + dec.tokenValueEnd() + return Delim('}'), nil + + case ':': + if dec.tokenState != tokenObjectColon { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = tokenObjectValue + continue + + case ',': + if dec.tokenState == tokenArrayComma { + dec.scanp++ + dec.tokenState = tokenArrayValue + continue + } + if dec.tokenState == tokenObjectComma { + dec.scanp++ + dec.tokenState = tokenObjectKey + continue + } + return dec.tokenError(c) + + case '"': + if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { + var x string + old := dec.tokenState + dec.tokenState = tokenTopValue + err := dec.Decode(&x) + dec.tokenState = old + if err != nil { + return nil, err + } + dec.tokenState = tokenObjectColon + return x, nil + } + fallthrough + + default: + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + var x any + if err := 
dec.Decode(&x); err != nil { + return nil, err + } + return x, nil + } + } +} + +func (dec *Decoder) tokenError(c byte) (Token, error) { + var context string + switch dec.tokenState { + case tokenTopValue: + context = " looking for beginning of value" + case tokenArrayStart, tokenArrayValue, tokenObjectValue: + context = " looking for beginning of value" + case tokenArrayComma: + context = " after array element" + case tokenObjectKey: + context = " looking for beginning of object key string" + case tokenObjectColon: + context = " after object key" + case tokenObjectComma: + context = " after object key:value pair" + } + return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} +} + +// More reports whether there is another element in the +// current array or object being parsed. +func (dec *Decoder) More() bool { + c, err := dec.peek() + return err == nil && c != ']' && c != '}' +} + +func (dec *Decoder) peek() (byte, error) { + var err error + for { + for i := dec.scanp; i < len(dec.buf); i++ { + c := dec.buf[i] + if isSpace(c) { + continue + } + dec.scanp = i + return c, nil + } + // buffer has been scanned, now report any error + if err != nil { + return 0, err + } + err = dec.refill() + } +} + +// InputOffset returns the input stream byte offset of the current decoder position. +// The offset gives the location of the end of the most recently returned token +// and the beginning of the next token. +func (dec *Decoder) InputOffset() int64 { + return dec.scanned + int64(dec.scanp) +} diff --git a/pkg/encoders/json/stream_test.go b/pkg/encoders/json/stream_test.go new file mode 100644 index 0000000..478ee18 --- /dev/null +++ b/pkg/encoders/json/stream_test.go @@ -0,0 +1,524 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !goexperiment.jsonv2 + +package json + +import ( + "bytes" + "fmt" + "io" + "log" + "net" + "net/http" + "net/http/httptest" + "path" + "reflect" + "runtime" + "runtime/debug" + "strings" + "testing" +) + +// TODO(https://go.dev/issue/52751): Replace with native testing support. + +// CaseName is a case name annotated with a file and line. +type CaseName struct { + Name string + Where CasePos +} + +// Name annotates a case name with the file and line of the caller. +func Name(s string) (c CaseName) { + c.Name = s + runtime.Callers(2, c.Where.pc[:]) + return c +} + +// CasePos represents a file and line number. +type CasePos struct{ pc [1]uintptr } + +func (pos CasePos) String() string { + frames := runtime.CallersFrames(pos.pc[:]) + frame, _ := frames.Next() + return fmt.Sprintf("%s:%d", path.Base(frame.File), frame.Line) +} + +// Test values for the stream test. +// One of each JSON kind. +var streamTest = []any{ + 0.1, + "hello", + nil, + true, + false, + []any{"a", "b", "c"}, + map[string]any{"K": "Kelvin", "ß": "long s"}, + 3.14, // another value to make sure something can follow map +} + +var streamEncoded = `0.1 +"hello" +null +true +false +["a","b","c"] +{"ß":"long s","K":"Kelvin"} +3.14 +` + +func TestEncoder(t *testing.T) { + for i := 0; i <= len(streamTest); i++ { + var buf strings.Builder + enc := NewEncoder(&buf) + // Check that enc.SetIndent("", "") turns off indentation. 
+ enc.SetIndent(">", ".") + enc.SetIndent("", "") + for j, v := range streamTest[0:i] { + if err := enc.Encode(v); err != nil { + t.Fatalf("#%d.%d Encode error: %v", i, j, err) + } + } + if got, want := buf.String(), nlines(streamEncoded, i); got != want { + t.Errorf("encoding %d items: mismatch:", i) + diff(t, []byte(got), []byte(want)) + break + } + } +} + +func TestEncoderErrorAndReuseEncodeState(t *testing.T) { + // Disable the GC temporarily to prevent encodeState's in Pool being cleaned away during the test. + percent := debug.SetGCPercent(-1) + defer debug.SetGCPercent(percent) + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(dummy); err == nil { + t.Errorf("Encode(dummy) error: got nil, want non-nil") + } + + type Data struct { + A string + I int + } + want := Data{A: "a", I: 1} + if err := enc.Encode(want); err != nil { + t.Errorf("Marshal error: %v", err) + } + + var got Data + if err := Unmarshal(buf.Bytes(), &got); err != nil { + t.Errorf("Unmarshal error: %v", err) + } + if got != want { + t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %v\n\twant: %v", got, want) + } +} + +var streamEncodedIndent = `0.1 +"hello" +null +true +false +[ +>."a", +>."b", +>."c" +>] +{ +>."ß": "long s", +>."K": "Kelvin" +>} +3.14 +` + +func TestEncoderIndent(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + enc.SetIndent(">", ".") + for _, v := range streamTest { + enc.Encode(v) + } + if got, want := buf.String(), streamEncodedIndent; got != want { + t.Errorf("Encode mismatch:\ngot:\n%s\n\nwant:\n%s", got, want) + diff(t, []byte(got), []byte(want)) + } +} + +type strMarshaler string + +func (s strMarshaler) MarshalJSON() ([]byte, error) { + return []byte(s), nil +} + +type strPtrMarshaler string + +func (s *strPtrMarshaler) MarshalJSON() ([]byte, error) { + return []byte(*s), nil +} + +func TestEncoderSetEscapeHTML(t *testing.T) { + var c C + var ct CText + var tagStruct struct { + Valid int `json:"<>&#! "` + Invalid int `json:"\\"` + } + + // This case is particularly interesting, as we force the encoder to + // take the address of the Ptr field to use its MarshalJSON method. This + // is why the '&' is important. + marshalerStruct := &struct { + NonPtr strMarshaler + Ptr strPtrMarshaler + }{`""`, `""`} + + // https://golang.org/issue/34154 + stringOption := struct { + Bar string `json:"bar,string"` + }{`foobar`} + + tests := []struct { + CaseName + v any + wantEscape string + want string + }{ + {Name("c"), c, `"\u003c\u0026\u003e"`, `"<&>"`}, + {Name("ct"), ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`}, + {Name(`"<&>"`), "<&>", `"\u003c\u0026\u003e"`, `"<&>"`}, + { + Name("tagStruct"), tagStruct, + `{"\u003c\u003e\u0026#! ":0,"Invalid":0}`, + `{"<>&#! 
":0,"Invalid":0}`, + }, + { + Name(`""`), marshalerStruct, + `{"NonPtr":"\u003cstr\u003e","Ptr":"\u003cstr\u003e"}`, + `{"NonPtr":"","Ptr":""}`, + }, + { + Name("stringOption"), stringOption, + `{"bar":"\"\\u003chtml\\u003efoobar\\u003c/html\\u003e\""}`, + `{"bar":"\"foobar\""}`, + }, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.Encode(tt.v); err != nil { + t.Fatalf("%s: Encode(%s) error: %s", tt.Where, tt.Name, err) + } + if got := strings.TrimSpace(buf.String()); got != tt.wantEscape { + t.Errorf("%s: Encode(%s):\n\tgot: %s\n\twant: %s", tt.Where, tt.Name, got, tt.wantEscape) + } + buf.Reset() + enc.SetEscapeHTML(false) + if err := enc.Encode(tt.v); err != nil { + t.Fatalf("%s: SetEscapeHTML(false) Encode(%s) error: %s", tt.Where, tt.Name, err) + } + if got := strings.TrimSpace(buf.String()); got != tt.want { + t.Errorf("%s: SetEscapeHTML(false) Encode(%s):\n\tgot: %s\n\twant: %s", + tt.Where, tt.Name, got, tt.want) + } + }) + } +} + +func TestDecoder(t *testing.T) { + for i := 0; i <= len(streamTest); i++ { + // Use stream without newlines as input, + // just to stress the decoder even more. + // Our test input does not include back-to-back numbers. + // Otherwise stripping the newlines would + // merge two adjacent JSON values. + var buf bytes.Buffer + for _, c := range nlines(streamEncoded, i) { + if c != '\n' { + buf.WriteRune(c) + } + } + out := make([]any, i) + dec := NewDecoder(&buf) + for j := range out { + if err := dec.Decode(&out[j]); err != nil { + t.Fatalf("decode #%d/%d error: %v", j, i, err) + } + } + if !reflect.DeepEqual(out, streamTest[0:i]) { + t.Errorf("decoding %d items: mismatch:", i) + for j := range out { + if !reflect.DeepEqual(out[j], streamTest[j]) { + t.Errorf("#%d:\n\tgot: %v\n\twant: %v", j, out[j], streamTest[j]) + } + } + break + } + } +} + +func TestDecoderBuffered(t *testing.T) { + r := strings.NewReader(`{"Name": "Gopher"} extra `) + var m struct { + Name string + } + d := NewDecoder(r) + err := d.Decode(&m) + if err != nil { + t.Fatal(err) + } + if m.Name != "Gopher" { + t.Errorf("Name = %s, want Gopher", m.Name) + } + rest, err := io.ReadAll(d.Buffered()) + if err != nil { + t.Fatal(err) + } + if got, want := string(rest), " extra "; got != want { + t.Errorf("Remaining = %s, want %s", got, want) + } +} + +func nlines(s string, n int) string { + if n <= 0 { + return "" + } + for i, c := range s { + if c == '\n' { + if n--; n == 0 { + return s[0 : i+1] + } + } + } + return s +} + +func TestRawMessage(t *testing.T) { + var data struct { + X float64 + Id RawMessage + Y float32 + } + const raw = `["\u0056",null]` + const want = `{"X":0.1,"Id":["\u0056",null],"Y":0.2}` + err := Unmarshal([]byte(want), &data) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if string([]byte(data.Id)) != raw { + t.Fatalf("Unmarshal:\n\tgot: %s\n\twant: %s", []byte(data.Id), raw) + } + got, err := Marshal(&data) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(got) != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func TestNullRawMessage(t *testing.T) { + var data struct { + X float64 + Id RawMessage + IdPtr *RawMessage + Y float32 + } + const want = `{"X":0.1,"Id":null,"IdPtr":null,"Y":0.2}` + err := Unmarshal([]byte(want), &data) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if want, got := "null", string(data.Id); want != got { + t.Fatalf("Unmarshal:\n\tgot: %s\n\twant: %s", got, want) + } + if 
data.IdPtr != nil { + t.Fatalf("pointer mismatch: got non-nil, want nil") + } + got, err := Marshal(&data) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(got) != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func TestBlocking(t *testing.T) { + tests := []struct { + CaseName + in string + }{ + {Name(""), `{"x": 1}`}, + {Name(""), `[1, 2, 3]`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + r, w := net.Pipe() + go w.Write([]byte(tt.in)) + var val any + + // If Decode reads beyond what w.Write writes above, + // it will block, and the test will deadlock. + if err := NewDecoder(r).Decode(&val); err != nil { + t.Errorf("%s: NewDecoder(%s).Decode error: %v", tt.Where, tt.in, err) + } + r.Close() + w.Close() + }) + } +} + +type decodeThis struct { + v any +} + +func TestDecodeInStream(t *testing.T) { + tests := []struct { + CaseName + json string + expTokens []any + }{ + // streaming token cases + {CaseName: Name(""), json: `10`, expTokens: []any{float64(10)}}, + {CaseName: Name(""), json: ` [10] `, expTokens: []any{ + Delim('['), float64(10), Delim(']')}}, + {CaseName: Name(""), json: ` [false,10,"b"] `, expTokens: []any{ + Delim('['), false, float64(10), "b", Delim(']')}}, + {CaseName: Name(""), json: `{ "a": 1 }`, expTokens: []any{ + Delim('{'), "a", float64(1), Delim('}')}}, + {CaseName: Name(""), json: `{"a": 1, "b":"3"}`, expTokens: []any{ + Delim('{'), "a", float64(1), "b", "3", Delim('}')}}, + {CaseName: Name(""), json: ` [{"a": 1},{"a": 2}] `, expTokens: []any{ + Delim('['), + Delim('{'), "a", float64(1), Delim('}'), + Delim('{'), "a", float64(2), Delim('}'), + Delim(']')}}, + {CaseName: Name(""), json: `{"obj": {"a": 1}}`, expTokens: []any{ + Delim('{'), "obj", Delim('{'), "a", float64(1), Delim('}'), + Delim('}')}}, + {CaseName: Name(""), json: `{"obj": [{"a": 1}]}`, expTokens: []any{ + Delim('{'), "obj", Delim('['), + Delim('{'), "a", float64(1), Delim('}'), + Delim(']'), Delim('}')}}, + + // streaming tokens with intermittent Decode() + {CaseName: Name(""), json: `{ "a": 1 }`, expTokens: []any{ + Delim('{'), "a", + decodeThis{float64(1)}, + Delim('}')}}, + {CaseName: Name(""), json: ` [ { "a" : 1 } ] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + Delim(']')}}, + {CaseName: Name(""), json: ` [{"a": 1},{"a": 2}] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + decodeThis{map[string]any{"a": float64(2)}}, + Delim(']')}}, + {CaseName: Name(""), json: `{ "obj" : [ { "a" : 1 } ] }`, expTokens: []any{ + Delim('{'), "obj", Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + Delim(']'), Delim('}')}}, + + {CaseName: Name(""), json: `{"obj": {"a": 1}}`, expTokens: []any{ + Delim('{'), "obj", + decodeThis{map[string]any{"a": float64(1)}}, + Delim('}')}}, + {CaseName: Name(""), json: `{"obj": [{"a": 1}]}`, expTokens: []any{ + Delim('{'), "obj", + decodeThis{[]any{ + map[string]any{"a": float64(1)}, + }}, + Delim('}')}}, + {CaseName: Name(""), json: ` [{"a": 1} {"a": 2}] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + decodeThis{&SyntaxError{"expected comma after array element", 11}}, + }}, + {CaseName: Name(""), json: `{ "` + strings.Repeat("a", 513) + `" 1 }`, expTokens: []any{ + Delim('{'), strings.Repeat("a", 513), + decodeThis{&SyntaxError{"expected colon after object key", 518}}, + }}, + {CaseName: Name(""), json: `{ "\a" }`, expTokens: []any{ + Delim('{'), + &SyntaxError{"invalid character 'a' in 
string escape code", 3}, + }}, + {CaseName: Name(""), json: ` \a`, expTokens: []any{ + &SyntaxError{"invalid character '\\\\' looking for beginning of value", 1}, + }}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + dec := NewDecoder(strings.NewReader(tt.json)) + for i, want := range tt.expTokens { + var got any + var err error + + if dt, ok := want.(decodeThis); ok { + want = dt.v + err = dec.Decode(&got) + } else { + got, err = dec.Token() + } + if errWant, ok := want.(error); ok { + if err == nil || !reflect.DeepEqual(err, errWant) { + t.Fatalf("%s:\n\tinput: %s\n\tgot error: %v\n\twant error: %v", tt.Where, tt.json, err, errWant) + } + break + } else if err != nil { + t.Fatalf("%s:\n\tinput: %s\n\tgot error: %v\n\twant error: nil", tt.Where, tt.json, err) + } + if !reflect.DeepEqual(got, want) { + t.Fatalf("%s: token %d:\n\tinput: %s\n\tgot: %T(%v)\n\twant: %T(%v)", tt.Where, i, tt.json, got, got, want, want) + } + } + }) + } +} + +// Test from golang.org/issue/11893 +func TestHTTPDecoding(t *testing.T) { + const raw = `{ "foo": "bar" }` + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(raw)) + })) + defer ts.Close() + res, err := http.Get(ts.URL) + if err != nil { + log.Fatalf("http.Get error: %v", err) + } + defer res.Body.Close() + + foo := struct { + Foo string + }{} + + d := NewDecoder(res.Body) + err = d.Decode(&foo) + if err != nil { + t.Fatalf("Decode error: %v", err) + } + if foo.Foo != "bar" { + t.Errorf(`Decode: got %q, want "bar"`, foo.Foo) + } + + // make sure we get the EOF the second time + err = d.Decode(&foo) + if err != io.EOF { + t.Errorf("Decode error:\n\tgot: %v\n\twant: io.EOF", err) + } +} diff --git a/pkg/encoders/json/tables.go b/pkg/encoders/json/tables.go new file mode 100644 index 0000000..e8841cf --- /dev/null +++ b/pkg/encoders/json/tables.go @@ -0,0 +1,220 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !goexperiment.jsonv2 + +package json + +import "unicode/utf8" + +// safeSet holds the value true if the ASCII character with the given array +// position can be represented inside a JSON string without any further +// escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), and the backslash character ("\"). 
+var safeSet = [utf8.RuneSelf]bool{
+	' ': true,
+	'!': true,
+	'"': false,
+	'#': true,
+	'$': true,
+	'%': true,
+	'&': true,
+	'\'': true,
+	'(': true,
+	')': true,
+	'*': true,
+	'+': true,
+	',': true,
+	'-': true,
+	'.': true,
+	'/': true,
+	'0': true,
+	'1': true,
+	'2': true,
+	'3': true,
+	'4': true,
+	'5': true,
+	'6': true,
+	'7': true,
+	'8': true,
+	'9': true,
+	':': true,
+	';': true,
+	'<': true,
+	'=': true,
+	'>': true,
+	'?': true,
+	'@': true,
+	'A': true,
+	'B': true,
+	'C': true,
+	'D': true,
+	'E': true,
+	'F': true,
+	'G': true,
+	'H': true,
+	'I': true,
+	'J': true,
+	'K': true,
+	'L': true,
+	'M': true,
+	'N': true,
+	'O': true,
+	'P': true,
+	'Q': true,
+	'R': true,
+	'S': true,
+	'T': true,
+	'U': true,
+	'V': true,
+	'W': true,
+	'X': true,
+	'Y': true,
+	'Z': true,
+	'[': true,
+	'\\': false,
+	']': true,
+	'^': true,
+	'_': true,
+	'`': true,
+	'a': true,
+	'b': true,
+	'c': true,
+	'd': true,
+	'e': true,
+	'f': true,
+	'g': true,
+	'h': true,
+	'i': true,
+	'j': true,
+	'k': true,
+	'l': true,
+	'm': true,
+	'n': true,
+	'o': true,
+	'p': true,
+	'q': true,
+	'r': true,
+	's': true,
+	't': true,
+	'u': true,
+	'v': true,
+	'w': true,
+	'x': true,
+	'y': true,
+	'z': true,
+	'{': true,
+	'|': true,
+	'}': true,
+	'~': true,
+	'\u007f': true,
+}
+
+// htmlSafeSet holds the value true if the ASCII character with the given
+// array position can be safely represented inside a JSON string, embedded
+// inside of HTML <script> tags, without any additional escaping.
+			got, err := json.Marshal(in)
+			if err != nil {
+				t.Fatalf("json.Marshal error: %v", err)
+			}
+			want := map[string]string{
+				"v1": `"\u003cscript\u003e console.log(\"Hello, world!\"); \u003c/script\u003e"`,
+				"v2": `"<script> console.log(\"Hello, world!\"); </script>"`,
+			}[json.Version]
+			if string(got) != want {
+				t.Fatalf("json.Marshal = %s, want %s", got, want)
+			}
+		})
+	}
+}
+
+// In v1, JSON serialization silently ignored invalid UTF-8 by
+// replacing such bytes with the Unicode replacement character.
+// In v2, JSON serialization reports an error if invalid UTF-8 is encountered.
+//
+// Users of v2 can opt into the v1 behavior by setting [AllowInvalidUTF8].
+//
+// Silently allowing invalid UTF-8 causes data corruption that can be difficult
+// to detect until it is too late. Once it has been discovered, strict UTF-8
+// behavior sometimes cannot be enabled since other logic may be depending
+// on the current behavior due to Hyrum's Law.
+//
+// Tim Bray, the author of RFC 8259 recommends that implementations should
+// go beyond RFC 8259 and instead target compliance with RFC 7493,
+// which makes strict decisions about behavior left undefined in RFC 8259.
+// In particular, RFC 7493 rejects the presence of invalid UTF-8.
+// See https://www.tbray.org/ongoing/When/201x/2017/12/14/RFC-8259-STD-90 +func TestInvalidUTF8(t *testing.T) { + for _, json := range jsonPackages { + t.Run(path.Join("Marshal", json.Version), func(t *testing.T) { + got, err := json.Marshal("\xff") + switch { + case json.Version == "v1" && err != nil: + t.Fatalf("json.Marshal error: %v", err) + case json.Version == "v1" && string(got) != "\"\ufffd\"": + t.Fatalf(`json.Marshal = %s, want %q`, got, "\ufffd") + case json.Version == "v2" && err == nil: + t.Fatal("json.Marshal error is nil, want non-nil") + } + }) + } + + for _, json := range jsonPackages { + t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) { + const in = "\"\xff\"" + var got string + err := json.Unmarshal([]byte(in), &got) + switch { + case json.Version == "v1" && err != nil: + t.Fatalf("json.Unmarshal error: %v", err) + case json.Version == "v1" && got != "\ufffd": + t.Fatalf(`json.Unmarshal = %q, want "\ufffd"`, got) + case json.Version == "v2" && err == nil: + t.Fatal("json.Unmarshal error is nil, want non-nil") + } + }) + } +} + +// In v1, duplicate JSON object names are permitted by default where +// they follow the inconsistent and difficult-to-explain merge semantics of v1. +// In v2, duplicate JSON object names are rejected by default where +// they follow the merge semantics of v2 based on RFC 7396. +// +// Users of v2 can opt into the v1 behavior by setting [AllowDuplicateNames]. +// +// Per RFC 8259, the handling of duplicate names is left as undefined behavior. +// Rejecting such inputs is within the realm of valid behavior. +// Tim Bray, the author of RFC 8259 recommends that implementations should +// go beyond RFC 8259 and instead target compliance with RFC 7493, +// which makes strict decisions about behavior left undefined in RFC 8259. +// In particular, RFC 7493 rejects the presence of duplicate object names. +// See https://www.tbray.org/ongoing/When/201x/2017/12/14/RFC-8259-STD-90 +// +// The lack of duplicate name rejection has correctness implications where +// roundtrip unmarshal/marshal do not result in semantically equivalent JSON. +// This is surprising behavior for users when they accidentally +// send JSON objects with duplicate names. +// +// The lack of duplicate name rejection may have security implications since it +// becomes difficult for a security tool to validate the semantic meaning of a +// JSON object since meaning is undefined in the presence of duplicate names. +// See https://labs.bishopfox.com/tech-blog/an-exploration-of-json-interoperability-vulnerabilities +// +// Related issue: +// +// https://go.dev/issue/48298 +func TestDuplicateNames(t *testing.T) { + for _, json := range jsonPackages { + t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) { + const in = `{"Name":1,"Name":2}` + var got struct{ Name int } + err := json.Unmarshal([]byte(in), &got) + switch { + case json.Version == "v1" && err != nil: + t.Fatalf("json.Unmarshal error: %v", err) + case json.Version == "v1" && got != struct{ Name int }{2}: + t.Fatalf(`json.Unmarshal = %v, want {2}`, got) + case json.Version == "v2" && err == nil: + t.Fatal("json.Unmarshal error is nil, want non-nil") + } + }) + } +} + +// In v1, unmarshaling a JSON null into a non-empty value was inconsistent +// in that sometimes it would be ignored and other times clear the value. +// In v2, unmarshaling a JSON null into a non-empty value would consistently +// always clear the value regardless of the value's type. 
+// +// The purpose of this change is to have consistent behavior with how JSON nulls +// are handled during Unmarshal. This semantic detail has no effect +// when Unmarshaling into a empty value. +// +// Related issues: +// +// https://go.dev/issue/22177 +// https://go.dev/issue/33835 +func TestMergeNull(t *testing.T) { + type Types struct { + Bool bool + String string + Bytes []byte + Int int + Map map[string]string + Struct struct{ Field string } + Slice []string + Array [1]string + Pointer *string + Interface any + } + + for _, json := range jsonPackages { + t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) { + // Start with a non-empty value where all fields are populated. + in := Types{ + Bool: true, + String: "old", + Bytes: []byte("old"), + Int: 1234, + Map: map[string]string{"old": "old"}, + Struct: struct{ Field string }{"old"}, + Slice: []string{"old"}, + Array: [1]string{"old"}, + Pointer: new(string), + Interface: "old", + } + + // Unmarshal a JSON null into every field. + if err := json.Unmarshal([]byte(`{ + "Bool": null, + "String": null, + "Bytes": null, + "Int": null, + "Map": null, + "Struct": null, + "Slice": null, + "Array": null, + "Pointer": null, + "Interface": null + }`), &in); err != nil { + t.Fatalf("json.Unmarshal error: %v", err) + } + + want := map[string]Types{ + "v1": { + Bool: true, + String: "old", + Int: 1234, + Struct: struct{ Field string }{"old"}, + Array: [1]string{"old"}, + }, + "v2": {}, // all fields are zeroed + }[json.Version] + if !reflect.DeepEqual(in, want) { + t.Fatalf("json.Unmarshal = %+v, want %+v", in, want) + } + }) + } +} + +// In v1, merge semantics are inconsistent and difficult to explain. +// In v2, merge semantics replaces the destination value for anything +// other than a JSON object, and recursively merges JSON objects. +// +// Merge semantics in v1 are inconsistent and difficult to explain +// largely because the behavior came about organically, rather than +// having a principled approach to how the semantics should operate. +// In v2, merging follows behavior based on RFC 7396. +// +// Related issues: +// +// https://go.dev/issue/21092 +// https://go.dev/issue/26946 +// https://go.dev/issue/27172 +// https://go.dev/issue/30701 +// https://go.dev/issue/31924 +// https://go.dev/issue/43664 +func TestMergeComposite(t *testing.T) { + type Tuple struct{ Old, New bool } + type Composites struct { + Slice []Tuple + Array [1]Tuple + Map map[string]Tuple + MapPointer map[string]*Tuple + Struct struct{ Tuple Tuple } + StructPointer *struct{ Tuple Tuple } + Interface any + InterfacePointer any + } + + for _, json := range jsonPackages { + t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) { + // Start with a non-empty value where all fields are populated. + in := Composites{ + Slice: []Tuple{{Old: true}, {Old: true}}[:1], + Array: [1]Tuple{{Old: true}}, + Map: map[string]Tuple{"Tuple": {Old: true}}, + MapPointer: map[string]*Tuple{"Tuple": {Old: true}}, + Struct: struct{ Tuple Tuple }{Tuple{Old: true}}, + StructPointer: &struct{ Tuple Tuple }{Tuple{Old: true}}, + Interface: Tuple{Old: true}, + InterfacePointer: &Tuple{Old: true}, + } + + // Unmarshal into every pre-populated field. 
+ if err := json.Unmarshal([]byte(`{ + "Slice": [{"New":true}, {"New":true}], + "Array": [{"New":true}], + "Map": {"Tuple": {"New":true}}, + "MapPointer": {"Tuple": {"New":true}}, + "Struct": {"Tuple": {"New":true}}, + "StructPointer": {"Tuple": {"New":true}}, + "Interface": {"New":true}, + "InterfacePointer": {"New":true} + }`), &in); err != nil { + t.Fatalf("json.Unmarshal error: %v", err) + } + + merged := Tuple{Old: true, New: true} + replaced := Tuple{Old: false, New: true} + want := map[string]Composites{ + "v1": { + Slice: []Tuple{merged, merged}, // merged + Array: [1]Tuple{merged}, // merged + Map: map[string]Tuple{"Tuple": replaced}, // replaced + MapPointer: map[string]*Tuple{"Tuple": &replaced}, // replaced + Struct: struct{ Tuple Tuple }{merged}, // merged (same as v2) + StructPointer: &struct{ Tuple Tuple }{merged}, // merged (same as v2) + Interface: map[string]any{"New": true}, // replaced + InterfacePointer: &merged, // merged (same as v2) + }, + "v2": { + Slice: []Tuple{replaced, replaced}, // replaced + Array: [1]Tuple{replaced}, // replaced + Map: map[string]Tuple{"Tuple": merged}, // merged + MapPointer: map[string]*Tuple{"Tuple": &merged}, // merged + Struct: struct{ Tuple Tuple }{merged}, // merged (same as v1) + StructPointer: &struct{ Tuple Tuple }{merged}, // merged (same as v1) + Interface: merged, // merged + InterfacePointer: &merged, // merged (same as v1) + }, + }[json.Version] + if !reflect.DeepEqual(in, want) { + t.Fatalf("json.Unmarshal = %+v, want %+v", in, want) + } + }) + } +} + +// In v1, there was no special support for time.Duration, +// which resulted in that type simply being treated as a signed integer. +// In v2, there is now first-class support for time.Duration, where the type is +// formatted and parsed using time.Duration.String and time.ParseDuration. +// +// Users of v2 can opt into the v1 behavior by setting +// the "format:nano" option in the `json` struct field tag: +// +// struct { +// Duration time.Duration `json:",format:nano"` +// } +// +// Related issue: +// +// https://go.dev/issue/10275 +func TestTimeDurations(t *testing.T) { + t.SkipNow() // TODO(https://go.dev/issue/71631): The default representation of time.Duration is still undecided. + for _, json := range jsonPackages { + t.Run(path.Join("Marshal", json.Version), func(t *testing.T) { + got, err := json.Marshal(time.Minute) + switch { + case err != nil: + t.Fatalf("json.Marshal error: %v", err) + case json.Version == "v1" && string(got) != "60000000000": + t.Fatalf("json.Marshal = %s, want 60000000000", got) + case json.Version == "v2" && string(got) != `"1m0s"`: + t.Fatalf(`json.Marshal = %s, want "1m0s"`, got) + } + }) + } + + for _, json := range jsonPackages { + t.Run(path.Join("Unmarshal", json.Version), func(t *testing.T) { + in := map[string]string{ + "v1": "60000000000", + "v2": `"1m0s"`, + }[json.Version] + var got time.Duration + err := json.Unmarshal([]byte(in), &got) + switch { + case err != nil: + t.Fatalf("json.Unmarshal error: %v", err) + case got != time.Minute: + t.Fatalf("json.Unmarshal = %v, want 1m0s", got) + } + }) + } +} + +// In v1, non-empty structs without any JSON serializable fields are permitted. +// In v2, non-empty structs without any JSON serializable fields are rejected. +// +// The purpose of this change is to avoid a common pitfall for new users +// where they expect JSON serialization to handle unexported fields. +// However, this does not work since Go reflection does not +// provide the package the ability to mutate such fields. 
+// Rejecting unserializable structs in v2 is intended to be a clear signal +// that the type is not supposed to be serialized. +func TestEmptyStructs(t *testing.T) { + never := func(string) bool { return false } + onlyV2 := func(v string) bool { return v == "v2" } + values := []struct { + in any + wantError func(string) bool + }{ + // It is okay to marshal a truly empty struct in v1 and v2. + {in: addr(struct{}{}), wantError: never}, + // In v1, a non-empty struct without exported fields + // is equivalent to an empty struct, but is rejected in v2. + // Note that errors.errorString type has only unexported fields. + {in: errors.New("error"), wantError: onlyV2}, + // A mix of exported and unexported fields is permitted. + {in: addr(struct{ Exported, unexported int }{}), wantError: never}, + } + + for _, json := range jsonPackages { + t.Run("Marshal", func(t *testing.T) { + for _, value := range values { + wantError := value.wantError(json.Version) + _, err := json.Marshal(value.in) + switch { + case (err == nil) && wantError: + t.Fatalf("json.Marshal error is nil, want non-nil") + case (err != nil) && !wantError: + t.Fatalf("json.Marshal error: %v", err) + } + } + }) + } + + for _, json := range jsonPackages { + t.Run("Unmarshal", func(t *testing.T) { + for _, value := range values { + wantError := value.wantError(json.Version) + out := reflect.New(reflect.TypeOf(value.in).Elem()).Interface() + err := json.Unmarshal([]byte("{}"), out) + switch { + case (err == nil) && wantError: + t.Fatalf("json.Unmarshal error is nil, want non-nil") + case (err != nil) && !wantError: + t.Fatalf("json.Unmarshal error: %v", err) + } + } + }) + } +} diff --git a/pkg/encoders/json/v2_encode.go b/pkg/encoders/json/v2_encode.go new file mode 100644 index 0000000..c2d620b --- /dev/null +++ b/pkg/encoders/json/v2_encode.go @@ -0,0 +1,251 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +// Package json implements encoding and decoding of JSON as defined in +// RFC 7159. The mapping between JSON and Go values is described +// in the documentation for the Marshal and Unmarshal functions. +// +// See "JSON and Go" for an introduction to this package: +// https://golang.org/doc/articles/json_and_go.html +// +// # Security Considerations +// +// See the "Security Considerations" section in [encoding/json/v2]. +// +// For historical reasons, the default behavior of v1 [encoding/json] +// unfortunately operates with less secure defaults. +// New usages of JSON in Go are encouraged to use [encoding/json/v2] instead. +package json + +import ( + "reflect" + "strconv" + + jsonv2 "encoding/json/v2" +) + +// Marshal returns the JSON encoding of v. +// +// Marshal traverses the value v recursively. +// If an encountered value implements [Marshaler] +// and is not a nil pointer, Marshal calls [Marshaler.MarshalJSON] +// to produce JSON. If no [Marshaler.MarshalJSON] method is present but the +// value implements [encoding.TextMarshaler] instead, Marshal calls +// [encoding.TextMarshaler.MarshalText] and encodes the result as a JSON string. +// The nil pointer exception is not strictly necessary +// but mimics a similar, necessary exception in the behavior of +// [Unmarshaler.UnmarshalJSON]. +// +// Otherwise, Marshal uses the following type-dependent default encodings: +// +// Boolean values encode as JSON booleans. 
+// +// Floating point, integer, and [Number] values encode as JSON numbers. +// NaN and +/-Inf values will return an [UnsupportedValueError]. +// +// String values encode as JSON strings coerced to valid UTF-8, +// replacing invalid bytes with the Unicode replacement rune. +// So that the JSON will be safe to embed inside HTML