Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
cd6a53a7b7
|
|||
|
117e5924fd
|
|||
|
6cff006e54
|
|||
|
7f5bd3960c
|
|||
|
8287035920
|
|||
|
54a01e1255
|
@@ -44,14 +44,12 @@ type C struct {
|
||||
Owners []string `env:"ORLY_OWNERS" usage:"comma-separated list of owner npubs, who have full control of the relay for wipe and restart and other functions"`
|
||||
ACLMode string `env:"ORLY_ACL_MODE" usage:"ACL mode: follows, managed (nip-86), none" default:"none"`
|
||||
AuthRequired bool `env:"ORLY_AUTH_REQUIRED" usage:"require authentication for all requests (works with managed ACL)" default:"false"`
|
||||
SpiderMode string `env:"ORLY_SPIDER_MODE" usage:"spider mode: none,follows" default:"none"`
|
||||
SpiderFrequency time.Duration `env:"ORLY_SPIDER_FREQUENCY" usage:"spider frequency in seconds" default:"1h"`
|
||||
BootstrapRelays []string `env:"ORLY_BOOTSTRAP_RELAYS" usage:"comma-separated list of bootstrap relay URLs for initial sync"`
|
||||
NWCUri string `env:"ORLY_NWC_URI" usage:"NWC (Nostr Wallet Connect) connection string for Lightning payments"`
|
||||
SubscriptionEnabled bool `env:"ORLY_SUBSCRIPTION_ENABLED" default:"false" usage:"enable subscription-based access control requiring payment for non-directory events"`
|
||||
MonthlyPriceSats int64 `env:"ORLY_MONTHLY_PRICE_SATS" default:"6000" usage:"price in satoshis for one month subscription (default ~$2 USD)"`
|
||||
RelayURL string `env:"ORLY_RELAY_URL" usage:"base URL for the relay dashboard (e.g., https://relay.example.com)"`
|
||||
RelayAddresses []string `env:"ORLY_RELAY_ADDRESSES" usage:"comma-separated list of websocket addresses for this relay (e.g., wss://relay.example.com,wss://backup.example.com) - used by spider to avoid self-connections"`
|
||||
RelayAddresses []string `env:"ORLY_RELAY_ADDRESSES" usage:"comma-separated list of websocket addresses for this relay (e.g., wss://relay.example.com,wss://backup.example.com)"`
|
||||
FollowListFrequency time.Duration `env:"ORLY_FOLLOW_LIST_FREQUENCY" usage:"how often to fetch admin follow lists (default: 1h)" default:"1h"`
|
||||
|
||||
// Web UI and dev mode settings
|
||||
|
||||
@@ -112,17 +112,6 @@ func (l *Listener) HandleEvent(msg []byte) (err error) {
|
||||
|
||||
// Check if policy is enabled and process event through it
|
||||
if l.policyManager != nil && l.policyManager.Manager != nil && l.policyManager.Manager.IsEnabled() {
|
||||
if l.policyManager.Manager.IsDisabled() {
|
||||
// Policy is disabled due to failure - reject all events
|
||||
log.W.F("policy is disabled, rejecting event %0x", env.E.ID)
|
||||
if err = Ok.Error(
|
||||
l, env,
|
||||
"policy disabled - events rejected until policy is restored",
|
||||
); chk.E(err) {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Check policy for write access
|
||||
allowed, policyErr := l.policyManager.CheckPolicy("write", env.E, l.authedPubkey.Load(), l.remote)
|
||||
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"encoding/json"
|
||||
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"lol.mleku.dev/chk"
|
||||
"lol.mleku.dev/log"
|
||||
@@ -18,7 +20,6 @@ import (
|
||||
"next.orly.dev/pkg/encoders/bech32encoding"
|
||||
"next.orly.dev/pkg/encoders/event"
|
||||
"next.orly.dev/pkg/encoders/hex"
|
||||
"next.orly.dev/pkg/encoders/json"
|
||||
"next.orly.dev/pkg/encoders/kind"
|
||||
"next.orly.dev/pkg/encoders/tag"
|
||||
"next.orly.dev/pkg/encoders/timestamp"
|
||||
@@ -737,9 +738,19 @@ func (pp *PaymentProcessor) CreateWelcomeNote(userPubkey []byte) error {
|
||||
return fmt.Errorf("failed to encode relay npub: %w", err)
|
||||
}
|
||||
|
||||
// Create the welcome note content with nostr:npub link
|
||||
// Get user npub for personalized greeting
|
||||
userNpub, err := bech32encoding.BinToNpub(userPubkey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to encode user npub: %w", err)
|
||||
}
|
||||
|
||||
// Create the welcome note content with privacy notice and personalized greeting
|
||||
content := fmt.Sprintf(
|
||||
`Welcome to the relay! 🎉
|
||||
`This note is only visible to you
|
||||
|
||||
Hi nostr:%s
|
||||
|
||||
Welcome to the relay! 🎉
|
||||
|
||||
You have a FREE 30-day trial that started when you first logged in.
|
||||
|
||||
@@ -759,7 +770,7 @@ Relay: nostr:%s
|
||||
|
||||
Log in to the relay dashboard to access your configuration at: %s
|
||||
|
||||
Enjoy your time on the relay!`, monthlyPrice, monthlyPrice,
|
||||
Enjoy your time on the relay!`, string(userNpub), monthlyPrice, monthlyPrice,
|
||||
string(relayNpubForContent), pp.getDashboardURL(),
|
||||
)
|
||||
|
||||
@@ -771,8 +782,8 @@ Enjoy your time on the relay!`, monthlyPrice, monthlyPrice,
|
||||
ev.Content = []byte(content)
|
||||
ev.Tags = tag.NewS()
|
||||
|
||||
// Add "p" tag for the user
|
||||
*ev.Tags = append(*ev.Tags, tag.NewFromAny("p", hex.Enc(userPubkey)))
|
||||
// Add "p" tag for the user with mention in third field
|
||||
*ev.Tags = append(*ev.Tags, tag.NewFromAny("p", hex.Enc(userPubkey), "", "mention"))
|
||||
|
||||
// Add expiration tag (5 days from creation)
|
||||
noteExpiry := time.Now().AddDate(0, 0, 5)
|
||||
@@ -784,11 +795,8 @@ Enjoy your time on the relay!`, monthlyPrice, monthlyPrice,
|
||||
// Add "private" tag with authorized npubs (user and relay)
|
||||
var authorizedNpubs []string
|
||||
|
||||
// Add user npub
|
||||
userNpub, err := bech32encoding.BinToNpub(userPubkey)
|
||||
if err == nil {
|
||||
authorizedNpubs = append(authorizedNpubs, string(userNpub))
|
||||
}
|
||||
// Add user npub (already encoded above)
|
||||
authorizedNpubs = append(authorizedNpubs, string(userNpub))
|
||||
|
||||
// Add relay npub
|
||||
relayNpub, err := bech32encoding.BinToNpub(sign.Pub())
|
||||
|
||||
@@ -30,8 +30,6 @@ docker run -d \
|
||||
-e ORLY_BOOTSTRAP_RELAYS=wss://profiles.nostr1.com,wss://purplepag.es,wss://relay.nostr.band,wss://relay.damus.io \
|
||||
-e ORLY_RELAY_URL=wss://orly-relay.imwald.eu \
|
||||
-e ORLY_ACL_MODE=follows \
|
||||
-e ORLY_SPIDER_MODE=follows \
|
||||
-e ORLY_SPIDER_FREQUENCY=1h \
|
||||
-e ORLY_SUBSCRIPTION_ENABLED=false \
|
||||
silberengel/next-orly:latest
|
||||
|
||||
@@ -289,7 +287,6 @@ The latest Orly relay includes several proxy improvements:
|
||||
# Essential for proxy setups
|
||||
ORLY_RELAY_URL=wss://your-domain.com # Must match your public URL
|
||||
ORLY_ACL_MODE=follows # Enable follows-based access control
|
||||
ORLY_SPIDER_MODE=follows # Enable content syncing from other relays
|
||||
ORLY_SUBSCRIPTION_ENABLED=false # Disable payment requirements
|
||||
```
|
||||
|
||||
@@ -498,7 +495,6 @@ docker logs orly-relay | grep -i "kind.*3"
|
||||
### **Spider and ACL Features**
|
||||
|
||||
- ✅ Follows-based access control (`ORLY_ACL_MODE=follows`)
|
||||
- ✅ Content syncing from other relays (`ORLY_SPIDER_MODE=follows`)
|
||||
- ✅ No payment requirements (`ORLY_SUBSCRIPTION_ENABLED=false`)
|
||||
|
||||
### **Production Ready**
|
||||
|
||||
@@ -26,7 +26,6 @@ services:
|
||||
|
||||
# ACL and Spider Configuration
|
||||
- ORLY_ACL_MODE=follows
|
||||
- ORLY_SPIDER_MODE=follows
|
||||
|
||||
# Bootstrap relay URLs for initial sync
|
||||
- ORLY_BOOTSTRAP_RELAYS=wss://profiles.nostr1.com,wss://purplepag.es,wss://relay.nostr.band,wss://relay.damus.io
|
||||
|
||||
@@ -17,6 +17,7 @@ The policy configuration is loaded from `$HOME/.config/ORLY/policy.json`. See `d
|
||||
|
||||
```json
|
||||
{
|
||||
"default_policy": "allow",
|
||||
"kind": {
|
||||
"whitelist": [1, 3, 5, 7, 9735],
|
||||
"blacklist": []
|
||||
@@ -48,6 +49,17 @@ The policy configuration is loaded from `$HOME/.config/ORLY/policy.json`. See `d
|
||||
}
|
||||
```
|
||||
|
||||
### Default Policy
|
||||
|
||||
The `default_policy` field determines what happens to an event when no rule makes an explicit allow or deny decision:
|
||||
|
||||
- `"allow"` (default): Events are allowed unless explicitly denied by rules
|
||||
- `"deny"`: Events are denied unless explicitly allowed by rules
|
||||
|
||||
This applies to:
|
||||
- Events of whitelisted kinds that have no specific rules
|
||||
- Events that pass all other policy checks but have no explicit allow/deny decision
|
||||
|
||||
### Policy Evaluation Order
|
||||
|
||||
The policy system evaluates events in the following order:
|
||||
@@ -56,6 +68,7 @@ The policy system evaluates events in the following order:
|
||||
2. **Kinds Filtering** - Whitelist/blacklist by event kind
|
||||
3. **Kind-specific Rules** - Rules for specific event kinds
|
||||
4. **Script Rules** - Custom script logic (if enabled)
|
||||
5. **Default Policy** - Applied when no rules make a decision
|
||||
|
||||
### Global Rules
|
||||
|
||||
@@ -173,17 +186,41 @@ When policy is enabled, every EVENT envelope is checked using `CheckPolicy("writ
|
||||
|
||||
When policy is enabled, every event returned in REQ responses is filtered using `CheckPolicy("read", event, loggedInPubkey, ipAddress)` before being sent to the client. The same evaluation order applies for read access.
|
||||
|
||||
## Error Handling
|
||||
## Script Resilience
|
||||
|
||||
- If policy script fails or times out, events are allowed by default
|
||||
- If policy configuration is invalid, default policy (allow all) is used
|
||||
- Policy script failures are logged but don't block relay operation
|
||||
The policy system is designed to be resilient to script failures:
|
||||
|
||||
### Automatic Recovery
|
||||
- Policy scripts are automatically restarted if they crash or fail to load
|
||||
- The system continuously monitors script health and attempts recovery every 60 seconds (1 minute)
|
||||
- Script failures don't disable the entire policy system
|
||||
|
||||
### Fallback Behavior
|
||||
When a policy script fails or is not running:
|
||||
- Events that would have been processed by the script fall back to the `default_policy`
|
||||
- The system logs which policy rule is inactive and the fallback behavior
|
||||
- Other policy rules (global, kinds, non-script rules) continue to function normally
|
||||
|
||||
### Error Handling
|
||||
|
||||
- If policy script fails or times out, events fall back to `default_policy` setting
|
||||
- If policy configuration is invalid, default policy (allow all) is used
|
||||
- Policy script failures are logged with specific rule information but don't block relay operation
|
||||
|
||||
## Monitoring
|
||||
|
||||
Policy decisions are logged at debug level:
|
||||
Policy decisions and script health are logged:
|
||||
|
||||
### Policy Decisions
|
||||
- `policy allowed event <id>`
|
||||
- `policy rejected event <id>`
|
||||
|
||||
### Script Health
|
||||
- `policy rule for kind <N> is inactive (script not running), falling back to default policy (<policy>)`
|
||||
- `policy rule for kind <N> failed (script processing error: <error>), falling back to default policy (<policy>)`
|
||||
- `policy rule for kind <N> returned unknown action '<action>', falling back to default policy (<policy>)`
|
||||
- `policy script not found at <path>, will retry periodically`
|
||||
- `policy script crashed - events will fall back to default policy until restart`
|
||||
- `policy filtered out event <id> for read access`
|
||||
|
||||
## Best Practices
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
{
|
||||
"default_policy": "allow",
|
||||
"kind": {
|
||||
"whitelist": [0, 1, 3, 4, 5, 6, 7, 40, 41, 42, 43, 44, 9735],
|
||||
"blacklist": []
|
||||
|
||||
3
go.mod
3
go.mod
@@ -20,7 +20,7 @@ require (
|
||||
golang.org/x/lint v0.0.0-20241112194109-818c5a804067
|
||||
golang.org/x/net v0.44.0
|
||||
honnef.co/go/tools v0.6.1
|
||||
lol.mleku.dev v1.0.3
|
||||
lol.mleku.dev v1.0.4
|
||||
lukechampine.com/frand v1.5.1
|
||||
)
|
||||
|
||||
@@ -35,7 +35,6 @@ require (
|
||||
github.com/google/flatbuffers v25.9.23+incompatible // indirect
|
||||
github.com/google/pprof v0.0.0-20251002213607-436353cc1ee6 // indirect
|
||||
github.com/klauspost/compress v1.18.0 // indirect
|
||||
github.com/nostr-dev-kit/ndk v0.0.0-20251010140307-0653d6e69923 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/templexxx/cpu v0.1.1 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||
|
||||
6
go.sum
6
go.sum
@@ -60,8 +60,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
|
||||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
github.com/nostr-dev-kit/ndk v0.0.0-20251010140307-0653d6e69923 h1:N+sorUpSXhIxJeJ4A81SC3UTwo4S+BL3ECB/QSYS5qE=
|
||||
github.com/nostr-dev-kit/ndk v0.0.0-20251010140307-0653d6e69923/go.mod h1:g76mM+6X3X2E9gM9VP+1I9arcSIhCLwknT1HAXJA+Z8=
|
||||
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
|
||||
github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA=
|
||||
github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo=
|
||||
@@ -138,7 +136,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
honnef.co/go/tools v0.6.1 h1:R094WgE8K4JirYjBaOpz/AvTyUu/3wbmAoskKN/pxTI=
|
||||
honnef.co/go/tools v0.6.1/go.mod h1:3puzxxljPCe8RGJX7BIy1plGbxEOZni5mR2aXe3/uk4=
|
||||
lol.mleku.dev v1.0.3 h1:IrqLd/wFRghu6MX7mgyKh//3VQiId2AM4RdCbFqSLnY=
|
||||
lol.mleku.dev v1.0.3/go.mod h1:DQ0WnmkntA9dPLCXgvtIgYt5G0HSqx3wSTLolHgWeLA=
|
||||
lol.mleku.dev v1.0.4 h1:SOngs7erj8J3nXz673kYFgXQHFO+jkCI1E2iOlpyzV8=
|
||||
lol.mleku.dev v1.0.4/go.mod h1:DQ0WnmkntA9dPLCXgvtIgYt5G0HSqx3wSTLolHgWeLA=
|
||||
lukechampine.com/frand v1.5.1 h1:fg0eRtdmGFIxhP5zQJzM1lFDbD6CUfu/f+7WgAZd5/w=
|
||||
lukechampine.com/frand v1.5.1/go.mod h1:4VstaWc2plN4Mjr10chUD46RAVGWhpkZ5Nja8+Azp0Q=
|
||||
|
||||
7
main.go
7
main.go
@@ -22,7 +22,6 @@ import (
|
||||
"next.orly.dev/pkg/crypto/keys"
|
||||
"next.orly.dev/pkg/database"
|
||||
"next.orly.dev/pkg/encoders/hex"
|
||||
"next.orly.dev/pkg/spider"
|
||||
"next.orly.dev/pkg/utils/interrupt"
|
||||
"next.orly.dev/pkg/version"
|
||||
)
|
||||
@@ -290,12 +289,6 @@ func main() {
|
||||
}
|
||||
acl.Registry.Syncer()
|
||||
|
||||
// Initialize and start spider functionality if enabled
|
||||
spiderCtx, spiderCancel := context.WithCancel(ctx)
|
||||
spiderInstance := spider.New(db, cfg, spiderCtx, spiderCancel)
|
||||
spiderInstance.Start()
|
||||
defer spiderInstance.Stop()
|
||||
|
||||
// Start HTTP pprof server if enabled
|
||||
if cfg.PprofHTTP {
|
||||
pprofAddr := fmt.Sprintf("%s:%d", cfg.Listen, 6060)
|
||||
|
||||
@@ -7,8 +7,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"encoding/json"
|
||||
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"next.orly.dev/pkg/encoders/json"
|
||||
)
|
||||
|
||||
type Subscription struct {
|
||||
@@ -192,7 +193,7 @@ func (d *D) GetPaymentHistory(pubkey []byte) ([]Payment, error) {
|
||||
// IsFirstTimeUser checks if a user is logging in for the first time and marks them as seen
|
||||
func (d *D) IsFirstTimeUser(pubkey []byte) (bool, error) {
|
||||
key := fmt.Sprintf("firstlogin:%s", hex.EncodeToString(pubkey))
|
||||
|
||||
|
||||
isFirstTime := false
|
||||
err := d.DB.Update(
|
||||
func(txn *badger.Txn) error {
|
||||
@@ -212,6 +213,6 @@ func (d *D) IsFirstTimeUser(pubkey []byte) (bool, error) {
|
||||
return err // Return any other error as-is
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
return isFirstTime, err
|
||||
}
|
||||
|
||||
@@ -24,9 +24,6 @@ import (
|
||||
// encode <, >, and & characters due to legacy bullcrap in the encoding/json
|
||||
// library. Either call MarshalJSON directly or use a json.Encoder with html
|
||||
// escaping disabled.
|
||||
//
|
||||
// Or import "next.orly.dev/pkg/encoders/json" and use json.Marshal which is the
|
||||
// same as go 1.25 json v1 except with this one stupidity removed.
|
||||
type E struct {
|
||||
|
||||
// ID is the SHA256 hash of the canonical encoding of the event in binary
|
||||
@@ -89,7 +86,7 @@ func (ev *E) Clone() *E {
|
||||
CreatedAt: ev.CreatedAt,
|
||||
Kind: ev.Kind,
|
||||
}
|
||||
|
||||
|
||||
// Deep copy all byte slices with independent memory
|
||||
if ev.ID != nil {
|
||||
clone.ID = make([]byte, len(ev.ID))
|
||||
@@ -107,7 +104,7 @@ func (ev *E) Clone() *E {
|
||||
clone.Sig = make([]byte, len(ev.Sig))
|
||||
copy(clone.Sig, ev.Sig)
|
||||
}
|
||||
|
||||
|
||||
// Deep copy tags
|
||||
if ev.Tags != nil {
|
||||
clone.Tags = tag.NewS()
|
||||
@@ -124,7 +121,7 @@ func (ev *E) Clone() *E {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return clone
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
"lukechampine.com/frand"
|
||||
"next.orly.dev/pkg/encoders/event/examples"
|
||||
"next.orly.dev/pkg/encoders/hex"
|
||||
"next.orly.dev/pkg/encoders/json"
|
||||
"encoding/json"
|
||||
"next.orly.dev/pkg/encoders/tag"
|
||||
"next.orly.dev/pkg/utils"
|
||||
"next.orly.dev/pkg/utils/bufpool"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,50 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// foldName returns a folded string such that foldName(x) == foldName(y)
|
||||
// is identical to bytes.EqualFold(x, y).
|
||||
func foldName(in []byte) []byte {
|
||||
// This is inlinable to take advantage of "function outlining".
|
||||
var arr [32]byte // large enough for most JSON names
|
||||
return appendFoldedName(arr[:0], in)
|
||||
}
|
||||
|
||||
func appendFoldedName(out, in []byte) []byte {
|
||||
for i := 0; i < len(in); {
|
||||
// Handle single-byte ASCII.
|
||||
if c := in[i]; c < utf8.RuneSelf {
|
||||
if 'a' <= c && c <= 'z' {
|
||||
c -= 'a' - 'A'
|
||||
}
|
||||
out = append(out, c)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
// Handle multi-byte Unicode.
|
||||
r, n := utf8.DecodeRune(in[i:])
|
||||
out = utf8.AppendRune(out, foldRune(r))
|
||||
i += n
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// foldRune returns the smallest rune among all runes in the same fold set
// as r.
func foldRune(r rune) rune {
	// unicode.SimpleFold walks the fold orbit in increasing order and wraps
	// around to the smallest member; the first step that does not increase
	// therefore lands on the minimum.
	next := unicode.SimpleFold(r)
	for next > r {
		r, next = next, unicode.SimpleFold(next)
	}
	return next
}
|
||||
@@ -1,184 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import "bytes"
|
||||
|
||||
// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
|
||||
// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
|
||||
// so that the JSON will be safe to embed inside HTML <script> tags.
|
||||
// For historical reasons, web browsers don't honor standard HTML
|
||||
// escaping within <script> tags, so an alternative JSON encoding must be used.
|
||||
func HTMLEscape(dst *bytes.Buffer, src []byte) {
|
||||
dst.Grow(len(src))
|
||||
dst.Write(appendHTMLEscape(dst.AvailableBuffer(), src))
|
||||
}
|
||||
|
||||
func appendHTMLEscape(dst, src []byte) []byte {
|
||||
// The characters can only appear in string literals,
|
||||
// so just scan the string one byte at a time.
|
||||
start := 0
|
||||
for i, c := range src {
|
||||
if c == '<' || c == '>' || c == '&' {
|
||||
dst = append(dst, src[start:i]...)
|
||||
dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF])
|
||||
start = i + 1
|
||||
}
|
||||
// Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9).
|
||||
if c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 {
|
||||
dst = append(dst, src[start:i]...)
|
||||
dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF])
|
||||
start = i + len("\u2029")
|
||||
}
|
||||
}
|
||||
return append(dst, src[start:]...)
|
||||
}
|
||||
|
||||
// Compact appends to dst the JSON-encoded src with
|
||||
// insignificant space characters elided.
|
||||
func Compact(dst *bytes.Buffer, src []byte) error {
|
||||
dst.Grow(len(src))
|
||||
b := dst.AvailableBuffer()
|
||||
b, err := appendCompact(b, src, false)
|
||||
dst.Write(b)
|
||||
return err
|
||||
}
|
||||
|
||||
func appendCompact(dst, src []byte, escape bool) ([]byte, error) {
|
||||
origLen := len(dst)
|
||||
scan := newScanner()
|
||||
defer freeScanner(scan)
|
||||
start := 0
|
||||
for i, c := range src {
|
||||
if escape && (c == '<' || c == '>' || c == '&') {
|
||||
if start < i {
|
||||
dst = append(dst, src[start:i]...)
|
||||
}
|
||||
dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF])
|
||||
start = i + 1
|
||||
}
|
||||
// Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9).
|
||||
if escape && c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 {
|
||||
if start < i {
|
||||
dst = append(dst, src[start:i]...)
|
||||
}
|
||||
dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF])
|
||||
start = i + 3
|
||||
}
|
||||
v := scan.step(scan, c)
|
||||
if v >= scanSkipSpace {
|
||||
if v == scanError {
|
||||
break
|
||||
}
|
||||
if start < i {
|
||||
dst = append(dst, src[start:i]...)
|
||||
}
|
||||
start = i + 1
|
||||
}
|
||||
}
|
||||
if scan.eof() == scanError {
|
||||
return dst[:origLen], scan.err
|
||||
}
|
||||
if start < len(src) {
|
||||
dst = append(dst, src[start:]...)
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
// appendNewline appends a newline, then prefix, then depth copies of indent
// to dst and returns the extended slice. A depth of zero or less adds no
// indent.
func appendNewline(dst []byte, prefix, indent string, depth int) []byte {
	dst = append(append(dst, '\n'), prefix...)
	for remaining := depth; remaining > 0; remaining-- {
		dst = append(dst, indent...)
	}
	return dst
}
|
||||
|
||||
// indentGrowthFactor specifies the growth factor of indenting JSON input.
|
||||
// Empirically, the growth factor was measured to be between 1.4x to 1.8x
|
||||
// for some set of compacted JSON with the indent being a single tab.
|
||||
// Specify a growth factor slightly larger than what is observed
|
||||
// to reduce probability of allocation in appendIndent.
|
||||
// A factor no higher than 2 ensures that wasted space never exceeds 50%.
|
||||
const indentGrowthFactor = 2
|
||||
|
||||
// Indent appends to dst an indented form of the JSON-encoded src.
|
||||
// Each element in a JSON object or array begins on a new,
|
||||
// indented line beginning with prefix followed by one or more
|
||||
// copies of indent according to the indentation nesting.
|
||||
// The data appended to dst does not begin with the prefix nor
|
||||
// any indentation, to make it easier to embed inside other formatted JSON data.
|
||||
// Although leading space characters (space, tab, carriage return, newline)
|
||||
// at the beginning of src are dropped, trailing space characters
|
||||
// at the end of src are preserved and copied to dst.
|
||||
// For example, if src has no trailing spaces, neither will dst;
|
||||
// if src ends in a trailing newline, so will dst.
|
||||
func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
|
||||
dst.Grow(indentGrowthFactor * len(src))
|
||||
b := dst.AvailableBuffer()
|
||||
b, err := appendIndent(b, src, prefix, indent)
|
||||
dst.Write(b)
|
||||
return err
|
||||
}
|
||||
|
||||
func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) {
|
||||
origLen := len(dst)
|
||||
scan := newScanner()
|
||||
defer freeScanner(scan)
|
||||
needIndent := false
|
||||
depth := 0
|
||||
for _, c := range src {
|
||||
scan.bytes++
|
||||
v := scan.step(scan, c)
|
||||
if v == scanSkipSpace {
|
||||
continue
|
||||
}
|
||||
if v == scanError {
|
||||
break
|
||||
}
|
||||
if needIndent && v != scanEndObject && v != scanEndArray {
|
||||
needIndent = false
|
||||
depth++
|
||||
dst = appendNewline(dst, prefix, indent, depth)
|
||||
}
|
||||
|
||||
// Emit semantically uninteresting bytes
|
||||
// (in particular, punctuation in strings) unmodified.
|
||||
if v == scanContinue {
|
||||
dst = append(dst, c)
|
||||
continue
|
||||
}
|
||||
|
||||
// Add spacing around real punctuation.
|
||||
switch c {
|
||||
case '{', '[':
|
||||
// delay indent so that empty object and array are formatted as {} and [].
|
||||
needIndent = true
|
||||
dst = append(dst, c)
|
||||
case ',':
|
||||
dst = append(dst, c)
|
||||
dst = appendNewline(dst, prefix, indent, depth)
|
||||
case ':':
|
||||
dst = append(dst, c, ' ')
|
||||
case '}', ']':
|
||||
if needIndent {
|
||||
// suppress indent in empty object/array
|
||||
needIndent = false
|
||||
} else {
|
||||
depth--
|
||||
dst = appendNewline(dst, prefix, indent, depth)
|
||||
}
|
||||
dst = append(dst, c)
|
||||
default:
|
||||
dst = append(dst, c)
|
||||
}
|
||||
}
|
||||
if scan.eof() == scanError {
|
||||
return dst[:origLen], scan.err
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package internal
|
||||
|
||||
import "errors"
|
||||
|
||||
// NotForPublicUse is a marker type that an API is for internal use only.
|
||||
// It does not perfectly prevent usage of that API, but helps to restrict usage.
|
||||
// Anything with this marker is not covered by the Go compatibility agreement.
|
||||
type NotForPublicUse struct{}
|
||||
|
||||
// AllowInternalUse is passed from "json" to "jsontext" to authenticate
|
||||
// that the caller can have access to internal functionality.
|
||||
var AllowInternalUse NotForPublicUse
|
||||
|
||||
// Sentinel error values internally shared between jsonv1 and jsonv2.
|
||||
var (
|
||||
ErrCycle = errors.New("encountered a cycle")
|
||||
ErrNonNilReference = errors.New("value must be passed as a non-nil pointer reference")
|
||||
)
|
||||
|
||||
var (
|
||||
// TransformMarshalError converts a v2 error into a v1 error.
|
||||
// It is called only at the top-level of a Marshal function.
|
||||
TransformMarshalError func(any, error) error
|
||||
// NewMarshalerError constructs a jsonv1.MarshalerError.
|
||||
// It is called after a user-defined Marshal method/function fails.
|
||||
NewMarshalerError func(any, error, string) error
|
||||
// TransformUnmarshalError converts a v2 error into a v1 error.
|
||||
// It is called only at the top level of an Unmarshal function.
|
||||
TransformUnmarshalError func(any, error) error
|
||||
|
||||
// NewRawNumber returns new(jsonv1.Number).
|
||||
NewRawNumber func() any
|
||||
// RawNumberOf returns jsonv1.Number(b).
|
||||
RawNumberOf func(b []byte) any
|
||||
)
|
||||
@@ -1,215 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
// jsonflags implements all the optional boolean flags.
|
||||
// These flags are shared across "json", "jsontext", and "jsonopts".
|
||||
package jsonflags
|
||||
|
||||
import "next.orly.dev/pkg/json/internal"
|
||||
|
||||
// Bools represents zero or more boolean flags, all set to true or false.
|
||||
// The least-significant bit is the boolean value of all flags in the set.
|
||||
// The remaining bits identify which particular flags.
|
||||
//
|
||||
// In common usage, this is OR'd with 0 or 1. For example:
|
||||
// - (AllowInvalidUTF8 | 0) means "AllowInvalidUTF8 is false"
|
||||
// - (Multiline | Indent | 1) means "Multiline and Indent are true"
|
||||
type Bools uint64
|
||||
|
||||
func (Bools) JSONOptions(internal.NotForPublicUse) {}
|
||||
|
||||
const (
|
||||
// AllFlags is the set of all flags.
|
||||
AllFlags = AllCoderFlags | AllArshalV2Flags | AllArshalV1Flags
|
||||
|
||||
// AllCoderFlags is the set of all encoder/decoder flags.
|
||||
AllCoderFlags = (maxCoderFlag - 1) - initFlag
|
||||
|
||||
// AllArshalV2Flags is the set of all v2 marshal/unmarshal flags.
|
||||
AllArshalV2Flags = (maxArshalV2Flag - 1) - (maxCoderFlag - 1)
|
||||
|
||||
// AllArshalV1Flags is the set of all v1 marshal/unmarshal flags.
|
||||
AllArshalV1Flags = (maxArshalV1Flag - 1) - (maxArshalV2Flag - 1)
|
||||
|
||||
// NonBooleanFlags is the set of non-boolean flags,
|
||||
// where the value is some other concrete Go type.
|
||||
// The value of the flag is stored within jsonopts.Struct.
|
||||
NonBooleanFlags = 0 |
|
||||
Indent |
|
||||
IndentPrefix |
|
||||
ByteLimit |
|
||||
DepthLimit |
|
||||
Marshalers |
|
||||
Unmarshalers
|
||||
|
||||
// DefaultV1Flags is the set of boolean flags that default to true under
|
||||
// v1 semantics. None of the non-boolean flags differ between v1 and v2.
|
||||
DefaultV1Flags = 0 |
|
||||
AllowDuplicateNames |
|
||||
AllowInvalidUTF8 |
|
||||
EscapeForHTML |
|
||||
EscapeForJS |
|
||||
PreserveRawStrings |
|
||||
Deterministic |
|
||||
FormatNilMapAsNull |
|
||||
FormatNilSliceAsNull |
|
||||
MatchCaseInsensitiveNames |
|
||||
CallMethodsWithLegacySemantics |
|
||||
FormatByteArrayAsArray |
|
||||
FormatBytesWithLegacySemantics |
|
||||
FormatDurationAsNano |
|
||||
MatchCaseSensitiveDelimiter |
|
||||
MergeWithLegacySemantics |
|
||||
OmitEmptyWithLegacySemantics |
|
||||
ParseBytesWithLooseRFC4648 |
|
||||
ParseTimeWithLooseRFC3339 |
|
||||
ReportErrorsWithLegacySemantics |
|
||||
StringifyWithLegacySemantics |
|
||||
UnmarshalArrayFromAnyLength
|
||||
|
||||
// AnyWhitespace reports whether the encoded output might have any whitespace.
|
||||
AnyWhitespace = Multiline | SpaceAfterColon | SpaceAfterComma
|
||||
|
||||
// WhitespaceFlags is the set of flags related to whitespace formatting.
|
||||
// In contrast to AnyWhitespace, this includes Indent and IndentPrefix
|
||||
// as those settings take no effect if Multiline is false.
|
||||
WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix
|
||||
|
||||
// AnyEscape is the set of flags related to escaping in a JSON string.
|
||||
AnyEscape = EscapeForHTML | EscapeForJS
|
||||
|
||||
// CanonicalizeNumbers is the set of flags related to raw number canonicalization.
|
||||
CanonicalizeNumbers = CanonicalizeRawInts | CanonicalizeRawFloats
|
||||
)
|
||||
|
||||
// Encoder and decoder flags.
|
||||
const (
|
||||
initFlag Bools = 1 << iota // reserved for the boolean value itself
|
||||
|
||||
AllowDuplicateNames // encode or decode
|
||||
AllowInvalidUTF8 // encode or decode
|
||||
WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal
|
||||
OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite
|
||||
PreserveRawStrings // encode only
|
||||
CanonicalizeRawInts // encode only
|
||||
CanonicalizeRawFloats // encode only
|
||||
ReorderRawObjects // encode only
|
||||
EscapeForHTML // encode only
|
||||
EscapeForJS // encode only
|
||||
Multiline // encode only
|
||||
SpaceAfterColon // encode only
|
||||
SpaceAfterComma // encode only
|
||||
Indent // encode only; non-boolean flag
|
||||
IndentPrefix // encode only; non-boolean flag
|
||||
ByteLimit // encode or decode; non-boolean flag
|
||||
DepthLimit // encode or decode; non-boolean flag
|
||||
|
||||
maxCoderFlag
|
||||
)
|
||||
|
||||
// Marshal and Unmarshal flags (for v2).
|
||||
const (
|
||||
_ Bools = (maxCoderFlag >> 1) << iota
|
||||
|
||||
StringifyNumbers // marshal or unmarshal
|
||||
Deterministic // marshal only
|
||||
FormatNilMapAsNull // marshal only
|
||||
FormatNilSliceAsNull // marshal only
|
||||
OmitZeroStructFields // marshal only
|
||||
MatchCaseInsensitiveNames // marshal or unmarshal
|
||||
DiscardUnknownMembers // marshal only
|
||||
RejectUnknownMembers // unmarshal only
|
||||
Marshalers // marshal only; non-boolean flag
|
||||
Unmarshalers // unmarshal only; non-boolean flag
|
||||
|
||||
maxArshalV2Flag
|
||||
)
|
||||
|
||||
// Marshal and Unmarshal flags (for v1).
|
||||
const (
|
||||
_ Bools = (maxArshalV2Flag >> 1) << iota
|
||||
|
||||
CallMethodsWithLegacySemantics // marshal or unmarshal
|
||||
FormatByteArrayAsArray // marshal or unmarshal
|
||||
FormatBytesWithLegacySemantics // marshal or unmarshal
|
||||
FormatDurationAsNano // marshal or unmarshal
|
||||
MatchCaseSensitiveDelimiter // marshal or unmarshal
|
||||
MergeWithLegacySemantics // unmarshal
|
||||
OmitEmptyWithLegacySemantics // marshal
|
||||
ParseBytesWithLooseRFC4648 // unmarshal
|
||||
ParseTimeWithLooseRFC3339 // unmarshal
|
||||
ReportErrorsWithLegacySemantics // marshal or unmarshal
|
||||
StringifyWithLegacySemantics // marshal or unmarshal
|
||||
StringifyBoolsAndStrings // marshal or unmarshal; for internal use by jsonv2.makeStructArshaler
|
||||
UnmarshalAnyWithRawNumber // unmarshal; for internal use by jsonv1.Decoder.UseNumber
|
||||
UnmarshalArrayFromAnyLength // unmarshal
|
||||
|
||||
maxArshalV1Flag
|
||||
)
|
||||
|
||||
// bitsUsed is the number of bits used in the 64-bit boolean flags
|
||||
const bitsUsed = 42
|
||||
|
||||
// Static compile check that bitsUsed and maxArshalV1Flag are in sync.
|
||||
const _ = uint64((1<<bitsUsed)-maxArshalV1Flag) + uint64(maxArshalV1Flag-(1<<bitsUsed))
|
||||
|
||||
// Flags is a set of boolean flags.
// If the presence bit is zero, then the value bit must also be zero.
// The least-significant bit of both fields is always zero.
//
// Unlike Bools, which can represent a set of bools that are all true or false,
// Flags represents a set of bools, each individually may be true or false.
type Flags struct{ Presence, Values uint64 }

// Join joins two sets of flags such that the latter takes precedence.
// Every flag present in src overwrites the corresponding flag in dst;
// flags absent from src leave dst untouched.
func (dst *Flags) Join(src Flags) {
	// Copy over all source presence bits over to the destination (using OR),
	// then invert the source presence bits to clear out source value (using AND-NOT),
	// then copy over source value bits over to the destination (using OR).
	//	e.g., dst := Flags{Presence: 0b_1100_0011, Values: 0b_1000_0011}
	//	e.g., src := Flags{Presence: 0b_0101_1010, Values: 0b_1001_0010}
	dst.Presence |= src.Presence // e.g., 0b_1100_0011 | 0b_0101_1010 -> 0b_1101_1011
	dst.Values &= ^src.Presence  // e.g., 0b_1000_0011 & 0b_1010_0101 -> 0b_1000_0001
	dst.Values |= src.Values     // e.g., 0b_1000_0001 | 0b_1001_0010 -> 0b_1001_0011
}
|
||||
|
||||
// Set sets both the presence and value for the provided bool (or set of bools).
|
||||
func (fs *Flags) Set(f Bools) {
|
||||
// Select out the bits for the flag identifiers (everything except LSB),
|
||||
// then set the presence for all the identifier bits (using OR),
|
||||
// then invert the identifier bits to clear out the values (using AND-NOT),
|
||||
// then copy over all the identifier bits to the value if LSB is 1.
|
||||
// e.g., fs := Flags{Presence: 0b_0101_0010, Value: 0b_0001_0010}
|
||||
// e.g., f := 0b_1001_0001
|
||||
id := uint64(f) &^ uint64(1) // e.g., 0b_1001_0001 & 0b_1111_1110 -> 0b_1001_0000
|
||||
fs.Presence |= id // e.g., 0b_0101_0010 | 0b_1001_0000 -> 0b_1101_0011
|
||||
fs.Values &= ^id // e.g., 0b_0001_0010 & 0b_0110_1111 -> 0b_0000_0010
|
||||
fs.Values |= uint64(f&1) * id // e.g., 0b_0000_0010 | 0b_1001_0000 -> 0b_1001_0010
|
||||
}
|
||||
|
||||
// Get reports whether the bool (or any of the bools) is true.
|
||||
// This is generally only used with a singular bool.
|
||||
// The value bit of f (i.e., the LSB) is ignored.
|
||||
func (fs Flags) Get(f Bools) bool {
|
||||
return fs.Values&uint64(f) > 0
|
||||
}
|
||||
|
||||
// Has reports whether the bool (or any of the bools) is set.
|
||||
// The value bit of f (i.e., the LSB) is ignored.
|
||||
func (fs Flags) Has(f Bools) bool {
|
||||
return fs.Presence&uint64(f) > 0
|
||||
}
|
||||
|
||||
// Clear clears both the presence and value for the provided bool or bools.
|
||||
// The value bit of f (i.e., the LSB) is ignored.
|
||||
func (fs *Flags) Clear(f Bools) {
|
||||
// Invert f to produce a mask to clear all bits in f (using AND).
|
||||
// e.g., fs := Flags{Presence: 0b_0101_0010, Value: 0b_0001_0010}
|
||||
// e.g., f := 0b_0001_1000
|
||||
mask := uint64(^f) // e.g., 0b_0001_1000 -> 0b_1110_0111
|
||||
fs.Presence &= mask // e.g., 0b_0101_0010 & 0b_1110_0111 -> 0b_0100_0010
|
||||
fs.Values &= mask // e.g., 0b_0001_0010 & 0b_1110_0111 -> 0b_0000_0010
|
||||
}
|
||||
@@ -1,202 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonopts
|
||||
|
||||
import (
|
||||
"next.orly.dev/pkg/json/internal"
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
)
|
||||
|
||||
// Options is the common options type shared across json packages.
|
||||
type Options interface {
|
||||
// JSONOptions is exported so related json packages can implement Options.
|
||||
JSONOptions(internal.NotForPublicUse)
|
||||
}
|
||||
|
||||
// Struct is the combination of all options in struct form.
|
||||
// This is efficient to pass down the call stack and to query.
|
||||
type Struct struct {
|
||||
Flags jsonflags.Flags
|
||||
|
||||
CoderValues
|
||||
ArshalValues
|
||||
}
|
||||
|
||||
type CoderValues struct {
|
||||
Indent string // jsonflags.Indent
|
||||
IndentPrefix string // jsonflags.IndentPrefix
|
||||
ByteLimit int64 // jsonflags.ByteLimit
|
||||
DepthLimit int // jsonflags.DepthLimit
|
||||
}
|
||||
|
||||
type ArshalValues struct {
|
||||
// The Marshalers and Unmarshalers fields use the any type to avoid a
|
||||
// concrete dependency on *json.Marshalers and *json.Unmarshalers,
|
||||
// which would in turn create a dependency on the "reflect" package.
|
||||
|
||||
Marshalers any // jsonflags.Marshalers
|
||||
Unmarshalers any // jsonflags.Unmarshalers
|
||||
|
||||
Format string
|
||||
FormatDepth int
|
||||
}
|
||||
|
||||
// DefaultOptionsV2 is the set of all options that define default v2 behavior.
|
||||
var DefaultOptionsV2 = Struct{
|
||||
Flags: jsonflags.Flags{
|
||||
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
|
||||
Values: uint64(0),
|
||||
},
|
||||
}
|
||||
|
||||
// DefaultOptionsV1 is the set of all options that define default v1 behavior.
|
||||
var DefaultOptionsV1 = Struct{
|
||||
Flags: jsonflags.Flags{
|
||||
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
|
||||
Values: uint64(jsonflags.DefaultV1Flags),
|
||||
},
|
||||
}
|
||||
|
||||
// JSONOptions implements Options for *Struct. It has no behavior.
func (*Struct) JSONOptions(internal.NotForPublicUse) {}
|
||||
|
||||
// GetUnknownOption is injected by the "json" package to handle Options
|
||||
// declared in that package so that "jsonopts" can handle them.
|
||||
var GetUnknownOption = func(Struct, Options) (any, bool) { panic("unknown option") }
|
||||
|
||||
func GetOption[T any](opts Options, setter func(T) Options) (T, bool) {
|
||||
// Collapse the options to *Struct to simplify lookup.
|
||||
structOpts, ok := opts.(*Struct)
|
||||
if !ok {
|
||||
var structOpts2 Struct
|
||||
structOpts2.Join(opts)
|
||||
structOpts = &structOpts2
|
||||
}
|
||||
|
||||
// Lookup the option based on the return value of the setter.
|
||||
var zero T
|
||||
switch opt := setter(zero).(type) {
|
||||
case jsonflags.Bools:
|
||||
v := structOpts.Flags.Get(opt)
|
||||
ok := structOpts.Flags.Has(opt)
|
||||
return any(v).(T), ok
|
||||
case Indent:
|
||||
if !structOpts.Flags.Has(jsonflags.Indent) {
|
||||
return zero, false
|
||||
}
|
||||
return any(structOpts.Indent).(T), true
|
||||
case IndentPrefix:
|
||||
if !structOpts.Flags.Has(jsonflags.IndentPrefix) {
|
||||
return zero, false
|
||||
}
|
||||
return any(structOpts.IndentPrefix).(T), true
|
||||
case ByteLimit:
|
||||
if !structOpts.Flags.Has(jsonflags.ByteLimit) {
|
||||
return zero, false
|
||||
}
|
||||
return any(structOpts.ByteLimit).(T), true
|
||||
case DepthLimit:
|
||||
if !structOpts.Flags.Has(jsonflags.DepthLimit) {
|
||||
return zero, false
|
||||
}
|
||||
return any(structOpts.DepthLimit).(T), true
|
||||
default:
|
||||
v, ok := GetUnknownOption(*structOpts, opt)
|
||||
return v.(T), ok
|
||||
}
|
||||
}
|
||||
|
||||
// JoinUnknownOption is injected by the "json" package to handle Options
|
||||
// declared in that package so that "jsonopts" can handle them.
|
||||
var JoinUnknownOption = func(Struct, Options) Struct { panic("unknown option") }
|
||||
|
||||
func (dst *Struct) Join(srcs ...Options) {
|
||||
dst.join(false, srcs...)
|
||||
}
|
||||
|
||||
func (dst *Struct) JoinWithoutCoderOptions(srcs ...Options) {
|
||||
dst.join(true, srcs...)
|
||||
}
|
||||
|
||||
func (dst *Struct) join(excludeCoderOptions bool, srcs ...Options) {
|
||||
for _, src := range srcs {
|
||||
switch src := src.(type) {
|
||||
case nil:
|
||||
continue
|
||||
case jsonflags.Bools:
|
||||
if excludeCoderOptions {
|
||||
src &= ^jsonflags.AllCoderFlags
|
||||
}
|
||||
dst.Flags.Set(src)
|
||||
case Indent:
|
||||
if excludeCoderOptions {
|
||||
continue
|
||||
}
|
||||
dst.Flags.Set(jsonflags.Multiline | jsonflags.Indent | 1)
|
||||
dst.Indent = string(src)
|
||||
case IndentPrefix:
|
||||
if excludeCoderOptions {
|
||||
continue
|
||||
}
|
||||
dst.Flags.Set(jsonflags.Multiline | jsonflags.IndentPrefix | 1)
|
||||
dst.IndentPrefix = string(src)
|
||||
case ByteLimit:
|
||||
if excludeCoderOptions {
|
||||
continue
|
||||
}
|
||||
dst.Flags.Set(jsonflags.ByteLimit | 1)
|
||||
dst.ByteLimit = int64(src)
|
||||
case DepthLimit:
|
||||
if excludeCoderOptions {
|
||||
continue
|
||||
}
|
||||
dst.Flags.Set(jsonflags.DepthLimit | 1)
|
||||
dst.DepthLimit = int(src)
|
||||
case *Struct:
|
||||
srcFlags := src.Flags // shallow copy the flags
|
||||
if excludeCoderOptions {
|
||||
srcFlags.Clear(jsonflags.AllCoderFlags)
|
||||
}
|
||||
dst.Flags.Join(srcFlags)
|
||||
if srcFlags.Has(jsonflags.NonBooleanFlags) {
|
||||
if srcFlags.Has(jsonflags.Indent) {
|
||||
dst.Indent = src.Indent
|
||||
}
|
||||
if srcFlags.Has(jsonflags.IndentPrefix) {
|
||||
dst.IndentPrefix = src.IndentPrefix
|
||||
}
|
||||
if srcFlags.Has(jsonflags.ByteLimit) {
|
||||
dst.ByteLimit = src.ByteLimit
|
||||
}
|
||||
if srcFlags.Has(jsonflags.DepthLimit) {
|
||||
dst.DepthLimit = src.DepthLimit
|
||||
}
|
||||
if srcFlags.Has(jsonflags.Marshalers) {
|
||||
dst.Marshalers = src.Marshalers
|
||||
}
|
||||
if srcFlags.Has(jsonflags.Unmarshalers) {
|
||||
dst.Unmarshalers = src.Unmarshalers
|
||||
}
|
||||
}
|
||||
default:
|
||||
*dst = JoinUnknownOption(*dst, src)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type (
|
||||
Indent string // jsontext.WithIndent
|
||||
IndentPrefix string // jsontext.WithIndentPrefix
|
||||
ByteLimit int64 // jsontext.WithByteLimit
|
||||
DepthLimit int // jsontext.WithDepthLimit
|
||||
// type for jsonflags.Marshalers declared in "json" package
|
||||
// type for jsonflags.Unmarshalers declared in "json" package
|
||||
)
|
||||
|
||||
func (Indent) JSONOptions(internal.NotForPublicUse) {}
|
||||
func (IndentPrefix) JSONOptions(internal.NotForPublicUse) {}
|
||||
func (ByteLimit) JSONOptions(internal.NotForPublicUse) {}
|
||||
func (DepthLimit) JSONOptions(internal.NotForPublicUse) {}
|
||||
@@ -1,629 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"io"
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ValueFlags is a bit set of properties observed while consuming a JSON value.
// The zero value means the value is both verbatim and canonical.
type ValueFlags uint

const (
	_ ValueFlags = (1 << iota) / 2 // powers of two starting with zero

	stringNonVerbatim  // string cannot be naively treated as valid UTF-8
	stringNonCanonical // string not formatted according to RFC 8785, section 3.2.2.2.
	// TODO: Track whether a number is a non-integer?
)

// Join adds every flag set in f2 to f.
func (f *ValueFlags) Join(f2 ValueFlags) { *f |= f2 }

// IsVerbatim reports whether stringNonVerbatim is unset.
func (f ValueFlags) IsVerbatim() bool { return f&stringNonVerbatim == 0 }

// IsCanonical reports whether stringNonCanonical is unset.
func (f ValueFlags) IsCanonical() bool { return f&stringNonCanonical == 0 }
|
||||
|
||||
// ConsumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2.
// It returns the number of leading bytes of b that are a space, tab,
// carriage return, or newline.
func ConsumeWhitespace(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	for len(b) > n && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') {
		n++
	}
	return n
}
|
||||
|
||||
// ConsumeNull consumes the next JSON null literal per RFC 7159, section 3.
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used.
func ConsumeNull(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "null"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}
|
||||
|
||||
// ConsumeFalse consumes the next JSON false literal per RFC 7159, section 3.
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used.
func ConsumeFalse(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "false"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}
|
||||
|
||||
// ConsumeTrue consumes the next JSON true literal per RFC 7159, section 3.
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used.
func ConsumeTrue(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "true"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}
|
||||
|
||||
// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
func ConsumeLiteral(b []byte, lit string) (n int, err error) {
|
||||
for i := 0; i < len(b) && i < len(lit); i++ {
|
||||
if b[i] != lit[i] {
|
||||
return i, NewInvalidCharacterError(b[i:], "in literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
|
||||
}
|
||||
}
|
||||
if len(b) < len(lit) {
|
||||
return len(b), io.ErrUnexpectedEOF
|
||||
}
|
||||
return len(lit), nil
|
||||
}
|
||||
|
||||
// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7
|
||||
// but is limited to the grammar for an ASCII string without escape sequences.
|
||||
// It returns 0 if it is invalid or more complicated than a simple string,
|
||||
// in which case consumeString should be called.
|
||||
//
|
||||
// It rejects '<', '>', and '&' for compatibility reasons since these were
|
||||
// always escaped in the v1 implementation. Thus, if this function reports
|
||||
// non-zero then we know that the string would be encoded the same way
|
||||
// under both v1 or v2 escape semantics.
|
||||
func ConsumeSimpleString(b []byte) (n int) {
|
||||
// NOTE: The arguments and logic are kept simple to keep this inlinable.
|
||||
if len(b) > 0 && b[0] == '"' {
|
||||
n++
|
||||
for len(b) > n && b[n] < utf8.RuneSelf && escapeASCII[b[n]] == 0 {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) > uint(n) && b[n] == '"' {
|
||||
n++
|
||||
return n
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// ConsumeString consumes the next JSON string per RFC 7159, section 7.
|
||||
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
|
||||
// characters within the string itself.
|
||||
// It reports the number of bytes consumed and whether an error was encountered.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) {
|
||||
return ConsumeStringResumable(flags, b, 0, validateUTF8)
|
||||
}
|
||||
|
||||
// ConsumeStringResumable is identical to consumeString but supports resuming
|
||||
// from a previous call that returned io.ErrUnexpectedEOF.
|
||||
func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
|
||||
// Consume the leading double quote.
|
||||
switch {
|
||||
case resumeOffset > 0:
|
||||
n = resumeOffset // already handled the leading quote
|
||||
case uint(len(b)) == 0:
|
||||
return n, io.ErrUnexpectedEOF
|
||||
case b[0] == '"':
|
||||
n++
|
||||
default:
|
||||
return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
|
||||
}
|
||||
|
||||
// Consume every character in the string.
|
||||
for uint(len(b)) > uint(n) {
|
||||
// Optimize for long sequences of unescaped characters.
|
||||
noEscape := func(c byte) bool {
|
||||
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
|
||||
}
|
||||
for uint(len(b)) > uint(n) && noEscape(b[n]) {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) <= uint(n) {
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// Check for terminating double quote.
|
||||
if b[n] == '"' {
|
||||
n++
|
||||
return n, nil
|
||||
}
|
||||
|
||||
switch r, rn := utf8.DecodeRune(b[n:]); {
|
||||
// Handle UTF-8 encoded byte sequence.
|
||||
// Due to specialized handling of ASCII above, we know that
|
||||
// all normal sequences at this point must be 2 bytes or larger.
|
||||
case rn > 1:
|
||||
n += rn
|
||||
// Handle escape sequence.
|
||||
case r == '\\':
|
||||
flags.Join(stringNonVerbatim)
|
||||
resumeOffset = n
|
||||
if uint(len(b)) < uint(n+2) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch r := b[n+1]; r {
|
||||
case '/':
|
||||
// Forward slash is the only character with 3 representations.
|
||||
// Per RFC 8785, section 3.2.2.2., this must not be escaped.
|
||||
flags.Join(stringNonCanonical)
|
||||
n += 2
|
||||
case '"', '\\', 'b', 'f', 'n', 'r', 't':
|
||||
n += 2
|
||||
case 'u':
|
||||
if uint(len(b)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(b[n:], false) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n:])
|
||||
}
|
||||
v1, ok := parseHexUint16(b[n+2 : n+6])
|
||||
if !ok {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n : n+6])
|
||||
}
|
||||
// Only certain control characters can use the \uFFFF notation
|
||||
// for canonical formatting (per RFC 8785, section 3.2.2.2.).
|
||||
switch v1 {
|
||||
// \uFFFF notation not permitted for these characters.
|
||||
case '\b', '\f', '\n', '\r', '\t':
|
||||
flags.Join(stringNonCanonical)
|
||||
default:
|
||||
// \uFFFF notation only permitted for control characters.
|
||||
if v1 >= ' ' {
|
||||
flags.Join(stringNonCanonical)
|
||||
} else {
|
||||
// \uFFFF notation must be lower case.
|
||||
for _, c := range b[n+2 : n+6] {
|
||||
if 'A' <= c && c <= 'F' {
|
||||
flags.Join(stringNonCanonical)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
n += 6
|
||||
|
||||
r := rune(v1)
|
||||
if validateUTF8 && utf16.IsSurrogate(r) {
|
||||
if uint(len(b)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(b[n:], true) {
|
||||
return resumeOffset, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6:])
|
||||
} else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
|
||||
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
|
||||
flags.Join(stringNonCanonical)
|
||||
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
|
||||
} else {
|
||||
n += 6
|
||||
}
|
||||
}
|
||||
default:
|
||||
flags.Join(stringNonCanonical)
|
||||
return n, NewInvalidEscapeSequenceError(b[n : n+2])
|
||||
}
|
||||
// Handle invalid UTF-8.
|
||||
case r == utf8.RuneError:
|
||||
if !utf8.FullRune(b[n:]) {
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
flags.Join(stringNonVerbatim | stringNonCanonical)
|
||||
if validateUTF8 {
|
||||
return n, ErrInvalidUTF8
|
||||
}
|
||||
n++
|
||||
// Handle invalid control characters.
|
||||
case r < ' ':
|
||||
flags.Join(stringNonVerbatim | stringNonCanonical)
|
||||
return n, NewInvalidCharacterError(b[n:], "in string (expecting non-control character)")
|
||||
default:
|
||||
panic("BUG: unhandled character " + QuoteRune(b[n:]))
|
||||
}
|
||||
}
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// AppendUnquote appends the unescaped form of a JSON string in src to dst.
|
||||
// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,
|
||||
// but the error will be specified as having encountered such an error.
|
||||
// The input must be an entire JSON string with no surrounding whitespace.
|
||||
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) {
|
||||
dst = slices.Grow(dst, len(src))
|
||||
|
||||
// Consume the leading double quote.
|
||||
var i, n int
|
||||
switch {
|
||||
case uint(len(src)) == 0:
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
case src[0] == '"':
|
||||
i, n = 1, 1
|
||||
default:
|
||||
return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`)
|
||||
}
|
||||
|
||||
// Consume every character in the string.
|
||||
for uint(len(src)) > uint(n) {
|
||||
// Optimize for long sequences of unescaped characters.
|
||||
noEscape := func(c byte) bool {
|
||||
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
|
||||
}
|
||||
for uint(len(src)) > uint(n) && noEscape(src[n]) {
|
||||
n++
|
||||
}
|
||||
if uint(len(src)) <= uint(n) {
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// Check for terminating double quote.
|
||||
if src[n] == '"' {
|
||||
dst = append(dst, src[i:n]...)
|
||||
n++
|
||||
if n < len(src) {
|
||||
err = NewInvalidCharacterError(src[n:], "after string value")
|
||||
}
|
||||
return dst, err
|
||||
}
|
||||
|
||||
switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
|
||||
// Handle UTF-8 encoded byte sequence.
|
||||
// Due to specialized handling of ASCII above, we know that
|
||||
// all normal sequences at this point must be 2 bytes or larger.
|
||||
case rn > 1:
|
||||
n += rn
|
||||
// Handle escape sequence.
|
||||
case r == '\\':
|
||||
dst = append(dst, src[i:n]...)
|
||||
|
||||
// Handle escape sequence.
|
||||
if uint(len(src)) < uint(n+2) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch r := src[n+1]; r {
|
||||
case '"', '\\', '/':
|
||||
dst = append(dst, r)
|
||||
n += 2
|
||||
case 'b':
|
||||
dst = append(dst, '\b')
|
||||
n += 2
|
||||
case 'f':
|
||||
dst = append(dst, '\f')
|
||||
n += 2
|
||||
case 'n':
|
||||
dst = append(dst, '\n')
|
||||
n += 2
|
||||
case 'r':
|
||||
dst = append(dst, '\r')
|
||||
n += 2
|
||||
case 't':
|
||||
dst = append(dst, '\t')
|
||||
n += 2
|
||||
case 'u':
|
||||
if uint(len(src)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(src[n:], false) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
return dst, NewInvalidEscapeSequenceError(src[n:])
|
||||
}
|
||||
v1, ok := parseHexUint16(src[n+2 : n+6])
|
||||
if !ok {
|
||||
return dst, NewInvalidEscapeSequenceError(src[n : n+6])
|
||||
}
|
||||
n += 6
|
||||
|
||||
// Check whether this is a surrogate half.
|
||||
r := rune(v1)
|
||||
if utf16.IsSurrogate(r) {
|
||||
r = utf8.RuneError // assume failure unless the following succeeds
|
||||
if uint(len(src)) < uint(n+6) {
|
||||
if hasEscapedUTF16Prefix(src[n:], true) {
|
||||
return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF
|
||||
}
|
||||
err = NewInvalidEscapeSequenceError(src[n-6:])
|
||||
} else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok {
|
||||
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
|
||||
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
|
||||
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
|
||||
} else {
|
||||
n += 6
|
||||
}
|
||||
}
|
||||
|
||||
dst = utf8.AppendRune(dst, r)
|
||||
default:
|
||||
return dst, NewInvalidEscapeSequenceError(src[n : n+2])
|
||||
}
|
||||
i = n
|
||||
// Handle invalid UTF-8.
|
||||
case r == utf8.RuneError:
|
||||
dst = append(dst, src[i:n]...)
|
||||
if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) {
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
// NOTE: An unescaped string may be longer than the escaped string
|
||||
// because invalid UTF-8 bytes are being replaced.
|
||||
dst = append(dst, "\uFFFD"...)
|
||||
n += rn
|
||||
i = n
|
||||
err = ErrInvalidUTF8
|
||||
// Handle invalid control characters.
|
||||
case r < ' ':
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, NewInvalidCharacterError(src[n:], "in string (expecting non-control character)")
|
||||
default:
|
||||
panic("BUG: unhandled character " + QuoteRune(src[n:]))
|
||||
}
|
||||
}
|
||||
dst = append(dst, src[i:n]...)
|
||||
return dst, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
// hasEscapedUTF16Prefix reports whether b is possibly
|
||||
// the truncated prefix of a \uFFFF escape sequence.
|
||||
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
|
||||
for i := range len(b) {
|
||||
switch c := b[i]; {
|
||||
case i == 0 && c != '\\':
|
||||
return false
|
||||
case i == 1 && c != 'u':
|
||||
return false
|
||||
case i == 2 && lowerSurrogateHalf && c != 'd' && c != 'D':
|
||||
return false // not within ['\uDC00':'\uDFFF']
|
||||
case i == 3 && lowerSurrogateHalf && !('c' <= c && c <= 'f') && !('C' <= c && c <= 'F'):
|
||||
return false // not within ['\uDC00':'\uDFFF']
|
||||
case i >= 2 && i < 6 && !('0' <= c && c <= '9') && !('a' <= c && c <= 'f') && !('A' <= c && c <= 'F'):
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// UnquoteMayCopy returns the unescaped form of b.
|
||||
// If there are no escaped characters, the output is simply a subslice of
|
||||
// the input with the surrounding quotes removed.
|
||||
// Otherwise, a new buffer is allocated for the output.
|
||||
// It assumes the input is valid.
|
||||
func UnquoteMayCopy(b []byte, isVerbatim bool) []byte {
|
||||
// NOTE: The arguments and logic are kept simple to keep this inlinable.
|
||||
if isVerbatim {
|
||||
return b[len(`"`) : len(b)-len(`"`)]
|
||||
}
|
||||
b, _ = AppendUnquote(nil, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6
// but is limited to the grammar for a non-negative integer
// (no sign, fraction, or exponent).
// It returns 0 if it is invalid or more complicated than a simple integer,
// in which case ConsumeNumber should be called.
func ConsumeSimpleNumber(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) > 0 {
		if b[0] == '0' {
			n++
		} else if '1' <= b[0] && b[0] <= '9' {
			n++
			for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
				n++
			}
		} else {
			return 0
		}
		// Bail out if a fraction or exponent follows; that is not "simple".
		if uint(len(b)) <= uint(n) || (b[n] != '.' && b[n] != 'e' && b[n] != 'E') {
			return n
		}
	}
	return 0
}
|
||||
|
||||
// ConsumeNumberState identifies the position within the JSON number grammar
// at which ConsumeNumberResumable stopped.
//
// The constants are ordered so that incrementing a "withinX" state yields
// the "beforeY" state of the component that follows X.
type ConsumeNumberState uint

const (
	consumeNumberInit ConsumeNumberState = iota
	beforeIntegerDigits
	withinIntegerDigits
	beforeFractionalDigits
	withinFractionalDigits
	beforeExponentDigits
	withinExponentDigits
)
|
||||
|
||||
// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.
|
||||
// It reports the number of bytes consumed and whether an error was encountered.
|
||||
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
|
||||
//
|
||||
// Note that JSON numbers are not self-terminating.
|
||||
// If the entire input is consumed, then the caller needs to consider whether
|
||||
// there may be subsequent unread data that may still be part of this number.
|
||||
func ConsumeNumber(b []byte) (n int, err error) {
|
||||
n, _, err = ConsumeNumberResumable(b, 0, consumeNumberInit)
|
||||
return n, err
|
||||
}
|
||||
|
||||
// ConsumeNumberResumable is identical to consumeNumber but supports resuming
|
||||
// from a previous call that returned io.ErrUnexpectedEOF.
|
||||
func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) {
|
||||
// Jump to the right state when resuming from a partial consumption.
|
||||
n = resumeOffset
|
||||
if state > consumeNumberInit {
|
||||
switch state {
|
||||
case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
|
||||
// Consume leading digits.
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
if uint(len(b)) <= uint(n) {
|
||||
return n, state, nil // still within the same state
|
||||
}
|
||||
state++ // switches "withinX" to "beforeY" where Y is the state after X
|
||||
}
|
||||
switch state {
|
||||
case beforeIntegerDigits:
|
||||
goto beforeInteger
|
||||
case beforeFractionalDigits:
|
||||
goto beforeFractional
|
||||
case beforeExponentDigits:
|
||||
goto beforeExponent
|
||||
default:
|
||||
return n, state, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Consume required integer component (with optional minus sign).
|
||||
beforeInteger:
|
||||
resumeOffset = n
|
||||
if uint(len(b)) > 0 && b[0] == '-' {
|
||||
n++
|
||||
}
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
|
||||
case b[n] == '0':
|
||||
n++
|
||||
state = beforeFractionalDigits
|
||||
case '1' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinIntegerDigits
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
|
||||
// Consume optional fractional component.
|
||||
beforeFractional:
|
||||
if uint(len(b)) > uint(n) && b[n] == '.' {
|
||||
resumeOffset = n
|
||||
n++
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
|
||||
case '0' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinFractionalDigits
|
||||
}
|
||||
|
||||
// Consume optional exponent component.
|
||||
beforeExponent:
|
||||
if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') {
|
||||
resumeOffset = n
|
||||
n++
|
||||
if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') {
|
||||
n++
|
||||
}
|
||||
switch {
|
||||
case uint(len(b)) <= uint(n):
|
||||
return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
|
||||
case '0' <= b[n] && b[n] <= '9':
|
||||
n++
|
||||
default:
|
||||
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
|
||||
}
|
||||
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
|
||||
n++
|
||||
}
|
||||
state = withinExponentDigits
|
||||
}
|
||||
|
||||
return n, state, nil
|
||||
}
|
||||
|
||||
// parseHexUint16 decodes exactly four hexadecimal digits (either case)
// into a uint16. It plays the role of strconv.ParseUint for base-16 input,
// but works directly on a []byte or string.
// See https://go.dev/issue/42429.
//
// It reports ok=false if b is not exactly four bytes long or if any byte
// is not a hexadecimal digit.
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	const width = 4
	if len(b) != width {
		return 0, false
	}
	var acc uint16
	for i := 0; i < width; i++ {
		var nibble byte
		switch d := b[i]; {
		case d >= '0' && d <= '9':
			nibble = d - '0'
		case d >= 'a' && d <= 'f':
			nibble = d - 'a' + 10
		case d >= 'A' && d <= 'F':
			nibble = d - 'A' + 10
		default:
			return 0, false
		}
		// Each digit contributes four bits, most significant digit first.
		acc = acc<<4 | uint16(nibble)
	}
	return acc, true
}
|
||||
|
||||
// ParseUint parses b as a decimal unsigned integer according to
// a strict subset of the JSON number grammar: one or more ASCII digits
// with no sign, fraction, exponent, or superfluous leading zero.
//
// It returns (0, false) on a syntax error and
// (math.MaxUint64, false) if the value does not fit in a uint64.
func ParseUint(b []byte) (v uint64, ok bool) {
	const maxDigits = 20 // len(fmt.Sprint(uint64(math.MaxUint64)))

	// Accumulate digits; v may silently wrap for long inputs,
	// which is detected by the overflow checks below.
	digits := 0
	for digits < len(b) {
		c := b[digits]
		if c < '0' || c > '9' {
			break
		}
		v = v*10 + uint64(c-'0')
		digits++
	}

	// Syntax: need at least one digit, nothing but digits,
	// and no leading zero unless the number is exactly "0".
	if digits == 0 || digits != len(b) {
		return 0, false
	}
	if b[0] == '0' && len(b) > 1 {
		return 0, false
	}

	// Overflow: more than 20 digits never fits. With exactly 20 digits
	// the value fits only if it starts with '1' and did not wrap below 1e19.
	if digits > maxDigits {
		return math.MaxUint64, false
	}
	if digits == maxDigits && (b[0] != '1' || v < 1e19) {
		return math.MaxUint64, false
	}
	return v, true
}
|
||||
|
||||
// ParseFloat parses b as a floating-point number of the given bit size
// using the Go float grammar (of which the JSON number grammar is a
// strict subset).
//
// If the parsed value is infinite, it is clamped to the largest finite
// magnitude for the bit size (±MaxFloat32 or ±MaxFloat64), since any finite
// value is a better approximation of another finite value than infinity is.
// ok reports whether strconv.ParseFloat succeeded without error,
// so a clamped overflow is still reported with ok=false.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	parsed, err := strconv.ParseFloat(string(b), bits)
	if math.IsInf(parsed, 0) {
		// Replace the infinity with the finite limit of the same sign.
		// Bit sizes other than 32 and 64 are left untouched.
		switch bits {
		case 32:
			parsed = math.Copysign(math.MaxFloat32, parsed)
		case 64:
			parsed = math.Copysign(math.MaxFloat64, parsed)
		}
	}
	return parsed, err == nil
}
|
||||
@@ -1,290 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
)
|
||||
|
||||
// escapeASCII is a lookup table indexed by ASCII byte value; a non-zero
// entry means the character may need to be escaped in a JSON string.
// It conservatively assumes EscapeForHTML, so '<', '>', and '&' are marked
// in addition to the control characters, '"', and '\\'.
var escapeASCII = [128]uint8{
	// Control characters 0x00 through 0x1f.
	0x00: 1, 0x01: 1, 0x02: 1, 0x03: 1, 0x04: 1, 0x05: 1, 0x06: 1, 0x07: 1,
	0x08: 1, 0x09: 1, 0x0a: 1, 0x0b: 1, 0x0c: 1, 0x0d: 1, 0x0e: 1, 0x0f: 1,
	0x10: 1, 0x11: 1, 0x12: 1, 0x13: 1, 0x14: 1, 0x15: 1, 0x16: 1, 0x17: 1,
	0x18: 1, 0x19: 1, 0x1a: 1, 0x1b: 1, 0x1c: 1, 0x1d: 1, 0x1e: 1, 0x1f: 1,

	// Characters that are always escaped in JSON strings.
	'"':  1,
	'\\': 1,

	// Characters escaped only for HTML safety.
	'&': 1,
	'<': 1,
	'>': 1,
}
|
||||
|
||||
// NeedEscape reports whether src needs escaping of any characters.
|
||||
// It conservatively assumes EscapeForHTML and EscapeForJS.
|
||||
// It reports true for inputs with invalid UTF-8.
|
||||
func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
|
||||
var i int
|
||||
for uint(len(src)) > uint(i) {
|
||||
if c := src[i]; c < utf8.RuneSelf {
|
||||
if escapeASCII[c] > 0 {
|
||||
return true
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:])))
|
||||
if r == utf8.RuneError || r == '\u2028' || r == '\u2029' {
|
||||
return true
|
||||
}
|
||||
i += rn
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
|
||||
//
|
||||
// It takes in flags and respects the following:
|
||||
// - EscapeForHTML escapes '<', '>', and '&'.
|
||||
// - EscapeForJS escapes '\u2028' and '\u2029'.
|
||||
// - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
|
||||
//
|
||||
// Regardless of whether AllowInvalidUTF8 is specified,
|
||||
// invalid bytes are replaced with the Unicode replacement character ('\ufffd').
|
||||
// If no escape flags are set, then the shortest representable form is used,
|
||||
// which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
|
||||
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
|
||||
var i, n int
|
||||
var hasInvalidUTF8 bool
|
||||
dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
|
||||
dst = append(dst, '"')
|
||||
for uint(len(src)) > uint(n) {
|
||||
if c := src[n]; c < utf8.RuneSelf {
|
||||
// Handle single-byte ASCII.
|
||||
n++
|
||||
if escapeASCII[c] == 0 {
|
||||
continue // no escaping possibly needed
|
||||
}
|
||||
// Handle escaping of single-byte ASCII.
|
||||
if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
|
||||
dst = append(dst, src[i:n-1]...)
|
||||
dst = appendEscapedASCII(dst, c)
|
||||
i = n
|
||||
}
|
||||
} else {
|
||||
// Handle multi-byte Unicode.
|
||||
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
|
||||
n += rn
|
||||
if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
|
||||
continue // no escaping possibly needed
|
||||
}
|
||||
// Handle escaping of multi-byte Unicode.
|
||||
switch {
|
||||
case isInvalidUTF8(r, rn):
|
||||
hasInvalidUTF8 = true
|
||||
dst = append(dst, src[i:n-rn]...)
|
||||
dst = append(dst, "\ufffd"...)
|
||||
i = n
|
||||
case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
|
||||
dst = append(dst, src[i:n-rn]...)
|
||||
dst = appendEscapedUnicode(dst, r)
|
||||
i = n
|
||||
}
|
||||
}
|
||||
}
|
||||
dst = append(dst, src[i:n]...)
|
||||
dst = append(dst, '"')
|
||||
if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
|
||||
return dst, ErrInvalidUTF8
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
func appendEscapedASCII(dst []byte, c byte) []byte {
|
||||
switch c {
|
||||
case '"', '\\':
|
||||
dst = append(dst, '\\', c)
|
||||
case '\b':
|
||||
dst = append(dst, "\\b"...)
|
||||
case '\f':
|
||||
dst = append(dst, "\\f"...)
|
||||
case '\n':
|
||||
dst = append(dst, "\\n"...)
|
||||
case '\r':
|
||||
dst = append(dst, "\\r"...)
|
||||
case '\t':
|
||||
dst = append(dst, "\\t"...)
|
||||
default:
|
||||
dst = appendEscapedUTF16(dst, uint16(c))
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func appendEscapedUnicode(dst []byte, r rune) []byte {
|
||||
if r1, r2 := utf16.EncodeRune(r); r1 != '\ufffd' && r2 != '\ufffd' {
|
||||
dst = appendEscapedUTF16(dst, uint16(r1))
|
||||
dst = appendEscapedUTF16(dst, uint16(r2))
|
||||
} else {
|
||||
dst = appendEscapedUTF16(dst, uint16(r))
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// appendEscapedUTF16 appends the UTF-16 code unit x to dst as the
// six-byte escape `\uXXXX` using lowercase hexadecimal digits.
func appendEscapedUTF16(dst []byte, x uint16) []byte {
	const digits = "0123456789abcdef"
	seq := [...]byte{
		'\\', 'u',
		digits[(x>>12)&0xf],
		digits[(x>>8)&0xf],
		digits[(x>>4)&0xf],
		digits[x&0xf],
	}
	return append(dst, seq[:]...)
}
|
||||
|
||||
// ReformatString consumes a JSON string from src and appends it to dst,
|
||||
// reformatting it if necessary according to the specified flags.
|
||||
// It returns the appended output and the number of consumed input bytes.
|
||||
func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
|
||||
// TODO: Should this update ValueFlags as input?
|
||||
var valFlags ValueFlags
|
||||
n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
|
||||
if err != nil {
|
||||
return dst, n, err
|
||||
}
|
||||
|
||||
// If the output requires no special escapes, and the input
|
||||
// is already in canonical form or should be preserved verbatim,
|
||||
// then directly copy the input to the output.
|
||||
if !flags.Get(jsonflags.AnyEscape) &&
|
||||
(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
|
||||
dst = append(dst, src[:n]...) // copy the string verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
|
||||
// remain escaped, however we still need to respect the
|
||||
// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
|
||||
if flags.Get(jsonflags.PreserveRawStrings) {
|
||||
var i, lastAppendIndex int
|
||||
for i < n {
|
||||
if c := src[i]; c < utf8.RuneSelf {
|
||||
if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
|
||||
dst = append(dst, src[lastAppendIndex:i]...)
|
||||
dst = appendEscapedASCII(dst, c)
|
||||
lastAppendIndex = i + 1
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
|
||||
if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
|
||||
dst = append(dst, src[lastAppendIndex:i]...)
|
||||
dst = appendEscapedUnicode(dst, r)
|
||||
lastAppendIndex = i + rn
|
||||
}
|
||||
i += rn
|
||||
}
|
||||
}
|
||||
return append(dst, src[lastAppendIndex:n]...), n, nil
|
||||
}
|
||||
|
||||
// The input contains characters that might need escaping,
|
||||
// unnecessary escape sequences, or invalid UTF-8.
|
||||
// Perform a round-trip unquote and quote to properly reformat
|
||||
// these sequences according the current flags.
|
||||
b, _ := AppendUnquote(nil, src[:n])
|
||||
dst, _ = AppendQuote(dst, b, flags)
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// AppendFloat appends src to dst as a JSON number per RFC 7159, section 6,
// formatted similarly to the ES6 number-to-string conversion.
// See https://go.dev/issue/14135.
//
// For 64-bit floats the output matches ECMA-262, 6th edition,
// section 7.1.12.1 and RFC 8785, section 3.2.2.3, except that -0 is
// formatted as -0 instead of 0.
//
// For 32-bit floats the output is the 32-bit equivalent of that algorithm
// (ECMA-262 does not specify one). When bits is 32, src is first rounded
// to float32 precision.
func AppendFloat(dst []byte, src float64, bits int) []byte {
	if bits == 32 {
		src = float64(float32(src))
	}

	// Exponent notation is used for non-zero magnitudes below 1e-6 or at
	// least 1e21; the comparison is done at the precision named by bits.
	useExp := false
	switch mag := math.Abs(src); {
	case mag == 0:
		// Zero is always written in fixed notation.
	case bits == 64:
		useExp = mag < 1e-6 || mag >= 1e21
	case bits == 32:
		m32 := float32(mag)
		useExp = m32 < 1e-6 || m32 >= 1e21
	}
	if !useExp {
		return strconv.AppendFloat(dst, src, 'f', -1, bits)
	}

	dst = strconv.AppendFloat(dst, src, 'e', -1, bits)
	// strconv pads negative exponents to two digits; shorten e-09 to e-9.
	if n := len(dst); n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
		dst[n-2] = dst[n-1]
		dst = dst[:n-1]
	}
	return dst
}
|
||||
|
||||
// ReformatNumber consumes a JSON string from src and appends it to dst,
|
||||
// canonicalizing it if specified.
|
||||
// It returns the appended output and the number of consumed input bytes.
|
||||
func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
|
||||
n, err := ConsumeNumber(src)
|
||||
if err != nil {
|
||||
return dst, n, err
|
||||
}
|
||||
if !flags.Get(jsonflags.CanonicalizeNumbers) {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
// Identify the kind of number.
|
||||
var isFloat bool
|
||||
for _, c := range src[:n] {
|
||||
if c == '.' || c == 'e' || c == 'E' {
|
||||
isFloat = true // has fraction or exponent
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Check if need to canonicalize this kind of number.
|
||||
switch {
|
||||
case string(src[:n]) == "-0":
|
||||
break // canonicalize -0 as 0 regardless of kind
|
||||
case isFloat:
|
||||
if !flags.Get(jsonflags.CanonicalizeRawFloats) {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
default:
|
||||
// As an optimization, we can copy integer numbers below 2⁵³ verbatim
|
||||
// since the canonical form is always identical.
|
||||
const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
|
||||
if !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits {
|
||||
dst = append(dst, src[:n]...) // copy the number verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Parse and reformat the number (which uses a canonical format).
|
||||
fv, _ := strconv.ParseFloat(string(src[:n]), 64)
|
||||
switch {
|
||||
case fv == 0:
|
||||
fv = 0 // normalize negative zero as just zero
|
||||
case math.IsInf(fv, +1):
|
||||
fv = +math.MaxFloat64
|
||||
case math.IsInf(fv, -1):
|
||||
fv = -math.MaxFloat64
|
||||
}
|
||||
return AppendFloat(dst, fv, 64), n, nil
|
||||
}
|
||||
@@ -1,217 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
// Package jsonwire implements stateless functionality for handling JSON text.
|
||||
package jsonwire
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TrimSuffixWhitespace trims JSON whitespace (space, tab, carriage return,
// and newline) from the end of b.
func TrimSuffixWhitespace(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	end := len(b)
	for ; end > 0; end-- {
		if c := b[end-1]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
	}
	return b[:end]
}
|
||||
|
||||
// TrimSuffixString trims a valid JSON string from the end of b.
// The behavior is undefined if b does not end with a valid JSON string.
func TrimSuffixString(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	n := len(b)
	if n > 0 && b[n-1] == '"' {
		n-- // drop the closing quote
	}
	// Walk backwards until a quote that is not preceded by a backslash.
	for n >= 2 && (b[n-1] != '"' || b[n-2] == '\\') {
		n--
	}
	if n > 0 && b[n-1] == '"' {
		n-- // drop the opening quote
	}
	return b[:n]
}
|
||||
|
||||
// HasSuffixByte reports whether the last byte of b is c.
// It reports false for an empty b.
func HasSuffixByte(b []byte, c byte) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	n := len(b)
	if n == 0 {
		return false
	}
	return b[n-1] == c
}
|
||||
|
||||
// TrimSuffixByte returns b without its last byte if that byte is c,
// and returns b unchanged otherwise.
func TrimSuffixByte(b []byte, c byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if n := len(b); n != 0 && b[n-1] == c {
		b = b[:n-1]
	}
	return b
}
|
||||
|
||||
// QuoteRune quotes the first rune in the input.
|
||||
func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string {
|
||||
r, n := utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
|
||||
if r == utf8.RuneError && n == 1 {
|
||||
return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'`
|
||||
}
|
||||
return strconv.QuoteRune(r)
|
||||
}
|
||||
|
||||
// CompareUTF16 lexicographically compares x to y according
|
||||
// to the UTF-16 codepoints of the UTF-8 encoded input strings.
|
||||
// This implements the ordering specified in RFC 8785, section 3.2.3.
|
||||
func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
|
||||
// NOTE: This is an optimized, mostly allocation-free implementation
|
||||
// of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the
|
||||
// two implementations agree on the result of comparing any two strings.
|
||||
isUTF16Self := func(r rune) bool {
|
||||
return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF')
|
||||
}
|
||||
|
||||
for {
|
||||
if len(x) == 0 || len(y) == 0 {
|
||||
return cmp.Compare(len(x), len(y))
|
||||
}
|
||||
|
||||
// ASCII fast-path.
|
||||
if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf {
|
||||
if x[0] != y[0] {
|
||||
return cmp.Compare(x[0], y[0])
|
||||
}
|
||||
x, y = x[1:], y[1:]
|
||||
continue
|
||||
}
|
||||
|
||||
// Decode next pair of runes as UTF-8.
|
||||
rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x)))
|
||||
ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y)))
|
||||
|
||||
selfx := isUTF16Self(rx)
|
||||
selfy := isUTF16Self(ry)
|
||||
switch {
|
||||
// The x rune is a single UTF-16 codepoint, while
|
||||
// the y rune is a surrogate pair of UTF-16 codepoints.
|
||||
case selfx && !selfy:
|
||||
ry, _ = utf16.EncodeRune(ry)
|
||||
// The y rune is a single UTF-16 codepoint, while
|
||||
// the x rune is a surrogate pair of UTF-16 codepoints.
|
||||
case selfy && !selfx:
|
||||
rx, _ = utf16.EncodeRune(rx)
|
||||
}
|
||||
if rx != ry {
|
||||
return cmp.Compare(rx, ry)
|
||||
}
|
||||
|
||||
// Check for invalid UTF-8, in which case,
|
||||
// we just perform a byte-for-byte comparison.
|
||||
if isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny) {
|
||||
if x[0] != y[0] {
|
||||
return cmp.Compare(x[0], y[0])
|
||||
}
|
||||
}
|
||||
x, y = x[nx:], y[ny:]
|
||||
}
|
||||
}
|
||||
|
||||
// truncateMaxUTF8 truncates b to at most utf8.UTFMax bytes, which is
// always enough to hold the first rune.
//
// The utf8 package currently lacks generic variants, which complicates
// generic functions that operate on either []byte or string.
// As a workaround, callers always use the utf8 function operating on
// strings, but truncate the input first so that the result is identical.
//
// Example usage:
//
//	utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
//
// Converting a []byte to a string is stack allocated since
// truncateMaxUTF8 guarantees that the []byte is short.
func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
	// TODO(https://go.dev/issue/56948): Remove this function and
	// instead directly call generic utf8 functions wherever used.
	return b[:min(len(b), utf8.UTFMax)]
}
|
||||
|
||||
// ErrInvalidUTF8 is the sentinel error for input containing invalid UTF-8.
//
// TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead.
var ErrInvalidUTF8 = errors.New("invalid UTF-8")
|
||||
|
||||
func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error {
|
||||
what := QuoteRune(prefix)
|
||||
return errors.New("invalid character " + what + " " + where)
|
||||
}
|
||||
|
||||
// NewInvalidEscapeSequenceError returns an error describing the invalid
// escape sequence what. Inputs longer than six bytes are labelled as a
// "surrogate pair", shorter ones as an "escape sequence".
// The sequence is shown between backticks when it is printable and
// contains no backtick or whitespace; otherwise it is shown in
// strconv.Quote form.
func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
	text := string(what)

	kind := "escape sequence"
	if len(text) > 6 {
		kind = "surrogate pair"
	}

	// Prefer the raw backtick form; fall back to a Go-quoted form when the
	// text contains anything that would be unreadable or ambiguous.
	shown := "`" + text + "`"
	unsafe := func(r rune) bool {
		return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r)
	}
	if strings.IndexFunc(text, unsafe) != -1 {
		shown = strconv.Quote(text)
	}
	return errors.New("invalid " + kind + " " + shown + " in string")
}
|
||||
|
||||
// TruncatePointer optionally truncates the JSON pointer,
|
||||
// enforcing that the length roughly does not exceed n.
|
||||
func TruncatePointer(s string, n int) string {
|
||||
if len(s) <= n {
|
||||
return s
|
||||
}
|
||||
i := n / 2
|
||||
j := len(s) - n/2
|
||||
|
||||
// Avoid truncating a name if there are multiple names present.
|
||||
if k := strings.LastIndexByte(s[:i], '/'); k > 0 {
|
||||
i = k
|
||||
}
|
||||
if k := strings.IndexByte(s[j:], '/'); k >= 0 {
|
||||
j += k + len("/")
|
||||
}
|
||||
|
||||
// Avoid truncation in the middle of a UTF-8 rune.
|
||||
for i > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:i])) {
|
||||
i--
|
||||
}
|
||||
for j < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[j:])) {
|
||||
j++
|
||||
}
|
||||
|
||||
// Determine the right middle fragment to use.
|
||||
var middle string
|
||||
switch strings.Count(s[i:j], "/") {
|
||||
case 0:
|
||||
middle = "…"
|
||||
case 1:
|
||||
middle = "…/…"
|
||||
default:
|
||||
middle = "…/…/…"
|
||||
}
|
||||
if strings.HasPrefix(s[i:j], "/") && middle != "…" {
|
||||
middle = strings.TrimPrefix(middle, "…")
|
||||
}
|
||||
if strings.HasSuffix(s[i:j], "/") && middle != "…" {
|
||||
middle = strings.TrimSuffix(middle, "…")
|
||||
}
|
||||
return s[:i] + middle + s[j:]
|
||||
}
|
||||
|
||||
// isInvalidUTF8 reports whether the (rune, size) pair is utf8.RuneError
// with a size of one byte. A size other than 1 is never reported as
// invalid, so a RuneError of any other width does not count.
func isInvalidUTF8(r rune, rn int) bool {
	if rn != 1 {
		return false
	}
	return r == utf8.RuneError
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,116 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
// Package jsontext implements syntactic processing of JSON
|
||||
// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785.
|
||||
// JSON is a simple data interchange format that can represent
|
||||
// primitive data types such as booleans, strings, and numbers,
|
||||
// in addition to structured data types such as objects and arrays.
|
||||
//
|
||||
// This package (encoding/json/jsontext) is experimental,
|
||||
// and not subject to the Go 1 compatibility promise.
|
||||
// It only exists when building with the GOEXPERIMENT=jsonv2 environment variable set.
|
||||
// Most users should use [encoding/json].
|
||||
//
|
||||
// The [Encoder] and [Decoder] types are used to encode or decode
|
||||
// a stream of JSON tokens or values.
|
||||
//
|
||||
// # Tokens and Values
|
||||
//
|
||||
// A JSON token refers to the basic structural elements of JSON:
|
||||
//
|
||||
// - a JSON literal (i.e., null, true, or false)
|
||||
// - a JSON string (e.g., "hello, world!")
|
||||
// - a JSON number (e.g., 123.456)
|
||||
// - a begin or end delimiter for a JSON object (i.e., '{' or '}')
|
||||
// - a begin or end delimiter for a JSON array (i.e., '[' or ']')
|
||||
//
|
||||
// A JSON token is represented by the [Token] type in Go. Technically,
|
||||
// there are two additional structural characters (i.e., ':' and ','),
|
||||
// but there is no [Token] representation for them since their presence
|
||||
// can be inferred by the structure of the JSON grammar itself.
|
||||
// For example, there must always be an implicit colon between
|
||||
// the name and value of a JSON object member.
|
||||
//
|
||||
// A JSON value refers to a complete unit of JSON data:
|
||||
//
|
||||
// - a JSON literal, string, or number
|
||||
// - a JSON object (e.g., `{"name":"value"}`)
|
||||
// - a JSON array (e.g., `[1,2,3]`)
|
||||
//
|
||||
// A JSON value is represented by the [Value] type in Go and is a []byte
|
||||
// containing the raw textual representation of the value. There is some overlap
|
||||
// between tokens and values as both contain literals, strings, and numbers.
|
||||
// However, only a value can represent the entirety of a JSON object or array.
|
||||
//
|
||||
// The [Encoder] and [Decoder] types contain methods to read or write the next
|
||||
// [Token] or [Value] in a sequence. They maintain a state machine to validate
|
||||
// whether the sequence of JSON tokens and/or values produces a valid JSON.
|
||||
// [Options] may be passed to the [NewEncoder] or [NewDecoder] constructors
|
||||
// to configure the syntactic behavior of encoding and decoding.
|
||||
//
|
||||
// # Terminology
|
||||
//
|
||||
// The terms "encode" and "decode" are used for syntactic functionality
|
||||
// that is concerned with processing JSON based on its grammar, and
|
||||
// the terms "marshal" and "unmarshal" are used for semantic functionality
|
||||
// that determines the meaning of JSON values as Go values and vice-versa.
|
||||
// This package (i.e., [jsontext]) deals with JSON at a syntactic layer,
|
||||
// while [encoding/json/v2] deals with JSON at a semantic layer.
|
||||
// The goal is to provide a clear distinction between functionality that
|
||||
// is purely concerned with encoding versus that of marshaling.
|
||||
// For example, one can directly encode a stream of JSON tokens without
|
||||
// needing to marshal a concrete Go value representing them.
|
||||
// Similarly, one can decode a stream of JSON tokens without
|
||||
// needing to unmarshal them into a concrete Go value.
|
||||
//
|
||||
// This package uses JSON terminology when discussing JSON, which may differ
|
||||
// from related concepts in Go or elsewhere in computing literature.
|
||||
//
|
||||
// - a JSON "object" refers to an unordered collection of name/value members.
|
||||
// - a JSON "array" refers to an ordered sequence of elements.
|
||||
// - a JSON "value" refers to either a literal (i.e., null, false, or true),
|
||||
// string, number, object, or array.
|
||||
//
|
||||
// See RFC 8259 for more information.
|
||||
//
|
||||
// # Specifications
|
||||
//
|
||||
// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259,
|
||||
// and RFC 8785. Each RFC is generally a stricter subset of another RFC.
|
||||
// In increasing order of strictness:
|
||||
//
|
||||
// - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8
|
||||
// and also do not require (but recommend) that object names be unique.
|
||||
// - RFC 8259 requires the use of UTF-8,
|
||||
// but does not require (but recommends) that object names be unique.
|
||||
// - RFC 7493 requires the use of UTF-8
|
||||
// and also requires that object names be unique.
|
||||
// - RFC 8785 defines a canonical representation. It requires the use of UTF-8
|
||||
// and also requires that object names be unique and in a specific ordering.
|
||||
// It specifies exactly how strings and numbers must be formatted.
|
||||
//
|
||||
// The primary difference between RFC 4627 and RFC 7159 is that the former
|
||||
// restricted top-level values to only JSON objects and arrays, while
|
||||
// RFC 7159 and subsequent RFCs permit top-level values to additionally be
|
||||
// JSON nulls, booleans, strings, or numbers.
|
||||
//
|
||||
// By default, this package operates on RFC 7493, but can be configured
|
||||
// to operate according to the other RFC specifications.
|
||||
// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it.
|
||||
// In particular, it makes specific choices about behavior that RFC 8259
|
||||
// leaves as undefined in order to ensure greater interoperability.
|
||||
//
|
||||
// # Security Considerations
|
||||
//
|
||||
// See the "Security Considerations" section in [encoding/json/v2].
|
||||
package jsontext
|
||||
|
||||
// requireKeyedLiterals can be embedded in a struct to require keyed literals.
// It is an empty struct, so embedding it adds no data to the outer type.
type requireKeyedLiterals struct{}
|
||||
|
||||
// nonComparable can be embedded in a struct to prevent comparability.
// It is a zero-length array of func values: func types are not comparable
// in Go, so a struct containing this field cannot be compared with ==.
type nonComparable [0]func()
|
||||
@@ -1,972 +0,0 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"math/bits"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
"next.orly.dev/pkg/json/internal/jsonopts"
|
||||
"next.orly.dev/pkg/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// Encoder is a streaming encoder from raw JSON tokens and values.
// It is used to write a stream of top-level JSON values,
// each terminated with a newline character.
//
// [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved.
// For example, the following JSON value:
//
//	{"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be composed with the following calls (ignoring errors for brevity):
//
//	e.WriteToken(BeginObject)        // {
//	e.WriteToken(String("name"))     // "name"
//	e.WriteToken(String("value"))    // "value"
//	e.WriteValue(Value(`"array"`))   // "array"
//	e.WriteToken(BeginArray)         // [
//	e.WriteToken(Null)               // null
//	e.WriteToken(False)              // false
//	e.WriteValue(Value("true"))      // true
//	e.WriteToken(Float(3.14159))     // 3.14159
//	e.WriteToken(EndArray)           // ]
//	e.WriteValue(Value(`"object"`))  // "object"
//	e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"}
//	e.WriteToken(EndObject)          // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call [Encoder.WriteToken] with a string
// for object names.
type Encoder struct {
	// s holds all encoder state; Encoder itself is a thin wrapper around it.
	s encoderState
}
|
||||
|
||||
// encoderState is the low-level state of Encoder.
// It has exported fields and methods for use by the "json" package.
//
// It embeds the token state machine (state), the output buffer
// (encodeBuffer), and the active options (jsonopts.Struct).
type encoderState struct {
	state
	encodeBuffer
	jsonopts.Struct

	SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers
}
|
||||
|
||||
// encodeBuffer is a buffer split into 2 segments:
//
//   - Buf[0:len(Buf)]        // written (but unflushed) portion of the buffer
//   - Buf[len(Buf):cap(Buf)] // unused portion of the buffer
type encodeBuffer struct {
	Buf []byte // may alias wr if it is a bytes.Buffer

	// baseOffset is added to len(Buf) to obtain the absolute offset
	// relative to the start of io.Writer stream.
	baseOffset int64

	// wr is the destination that the buffered output is written to.
	wr io.Writer

	// maxValue is the approximate maximum Value size passed to WriteValue.
	maxValue int
	// availBuffer is the buffer returned by the AvailableBuffer method.
	availBuffer []byte // always has zero length
	// bufStats is statistics about buffer utilization.
	// It is only used with pooled encoders in pools.go.
	bufStats bufferStatistics
}
|
||||
|
||||
// NewEncoder constructs a new streaming encoder writing to w
|
||||
// configured with the provided options.
|
||||
// It flushes the internal buffer when the buffer is sufficiently full or
|
||||
// when a top-level value has been written.
|
||||
//
|
||||
// If w is a [bytes.Buffer], then the encoder appends directly into the buffer
|
||||
// without copying the contents from an intermediate buffer.
|
||||
func NewEncoder(w io.Writer, opts ...Options) *Encoder {
|
||||
e := new(Encoder)
|
||||
e.Reset(w, opts...)
|
||||
return e
|
||||
}
|
||||
|
||||
// Reset resets an encoder such that it is writing afresh to w and
|
||||
// configured with the provided options. Reset must not be called on
|
||||
// a Encoder passed to the [encoding/json/v2.MarshalerTo.MarshalJSONTo] method
|
||||
// or the [encoding/json/v2.MarshalToFunc] function.
|
||||
func (e *Encoder) Reset(w io.Writer, opts ...Options) {
|
||||
switch {
|
||||
case e == nil:
|
||||
panic("jsontext: invalid nil Encoder")
|
||||
case w == nil:
|
||||
panic("jsontext: invalid nil io.Writer")
|
||||
case e.s.Flags.Get(jsonflags.WithinArshalCall):
|
||||
panic("jsontext: cannot reset Encoder passed to json.MarshalerTo")
|
||||
}
|
||||
e.s.reset(nil, w, opts...)
|
||||
}
|
||||
|
||||
func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) {
|
||||
e.state.reset()
|
||||
e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: e.bufStats}
|
||||
if bb, ok := w.(*bytes.Buffer); ok && bb != nil {
|
||||
e.Buf = bb.AvailableBuffer() // alias the unused buffer of bb
|
||||
}
|
||||
opts2 := jsonopts.Struct{} // avoid mutating e.Struct in case it is part of opts
|
||||
opts2.Join(opts...)
|
||||
e.Struct = opts2
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
if !e.Flags.Has(jsonflags.SpaceAfterColon) {
|
||||
e.Flags.Set(jsonflags.SpaceAfterColon | 1)
|
||||
}
|
||||
if !e.Flags.Has(jsonflags.SpaceAfterComma) {
|
||||
e.Flags.Set(jsonflags.SpaceAfterComma | 0)
|
||||
}
|
||||
if !e.Flags.Has(jsonflags.Indent) {
|
||||
e.Flags.Set(jsonflags.Indent | 1)
|
||||
e.Indent = "\t"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Options returns the options used to construct the decoder and
|
||||
// may additionally contain semantic options passed to a
|
||||
// [encoding/json/v2.MarshalEncode] call.
|
||||
//
|
||||
// If operating within
|
||||
// a [encoding/json/v2.MarshalerTo.MarshalJSONTo] method call or
|
||||
// a [encoding/json/v2.MarshalToFunc] function call,
|
||||
// then the returned options are only valid within the call.
|
||||
func (e *Encoder) Options() Options {
|
||||
return &e.s.Struct
|
||||
}
|
||||
|
||||
// NeedFlush determines whether to flush at this point.
|
||||
func (e *encoderState) NeedFlush() bool {
|
||||
// NOTE: This function is carefully written to be inlinable.
|
||||
|
||||
// Avoid flushing if e.wr is nil since there is no underlying writer.
|
||||
// Flush if less than 25% of the capacity remains.
|
||||
// Flushing at some constant fraction ensures that the buffer stops growing
|
||||
// so long as the largest Token or Value fits within that unused capacity.
|
||||
return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4)
|
||||
}
|
||||
|
||||
// Flush flushes the buffer to the underlying io.Writer.
|
||||
// It may append a trailing newline after the top-level value.
|
||||
func (e *encoderState) Flush() error {
|
||||
if e.wr == nil || e.avoidFlush() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// In streaming mode, always emit a newline after the top-level value.
|
||||
if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) {
|
||||
e.Buf = append(e.Buf, '\n')
|
||||
}
|
||||
|
||||
// Inform objectNameStack that we are about to flush the buffer content.
|
||||
e.Names.copyQuotedBuffer(e.Buf)
|
||||
|
||||
// Specialize bytes.Buffer for better performance.
|
||||
if bb, ok := e.wr.(*bytes.Buffer); ok {
|
||||
// If e.buf already aliases the internal buffer of bb,
|
||||
// then the Write call simply increments the internal offset,
|
||||
// otherwise Write operates as expected.
|
||||
// See https://go.dev/issue/42986.
|
||||
n, _ := bb.Write(e.Buf) // never fails unless bb is nil
|
||||
e.baseOffset += int64(n)
|
||||
|
||||
// If the internal buffer of bytes.Buffer is too small,
|
||||
// append operations elsewhere in the Encoder may grow the buffer.
|
||||
// This would be semantically correct, but hurts performance.
|
||||
// As such, ensure 25% of the current length is always available
|
||||
// to reduce the probability that other appends must allocate.
|
||||
if avail := bb.Available(); avail < bb.Len()/4 {
|
||||
bb.Grow(avail + 1)
|
||||
}
|
||||
|
||||
e.Buf = bb.AvailableBuffer()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flush the internal buffer to the underlying io.Writer.
|
||||
n, err := e.wr.Write(e.Buf)
|
||||
e.baseOffset += int64(n)
|
||||
if err != nil {
|
||||
// In the event of an error, preserve the unflushed portion.
|
||||
// Thus, write errors aren't fatal so long as the io.Writer
|
||||
// maintains consistent state after errors.
|
||||
if n > 0 {
|
||||
e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])]
|
||||
}
|
||||
return &ioError{action: "write", err: err}
|
||||
}
|
||||
e.Buf = e.Buf[:0]
|
||||
|
||||
// Check whether to grow the buffer.
|
||||
// Note that cap(e.buf) may already exceed maxBufferSize since
|
||||
// an append elsewhere already grew it to store a large token.
|
||||
const maxBufferSize = 4 << 10
|
||||
const growthSizeFactor = 2 // higher value is faster
|
||||
const growthRateFactor = 2 // higher value is slower
|
||||
// By default, grow if below the maximum buffer size.
|
||||
grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor
|
||||
// Growing can be expensive, so only grow
|
||||
// if a sufficient number of bytes have been processed.
|
||||
grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor
|
||||
if grow {
|
||||
e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func (d *encodeBuffer) offsetAt(pos int) int64 { return d.baseOffset + int64(pos) }
|
||||
func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) }
|
||||
func (e *encodeBuffer) unflushedBuffer() []byte { return e.Buf }
|
||||
|
||||
// avoidFlush indicates whether to avoid flushing to ensure there is always
|
||||
// enough in the buffer to unwrite the last object member if it were empty.
|
||||
func (e *encoderState) avoidFlush() bool {
|
||||
switch {
|
||||
case e.Tokens.Last.Length() == 0:
|
||||
// Never flush after BeginObject or BeginArray since we don't know yet
|
||||
// if the object or array will end up being empty.
|
||||
return true
|
||||
case e.Tokens.Last.needObjectValue():
|
||||
// Never flush before the object value since we don't know yet
|
||||
// if the object value will end up being empty.
|
||||
return true
|
||||
case e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2:
|
||||
// Never flush after the object value if it does turn out to be empty.
|
||||
switch string(e.Buf[len(e.Buf)-2:]) {
|
||||
case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// UnwriteEmptyObjectMember unwrites the last object member if it is empty
|
||||
// and reports whether it performed an unwrite operation.
|
||||
func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool {
|
||||
if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 {
|
||||
panic("BUG: must be called on an object after writing a value")
|
||||
}
|
||||
|
||||
// The flushing logic is modified to never flush a trailing empty value.
|
||||
// The encoder never writes trailing whitespace eagerly.
|
||||
b := e.unflushedBuffer()
|
||||
|
||||
// Detect whether the last value was empty.
|
||||
var n int
|
||||
if len(b) >= 3 {
|
||||
switch string(b[len(b)-2:]) {
|
||||
case "ll": // last two bytes of `null`
|
||||
n = len(`null`)
|
||||
case `""`:
|
||||
// It is possible for a non-empty string to have `""` as a suffix
|
||||
// if the second to the last quote was escaped.
|
||||
if b[len(b)-3] == '\\' {
|
||||
return false // e.g., `"\""` is not empty
|
||||
}
|
||||
n = len(`""`)
|
||||
case `{}`:
|
||||
n = len(`{}`)
|
||||
case `[]`:
|
||||
n = len(`[]`)
|
||||
}
|
||||
}
|
||||
if n == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Unwrite the value, whitespace, colon, name, whitespace, and comma.
|
||||
b = b[:len(b)-n]
|
||||
b = jsonwire.TrimSuffixWhitespace(b)
|
||||
b = jsonwire.TrimSuffixByte(b, ':')
|
||||
b = jsonwire.TrimSuffixString(b)
|
||||
b = jsonwire.TrimSuffixWhitespace(b)
|
||||
b = jsonwire.TrimSuffixByte(b, ',')
|
||||
e.Buf = b // store back truncated unflushed buffer
|
||||
|
||||
// Undo state changes.
|
||||
e.Tokens.Last.decrement() // for object member value
|
||||
e.Tokens.Last.decrement() // for object member name
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if e.Tokens.Last.isActiveNamespace() {
|
||||
e.Namespaces.Last().removeLast()
|
||||
}
|
||||
}
|
||||
e.Names.clearLast()
|
||||
if prevName != nil {
|
||||
e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName
|
||||
e.Names.replaceLastUnquotedName(*prevName)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// UnwriteOnlyObjectMemberName unwrites the only object member name
|
||||
// and returns the unquoted name.
|
||||
func (e *encoderState) UnwriteOnlyObjectMemberName() string {
|
||||
if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 {
|
||||
panic("BUG: must be called on an object after writing first name")
|
||||
}
|
||||
|
||||
// Unwrite the name and whitespace.
|
||||
b := jsonwire.TrimSuffixString(e.Buf)
|
||||
isVerbatim := bytes.IndexByte(e.Buf[len(b):], '\\') < 0
|
||||
name := string(jsonwire.UnquoteMayCopy(e.Buf[len(b):], isVerbatim))
|
||||
e.Buf = jsonwire.TrimSuffixWhitespace(b)
|
||||
|
||||
// Undo state changes.
|
||||
e.Tokens.Last.decrement()
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if e.Tokens.Last.isActiveNamespace() {
|
||||
e.Namespaces.Last().removeLast()
|
||||
}
|
||||
}
|
||||
e.Names.clearLast()
|
||||
return name
|
||||
}
|
||||
|
||||
// WriteToken writes the next token and advances the internal write offset.
|
||||
//
|
||||
// The provided token kind must be consistent with the JSON grammar.
|
||||
// For example, it is an error to provide a number when the encoder
|
||||
// is expecting an object name (which is always a string), or
|
||||
// to provide an end object delimiter when the encoder is finishing an array.
|
||||
// If the provided token is invalid, then it reports a [SyntacticError] and
|
||||
// the internal state remains unchanged. The offset reported
|
||||
// in [SyntacticError] will be relative to the [Encoder.OutputOffset].
|
||||
func (e *Encoder) WriteToken(t Token) error {
|
||||
return e.s.WriteToken(t)
|
||||
}
|
||||
func (e *encoderState) WriteToken(t Token) error {
|
||||
k := t.Kind()
|
||||
b := e.Buf // use local variable to avoid mutating e in case of error
|
||||
|
||||
// Append any delimiters or optional whitespace.
|
||||
b = e.Tokens.MayAppendDelim(b, k)
|
||||
if e.Flags.Get(jsonflags.AnyWhitespace) {
|
||||
b = e.appendWhitespace(b, k)
|
||||
}
|
||||
pos := len(b) // offset before the token
|
||||
|
||||
// Append the token to the output and to the state machine.
|
||||
var err error
|
||||
switch k {
|
||||
case 'n':
|
||||
b = append(b, "null"...)
|
||||
err = e.Tokens.appendLiteral()
|
||||
case 'f':
|
||||
b = append(b, "false"...)
|
||||
err = e.Tokens.appendLiteral()
|
||||
case 't':
|
||||
b = append(b, "true"...)
|
||||
err = e.Tokens.appendLiteral()
|
||||
case '"':
|
||||
if b, err = t.appendString(b, &e.Flags); err != nil {
|
||||
break
|
||||
}
|
||||
if e.Tokens.Last.NeedObjectName() {
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if !e.Tokens.Last.isValidNamespace() {
|
||||
err = errInvalidNamespace
|
||||
break
|
||||
}
|
||||
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
|
||||
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
|
||||
break
|
||||
}
|
||||
}
|
||||
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
|
||||
}
|
||||
err = e.Tokens.appendString()
|
||||
case '0':
|
||||
if b, err = t.appendNumber(b, &e.Flags); err != nil {
|
||||
break
|
||||
}
|
||||
err = e.Tokens.appendNumber()
|
||||
case '{':
|
||||
b = append(b, '{')
|
||||
if err = e.Tokens.pushObject(); err != nil {
|
||||
break
|
||||
}
|
||||
e.Names.push()
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
e.Namespaces.push()
|
||||
}
|
||||
case '}':
|
||||
b = append(b, '}')
|
||||
if err = e.Tokens.popObject(); err != nil {
|
||||
break
|
||||
}
|
||||
e.Names.pop()
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
e.Namespaces.pop()
|
||||
}
|
||||
case '[':
|
||||
b = append(b, '[')
|
||||
err = e.Tokens.pushArray()
|
||||
case ']':
|
||||
b = append(b, ']')
|
||||
err = e.Tokens.popArray()
|
||||
default:
|
||||
err = errInvalidToken
|
||||
}
|
||||
if err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
|
||||
// Finish off the buffer and store it back into e.
|
||||
e.Buf = b
|
||||
if e.NeedFlush() {
|
||||
return e.Flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AppendRaw appends either a raw string (without double quotes) or number.
|
||||
// Specify safeASCII if the string output is guaranteed to be ASCII
|
||||
// without any characters (including '<', '>', and '&') that need escaping,
|
||||
// otherwise this will validate whether the string needs escaping.
|
||||
// The appended bytes for a JSON number must be valid.
|
||||
//
|
||||
// This is a specialized implementation of Encoder.WriteValue
|
||||
// that allows appending directly into the buffer.
|
||||
// It is only called from marshal logic in the "json" package.
|
||||
func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error {
|
||||
b := e.Buf // use local variable to avoid mutating e in case of error
|
||||
|
||||
// Append any delimiters or optional whitespace.
|
||||
b = e.Tokens.MayAppendDelim(b, k)
|
||||
if e.Flags.Get(jsonflags.AnyWhitespace) {
|
||||
b = e.appendWhitespace(b, k)
|
||||
}
|
||||
pos := len(b) // offset before the token
|
||||
|
||||
var err error
|
||||
switch k {
|
||||
case '"':
|
||||
// Append directly into the encoder buffer by assuming that
|
||||
// most of the time none of the characters need escaping.
|
||||
b = append(b, '"')
|
||||
if b, err = appendFn(b); err != nil {
|
||||
return err
|
||||
}
|
||||
b = append(b, '"')
|
||||
|
||||
// Check whether we need to escape the string and if necessary
|
||||
// copy it to a scratch buffer and then escape it back.
|
||||
isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)])
|
||||
if !isVerbatim {
|
||||
var err error
|
||||
b2 := append(e.availBuffer, b[pos+len(`"`):len(b)-len(`"`)]...)
|
||||
b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags)
|
||||
e.availBuffer = b2[:0]
|
||||
if err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
}
|
||||
|
||||
// Update the state machine.
|
||||
if e.Tokens.Last.NeedObjectName() {
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if !e.Tokens.Last.isValidNamespace() {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) {
|
||||
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
}
|
||||
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
|
||||
}
|
||||
if err := e.Tokens.appendString(); err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
case '0':
|
||||
if b, err = appendFn(b); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := e.Tokens.appendNumber(); err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
default:
|
||||
panic("BUG: invalid kind")
|
||||
}
|
||||
|
||||
// Finish off the buffer and store it back into e.
|
||||
e.Buf = b
|
||||
if e.NeedFlush() {
|
||||
return e.Flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteValue writes the next raw value and advances the internal write offset.
|
||||
// The Encoder does not simply copy the provided value verbatim, but
|
||||
// parses it to ensure that it is syntactically valid and reformats it
|
||||
// according to how the Encoder is configured to format whitespace and strings.
|
||||
// If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled
|
||||
// as the Unicode replacement character, U+FFFD.
|
||||
//
|
||||
// The provided value kind must be consistent with the JSON grammar
|
||||
// (see examples on [Encoder.WriteToken]). If the provided value is invalid,
|
||||
// then it reports a [SyntacticError] and the internal state remains unchanged.
|
||||
// The offset reported in [SyntacticError] will be relative to the
|
||||
// [Encoder.OutputOffset] plus the offset into v of any encountered syntax error.
|
||||
func (e *Encoder) WriteValue(v Value) error {
|
||||
return e.s.WriteValue(v)
|
||||
}
|
||||
func (e *encoderState) WriteValue(v Value) error {
|
||||
e.maxValue |= len(v) // bitwise OR is a fast approximation of max
|
||||
|
||||
k := v.Kind()
|
||||
b := e.Buf // use local variable to avoid mutating e in case of error
|
||||
|
||||
// Append any delimiters or optional whitespace.
|
||||
b = e.Tokens.MayAppendDelim(b, k)
|
||||
if e.Flags.Get(jsonflags.AnyWhitespace) {
|
||||
b = e.appendWhitespace(b, k)
|
||||
}
|
||||
pos := len(b) // offset before the value
|
||||
|
||||
// Append the value the output.
|
||||
var n int
|
||||
n += jsonwire.ConsumeWhitespace(v[n:])
|
||||
b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth())
|
||||
if err != nil {
|
||||
return wrapSyntacticError(e, err, pos+n+m, +1)
|
||||
}
|
||||
n += m
|
||||
n += jsonwire.ConsumeWhitespace(v[n:])
|
||||
if len(v) > n {
|
||||
err = jsonwire.NewInvalidCharacterError(v[n:], "after top-level value")
|
||||
return wrapSyntacticError(e, err, pos+n, 0)
|
||||
}
|
||||
|
||||
// Append the kind to the state machine.
|
||||
switch k {
|
||||
case 'n', 'f', 't':
|
||||
err = e.Tokens.appendLiteral()
|
||||
case '"':
|
||||
if e.Tokens.Last.NeedObjectName() {
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
if !e.Tokens.Last.isValidNamespace() {
|
||||
err = errInvalidNamespace
|
||||
break
|
||||
}
|
||||
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
|
||||
err = wrapWithObjectName(ErrDuplicateName, b[pos:])
|
||||
break
|
||||
}
|
||||
}
|
||||
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
|
||||
}
|
||||
err = e.Tokens.appendString()
|
||||
case '0':
|
||||
err = e.Tokens.appendNumber()
|
||||
case '{':
|
||||
if err = e.Tokens.pushObject(); err != nil {
|
||||
break
|
||||
}
|
||||
if err = e.Tokens.popObject(); err != nil {
|
||||
panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
|
||||
}
|
||||
if e.Flags.Get(jsonflags.ReorderRawObjects) {
|
||||
mustReorderObjects(b[pos:])
|
||||
}
|
||||
case '[':
|
||||
if err = e.Tokens.pushArray(); err != nil {
|
||||
break
|
||||
}
|
||||
if err = e.Tokens.popArray(); err != nil {
|
||||
panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
|
||||
}
|
||||
if e.Flags.Get(jsonflags.ReorderRawObjects) {
|
||||
mustReorderObjects(b[pos:])
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return wrapSyntacticError(e, err, pos, +1)
|
||||
}
|
||||
|
||||
// Finish off the buffer and store it back into e.
|
||||
e.Buf = b
|
||||
if e.NeedFlush() {
|
||||
return e.Flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CountNextDelimWhitespace counts the number of bytes of delimiter and
|
||||
// whitespace bytes assuming the upcoming token is a JSON value.
|
||||
// This method is used for error reporting at the semantic layer.
|
||||
func (e *encoderState) CountNextDelimWhitespace() (n int) {
|
||||
const next = Kind('"') // arbitrary kind as next JSON value
|
||||
delim := e.Tokens.needDelim(next)
|
||||
if delim > 0 {
|
||||
n += len(",") | len(":")
|
||||
}
|
||||
if delim == ':' {
|
||||
if e.Flags.Get(jsonflags.SpaceAfterColon) {
|
||||
n += len(" ")
|
||||
}
|
||||
} else {
|
||||
if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
|
||||
n += len(" ")
|
||||
}
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
if m := e.Tokens.NeedIndent(next); m > 0 {
|
||||
n += len("\n") + len(e.IndentPrefix) + (m-1)*len(e.Indent)
|
||||
}
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// appendWhitespace appends whitespace that immediately precedes the next token.
|
||||
func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte {
|
||||
if delim := e.Tokens.needDelim(next); delim == ':' {
|
||||
if e.Flags.Get(jsonflags.SpaceAfterColon) {
|
||||
b = append(b, ' ')
|
||||
}
|
||||
} else {
|
||||
if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
|
||||
b = append(b, ' ')
|
||||
}
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
b = e.AppendIndent(b, e.Tokens.NeedIndent(next))
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// AppendIndent appends the appropriate number of indentation characters
|
||||
// for the current nested level, n.
|
||||
func (e *encoderState) AppendIndent(b []byte, n int) []byte {
|
||||
if n == 0 {
|
||||
return b
|
||||
}
|
||||
b = append(b, '\n')
|
||||
b = append(b, e.IndentPrefix...)
|
||||
for ; n > 1; n-- {
|
||||
b = append(b, e.Indent...)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// reformatValue parses a JSON value from the start of src and
|
||||
// appends it to the end of dst, reformatting whitespace and strings as needed.
|
||||
// It returns the extended dst buffer and the number of consumed input bytes.
|
||||
func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) {
|
||||
// TODO: Should this update ValueFlags as input?
|
||||
if len(src) == 0 {
|
||||
return dst, 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch k := Kind(src[0]).normalize(); k {
|
||||
case 'n':
|
||||
if jsonwire.ConsumeNull(src) == 0 {
|
||||
n, err := jsonwire.ConsumeLiteral(src, "null")
|
||||
return dst, n, err
|
||||
}
|
||||
return append(dst, "null"...), len("null"), nil
|
||||
case 'f':
|
||||
if jsonwire.ConsumeFalse(src) == 0 {
|
||||
n, err := jsonwire.ConsumeLiteral(src, "false")
|
||||
return dst, n, err
|
||||
}
|
||||
return append(dst, "false"...), len("false"), nil
|
||||
case 't':
|
||||
if jsonwire.ConsumeTrue(src) == 0 {
|
||||
n, err := jsonwire.ConsumeLiteral(src, "true")
|
||||
return dst, n, err
|
||||
}
|
||||
return append(dst, "true"...), len("true"), nil
|
||||
case '"':
|
||||
if n := jsonwire.ConsumeSimpleString(src); n > 0 {
|
||||
dst = append(dst, src[:n]...) // copy simple strings verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
return jsonwire.ReformatString(dst, src, &e.Flags)
|
||||
case '0':
|
||||
if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) {
|
||||
dst = append(dst, src[:n]...) // copy simple numbers verbatim
|
||||
return dst, n, nil
|
||||
}
|
||||
return jsonwire.ReformatNumber(dst, src, &e.Flags)
|
||||
case '{':
|
||||
return e.reformatObject(dst, src, depth)
|
||||
case '[':
|
||||
return e.reformatArray(dst, src, depth)
|
||||
default:
|
||||
return dst, 0, jsonwire.NewInvalidCharacterError(src, "at start of value")
|
||||
}
|
||||
}
|
||||
|
||||
// reformatObject parses a JSON object from the start of src and
|
||||
// appends it to the end of src, reformatting whitespace and strings as needed.
|
||||
// It returns the extended dst buffer and the number of consumed input bytes.
|
||||
func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) {
|
||||
// Append object begin.
|
||||
if len(src) == 0 || src[0] != '{' {
|
||||
panic("BUG: reformatObject must be called with a buffer that starts with '{'")
|
||||
} else if depth == maxNestingDepth+1 {
|
||||
return dst, 0, errMaxDepth
|
||||
}
|
||||
dst = append(dst, '{')
|
||||
n := len("{")
|
||||
|
||||
// Append (possible) object end.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
if src[n] == '}' {
|
||||
dst = append(dst, '}')
|
||||
n += len("}")
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
var names *objectNamespace
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
|
||||
e.Namespaces.push()
|
||||
defer e.Namespaces.pop()
|
||||
names = e.Namespaces.Last()
|
||||
}
|
||||
depth++
|
||||
for {
|
||||
// Append optional newline and indentation.
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
dst = e.AppendIndent(dst, depth)
|
||||
}
|
||||
|
||||
// Append object name.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
m := jsonwire.ConsumeSimpleString(src[n:])
|
||||
isVerbatim := m > 0
|
||||
if isVerbatim {
|
||||
dst = append(dst, src[n:n+m]...)
|
||||
} else {
|
||||
dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags)
|
||||
if err != nil {
|
||||
return dst, n + m, err
|
||||
}
|
||||
}
|
||||
quotedName := src[n : n+m]
|
||||
if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, isVerbatim) {
|
||||
return dst, n, wrapWithObjectName(ErrDuplicateName, quotedName)
|
||||
}
|
||||
n += m
|
||||
|
||||
// Append colon.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
|
||||
}
|
||||
if src[n] != ':' {
|
||||
err = jsonwire.NewInvalidCharacterError(src[n:], "after object name (expecting ':')")
|
||||
return dst, n, wrapWithObjectName(err, quotedName)
|
||||
}
|
||||
dst = append(dst, ':')
|
||||
n += len(":")
|
||||
if e.Flags.Get(jsonflags.SpaceAfterColon) {
|
||||
dst = append(dst, ' ')
|
||||
}
|
||||
|
||||
// Append object value.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
|
||||
}
|
||||
dst, m, err = e.reformatValue(dst, src[n:], depth)
|
||||
if err != nil {
|
||||
return dst, n + m, wrapWithObjectName(err, quotedName)
|
||||
}
|
||||
n += m
|
||||
|
||||
// Append comma or object end.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch src[n] {
|
||||
case ',':
|
||||
dst = append(dst, ',')
|
||||
if e.Flags.Get(jsonflags.SpaceAfterComma) {
|
||||
dst = append(dst, ' ')
|
||||
}
|
||||
n += len(",")
|
||||
continue
|
||||
case '}':
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
dst = e.AppendIndent(dst, depth-1)
|
||||
}
|
||||
dst = append(dst, '}')
|
||||
n += len("}")
|
||||
return dst, n, nil
|
||||
default:
|
||||
return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after object value (expecting ',' or '}')")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reformatArray parses a JSON array from the start of src and
|
||||
// appends it to the end of dst, reformatting whitespace and strings as needed.
|
||||
// It returns the extended dst buffer and the number of consumed input bytes.
|
||||
func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) {
|
||||
// Append array begin.
|
||||
if len(src) == 0 || src[0] != '[' {
|
||||
panic("BUG: reformatArray must be called with a buffer that starts with '['")
|
||||
} else if depth == maxNestingDepth+1 {
|
||||
return dst, 0, errMaxDepth
|
||||
}
|
||||
dst = append(dst, '[')
|
||||
n := len("[")
|
||||
|
||||
// Append (possible) array end.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
if src[n] == ']' {
|
||||
dst = append(dst, ']')
|
||||
n += len("]")
|
||||
return dst, n, nil
|
||||
}
|
||||
|
||||
var idx int64
|
||||
var err error
|
||||
depth++
|
||||
for {
|
||||
// Append optional newline and indentation.
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
dst = e.AppendIndent(dst, depth)
|
||||
}
|
||||
|
||||
// Append array value.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
var m int
|
||||
dst, m, err = e.reformatValue(dst, src[n:], depth)
|
||||
if err != nil {
|
||||
return dst, n + m, wrapWithArrayIndex(err, idx)
|
||||
}
|
||||
n += m
|
||||
|
||||
// Append comma or array end.
|
||||
n += jsonwire.ConsumeWhitespace(src[n:])
|
||||
if uint(len(src)) <= uint(n) {
|
||||
return dst, n, io.ErrUnexpectedEOF
|
||||
}
|
||||
switch src[n] {
|
||||
case ',':
|
||||
dst = append(dst, ',')
|
||||
if e.Flags.Get(jsonflags.SpaceAfterComma) {
|
||||
dst = append(dst, ' ')
|
||||
}
|
||||
n += len(",")
|
||||
idx++
|
||||
continue
|
||||
case ']':
|
||||
if e.Flags.Get(jsonflags.Multiline) {
|
||||
dst = e.AppendIndent(dst, depth-1)
|
||||
}
|
||||
dst = append(dst, ']')
|
||||
n += len("]")
|
||||
return dst, n, nil
|
||||
default:
|
||||
return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after array value (expecting ',' or ']')")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// OutputOffset returns the current output byte offset. It gives the location
|
||||
// of the next byte immediately after the most recently written token or value.
|
||||
// The number of bytes actually written to the underlying [io.Writer] may be less
|
||||
// than this offset due to internal buffering effects.
|
||||
func (e *Encoder) OutputOffset() int64 {
|
||||
return e.s.previousOffsetEnd()
|
||||
}
|
||||
|
||||
// AvailableBuffer returns a zero-length buffer with a possible non-zero capacity.
|
||||
// This buffer is intended to be used to populate a [Value]
|
||||
// being passed to an immediately succeeding [Encoder.WriteValue] call.
|
||||
//
|
||||
// Example usage:
|
||||
//
|
||||
// b := d.AvailableBuffer()
|
||||
// b = append(b, '"')
|
||||
// b = appendString(b, v) // append the string formatting of v
|
||||
// b = append(b, '"')
|
||||
// ... := d.WriteValue(b)
|
||||
//
|
||||
// It is the user's responsibility to ensure that the value is valid JSON.
|
||||
func (e *Encoder) AvailableBuffer() []byte {
|
||||
// NOTE: We don't return e.buf[len(e.buf):cap(e.buf)] since WriteValue would
|
||||
// need to take special care to avoid mangling the data while reformatting.
|
||||
// WriteValue can't easily identify whether the input Value aliases e.buf
|
||||
// without using unsafe.Pointer. Thus, we just return a different buffer.
|
||||
// Should this ever alias e.buf, we need to consider how it operates with
|
||||
// the specialized performance optimization for bytes.Buffer.
|
||||
n := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length
|
||||
if cap(e.s.availBuffer) < n {
|
||||
e.s.availBuffer = make([]byte, 0, n)
|
||||
}
|
||||
return e.s.availBuffer
|
||||
}
|
||||
|
||||
// StackDepth returns the depth of the state machine for written JSON data.
|
||||
// Each level on the stack represents a nested JSON object or array.
|
||||
// It is incremented whenever an [BeginObject] or [BeginArray] token is encountered
|
||||
// and decremented whenever an [EndObject] or [EndArray] token is encountered.
|
||||
// The depth is zero-indexed, where zero represents the top-level JSON value.
|
||||
func (e *Encoder) StackDepth() int {
|
||||
// NOTE: Keep in sync with Decoder.StackDepth.
|
||||
return e.s.Tokens.Depth() - 1
|
||||
}
|
||||
|
||||
// StackIndex returns information about the specified stack level.
|
||||
// It must be a number between 0 and [Encoder.StackDepth], inclusive.
|
||||
// For each level, it reports the kind:
|
||||
//
|
||||
// - 0 for a level of zero,
|
||||
// - '{' for a level representing a JSON object, and
|
||||
// - '[' for a level representing a JSON array.
|
||||
//
|
||||
// It also reports the length of that JSON object or array.
|
||||
// Each name and value in a JSON object is counted separately,
|
||||
// so the effective number of members would be half the length.
|
||||
// A complete JSON object must have an even length.
|
||||
func (e *Encoder) StackIndex(i int) (Kind, int64) {
|
||||
// NOTE: Keep in sync with Decoder.StackIndex.
|
||||
switch s := e.s.Tokens.index(i); {
|
||||
case i > 0 && s.isObject():
|
||||
return '{', s.Length()
|
||||
case i > 0 && s.isArray():
|
||||
return '[', s.Length()
|
||||
default:
|
||||
return 0, s.Length()
|
||||
}
|
||||
}
|
||||
|
||||
// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
|
||||
func (e *Encoder) StackPointer() Pointer {
|
||||
return Pointer(e.s.AppendStackPointer(nil, -1))
|
||||
}
|
||||
|
||||
func (e *encoderState) AppendStackPointer(b []byte, where int) []byte {
|
||||
e.Names.copyQuotedBuffer(e.Buf)
|
||||
return e.state.appendStackPointer(b, where)
|
||||
}
|
||||
@@ -1,182 +0,0 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// errorPrefix starts every error message rendered by ioError
// and SyntacticError.
const errorPrefix = "jsontext: "

// ioError wraps an error encountered while performing I/O,
// remembering whether a read or a write was being attempted.
type ioError struct {
	action string // either "read" or "write"
	err    error
}

// Error renders the message as "jsontext: <action> error: <cause>".
func (e *ioError) Error() string {
	msg := append([]byte(errorPrefix), e.action...)
	msg = append(msg, " error: "...)
	msg = append(msg, e.err.Error()...)
	return string(msg)
}

// Unwrap exposes the wrapped I/O error.
func (e *ioError) Unwrap() error {
	cause := e.err
	return cause
}
|
||||
|
||||
// SyntacticError is a description of a syntactic error that occurred when
|
||||
// encoding or decoding JSON according to the grammar.
|
||||
//
|
||||
// The contents of this error as produced by this package may change over time.
|
||||
type SyntacticError struct {
|
||||
requireKeyedLiterals
|
||||
nonComparable
|
||||
|
||||
// ByteOffset indicates that an error occurred after this byte offset.
|
||||
ByteOffset int64
|
||||
// JSONPointer indicates that an error occurred within this JSON value
|
||||
// as indicated using the JSON Pointer notation (see RFC 6901).
|
||||
JSONPointer Pointer
|
||||
|
||||
// Err is the underlying error.
|
||||
Err error
|
||||
}
|
||||
|
||||
// wrapSyntacticError wraps an error and annotates it with a precise location
|
||||
// using the provided [encoderState] or [decoderState].
|
||||
// If err is an [ioError] or [io.EOF], then it is not wrapped.
|
||||
//
|
||||
// It takes a relative offset pos that can be resolved into
|
||||
// an absolute offset using state.offsetAt.
|
||||
//
|
||||
// It takes a where that specify how the JSON pointer is derived.
|
||||
// If the underlying error is a [pointerSuffixError],
|
||||
// then the suffix is appended to the derived pointer.
|
||||
func wrapSyntacticError(state interface {
|
||||
offsetAt(pos int) int64
|
||||
AppendStackPointer(b []byte, where int) []byte
|
||||
}, err error, pos, where int) error {
|
||||
if _, ok := err.(*ioError); err == io.EOF || ok {
|
||||
return err
|
||||
}
|
||||
offset := state.offsetAt(pos)
|
||||
ptr := state.AppendStackPointer(nil, where)
|
||||
if serr, ok := err.(*pointerSuffixError); ok {
|
||||
ptr = serr.appendPointer(ptr)
|
||||
err = serr.error
|
||||
}
|
||||
if d, ok := state.(*decoderState); ok && err == errMismatchDelim {
|
||||
where := "at start of value"
|
||||
if len(d.Tokens.Stack) > 0 && d.Tokens.Last.Length() > 0 {
|
||||
switch {
|
||||
case d.Tokens.Last.isArray():
|
||||
where = "after array element (expecting ',' or ']')"
|
||||
ptr = []byte(Pointer(ptr).Parent()) // problem is with parent array
|
||||
case d.Tokens.Last.isObject():
|
||||
where = "after object value (expecting ',' or '}')"
|
||||
ptr = []byte(Pointer(ptr).Parent()) // problem is with parent object
|
||||
}
|
||||
}
|
||||
err = jsonwire.NewInvalidCharacterError(d.buf[pos:], where)
|
||||
}
|
||||
return &SyntacticError{ByteOffset: offset, JSONPointer: Pointer(ptr), Err: err}
|
||||
}
|
||||
|
||||
func (e *SyntacticError) Error() string {
|
||||
pointer := e.JSONPointer
|
||||
offset := e.ByteOffset
|
||||
b := []byte(errorPrefix)
|
||||
if e.Err != nil {
|
||||
b = append(b, e.Err.Error()...)
|
||||
if e.Err == ErrDuplicateName {
|
||||
b = strconv.AppendQuote(append(b, ' '), pointer.LastToken())
|
||||
pointer = pointer.Parent()
|
||||
offset = 0 // not useful to print offset for duplicate names
|
||||
}
|
||||
} else {
|
||||
b = append(b, "syntactic error"...)
|
||||
}
|
||||
if pointer != "" {
|
||||
b = strconv.AppendQuote(append(b, " within "...), jsonwire.TruncatePointer(string(pointer), 100))
|
||||
}
|
||||
if offset > 0 {
|
||||
b = strconv.AppendInt(append(b, " after offset "...), offset, 10)
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func (e *SyntacticError) Unwrap() error {
|
||||
return e.Err
|
||||
}
|
||||
|
||||
// pointerSuffixError carries a JSON pointer suffix that is to be appended
// to [SyntacticError.JSONPointer]. It is an internal error type
// used within this package and does not appear in the public API.
//
// Its main purpose is to annotate errors from Encoder.WriteValue
// and Decoder.ReadValue with precise positions.
// When WriteValue or ReadValue is called, a JSON pointer to the
// upcoming value can be built from the Encoder/Decoder state.
// Tracking pointers inside the value during normal operation, however,
// would cost performance in the error-free case.
//
// To report precise locations without that overhead,
// the error is wrapped with object names or array indices
// as the call stack is popped when an error occurs.
// Because that happens innermost-first, pointerSuffixError stores
// the tokens in reverse and they are only put back into forward order
// when appended to the pointer prefix.
//
// For example, if the encoder is at "/alpha/bravo/charlie"
// and an error occurs in WriteValue at "/xray/yankee/zulu", then
// the final pointer should be "/alpha/bravo/charlie/xray/yankee/zulu".
//
// As pointerSuffixError is populated along the error return path,
// it first holds "/zulu", then "/zulu/yankee",
// and finally "/zulu/yankee/xray".
// These tokens are reversed and concatenated to "/alpha/bravo/charlie"
// to form the full pointer.
type pointerSuffixError struct {
	error

	// reversePointer is a JSON pointer, but with each token in reverse order.
	reversePointer []byte
}
|
||||
|
||||
// wrapWithObjectName wraps err with a JSON object name access,
|
||||
// which must be a valid quoted JSON string.
|
||||
func wrapWithObjectName(err error, quotedName []byte) error {
|
||||
serr, _ := err.(*pointerSuffixError)
|
||||
if serr == nil {
|
||||
serr = &pointerSuffixError{error: err}
|
||||
}
|
||||
name := jsonwire.UnquoteMayCopy(quotedName, false)
|
||||
serr.reversePointer = appendEscapePointerName(append(serr.reversePointer, '/'), name)
|
||||
return serr
|
||||
}
|
||||
|
||||
// wrapWithArrayIndex wraps err with a JSON array index access.
|
||||
func wrapWithArrayIndex(err error, index int64) error {
|
||||
serr, _ := err.(*pointerSuffixError)
|
||||
if serr == nil {
|
||||
serr = &pointerSuffixError{error: err}
|
||||
}
|
||||
serr.reversePointer = strconv.AppendUint(append(serr.reversePointer, '/'), uint64(index), 10)
|
||||
return serr
|
||||
}
|
||||
|
||||
// appendPointer appends the path encoded in e to the end of pointer.
|
||||
func (e *pointerSuffixError) appendPointer(pointer []byte) []byte {
|
||||
// Copy each token in reversePointer to the end of pointer in reverse order.
|
||||
// Double reversal means that the appended suffix is now in forward order.
|
||||
bi, bo := e.reversePointer, pointer
|
||||
for len(bi) > 0 {
|
||||
i := bytes.LastIndexByte(bi, '/')
|
||||
bi, bo = bi[:i], append(bo, bi[i:]...)
|
||||
}
|
||||
return bo
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"next.orly.dev/pkg/json/internal"
|
||||
)
|
||||
|
||||
// Internal is for internal use only.
|
||||
// This is exempt from the Go compatibility agreement.
|
||||
var Internal exporter
|
||||
|
||||
type exporter struct{}
|
||||
|
||||
// Export exposes internal functionality from "jsontext" to "json".
|
||||
// This cannot be dynamically called by other packages since
|
||||
// they cannot obtain a reference to the internal.AllowInternalUse value.
|
||||
func (exporter) Export(p *internal.NotForPublicUse) export {
|
||||
if p != &internal.AllowInternalUse {
|
||||
panic("unauthorized call to Export")
|
||||
}
|
||||
return export{}
|
||||
}
|
||||
|
||||
// The export type exposes functionality to packages with visibility to
|
||||
// the internal.AllowInternalUse variable. The "json" package uses this
|
||||
// to modify low-level state in the Encoder and Decoder types.
|
||||
// It mutates the state directly instead of calling ReadToken or WriteToken
|
||||
// since this is more performant. The public APIs need to track state to ensure
|
||||
// that users are constructing a valid JSON value, but the "json" implementation
|
||||
// guarantees that it emits valid JSON by the structure of the code itself.
|
||||
type export struct{}
|
||||
|
||||
// Encoder returns a pointer to the underlying encoderState.
|
||||
func (export) Encoder(e *Encoder) *encoderState { return &e.s }
|
||||
|
||||
// Decoder returns a pointer to the underlying decoderState.
|
||||
func (export) Decoder(d *Decoder) *decoderState { return &d.s }
|
||||
|
||||
func (export) GetBufferedEncoder(o ...Options) *Encoder {
|
||||
return getBufferedEncoder(o...)
|
||||
}
|
||||
func (export) PutBufferedEncoder(e *Encoder) {
|
||||
putBufferedEncoder(e)
|
||||
}
|
||||
|
||||
func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder {
|
||||
return getStreamingEncoder(w, o...)
|
||||
}
|
||||
func (export) PutStreamingEncoder(e *Encoder) {
|
||||
putStreamingEncoder(e)
|
||||
}
|
||||
|
||||
func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder {
|
||||
return getBufferedDecoder(b, o...)
|
||||
}
|
||||
func (export) PutBufferedDecoder(d *Decoder) {
|
||||
putBufferedDecoder(d)
|
||||
}
|
||||
|
||||
func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder {
|
||||
return getStreamingDecoder(r, o...)
|
||||
}
|
||||
func (export) PutStreamingDecoder(d *Decoder) {
|
||||
putStreamingDecoder(d)
|
||||
}
|
||||
|
||||
func (export) IsIOError(err error) bool {
|
||||
_, ok := err.(*ioError)
|
||||
return ok
|
||||
}
|
||||
@@ -1,304 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
"next.orly.dev/pkg/json/internal/jsonopts"
|
||||
"next.orly.dev/pkg/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder],
|
||||
// and [Decoder.Reset] with specific features.
|
||||
// Each function takes in a variadic list of options, where properties
|
||||
// set in latter options override the value of previously set properties.
|
||||
//
|
||||
// There is a single Options type, which is used with both encoding and decoding.
|
||||
// Some options affect both operations, while others only affect one operation:
|
||||
//
|
||||
// - [AllowDuplicateNames] affects encoding and decoding
|
||||
// - [AllowInvalidUTF8] affects encoding and decoding
|
||||
// - [EscapeForHTML] affects encoding only
|
||||
// - [EscapeForJS] affects encoding only
|
||||
// - [PreserveRawStrings] affects encoding only
|
||||
// - [CanonicalizeRawInts] affects encoding only
|
||||
// - [CanonicalizeRawFloats] affects encoding only
|
||||
// - [ReorderRawObjects] affects encoding only
|
||||
// - [SpaceAfterColon] affects encoding only
|
||||
// - [SpaceAfterComma] affects encoding only
|
||||
// - [Multiline] affects encoding only
|
||||
// - [WithIndent] affects encoding only
|
||||
// - [WithIndentPrefix] affects encoding only
|
||||
//
|
||||
// Options that do not affect a particular operation are ignored.
|
||||
//
|
||||
// The Options type is identical to [encoding/json.Options] and
|
||||
// [encoding/json/v2.Options]. Options from the other packages may
|
||||
// be passed to functionality in this package, but are ignored.
|
||||
// Options from this package may be used with the other packages.
|
||||
type Options = jsonopts.Options
|
||||
|
||||
// AllowDuplicateNames specifies that JSON objects may contain
|
||||
// duplicate member names. Disabling the duplicate name check may provide
|
||||
// performance benefits, but breaks compliance with RFC 7493, section 2.3.
|
||||
// The input or output will still be compliant with RFC 8259,
|
||||
// which leaves the handling of duplicate names as unspecified behavior.
|
||||
//
|
||||
// This affects either encoding or decoding.
|
||||
func AllowDuplicateNames(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.AllowDuplicateNames | 1
|
||||
} else {
|
||||
return jsonflags.AllowDuplicateNames | 0
|
||||
}
|
||||
}
|
||||
|
||||
// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8,
|
||||
// which will be mangled as the Unicode replacement character, U+FFFD.
|
||||
// This causes the encoder or decoder to break compliance with
|
||||
// RFC 7493, section 2.1, and RFC 8259, section 8.1.
|
||||
//
|
||||
// This affects either encoding or decoding.
|
||||
func AllowInvalidUTF8(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.AllowInvalidUTF8 | 1
|
||||
} else {
|
||||
return jsonflags.AllowInvalidUTF8 | 0
|
||||
}
|
||||
}
|
||||
|
||||
// EscapeForHTML specifies that '<', '>', and '&' characters within JSON strings
|
||||
// should be escaped as a hexadecimal Unicode codepoint (e.g., \u003c) so that
|
||||
// the output is safe to embed within HTML.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func EscapeForHTML(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.EscapeForHTML | 1
|
||||
} else {
|
||||
return jsonflags.EscapeForHTML | 0
|
||||
}
|
||||
}
|
||||
|
||||
// EscapeForJS specifies that U+2028 and U+2029 characters within JSON strings
|
||||
// should be escaped as a hexadecimal Unicode codepoint (e.g., \u2028) so that
|
||||
// the output is valid to embed within JavaScript. See RFC 8259, section 12.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func EscapeForJS(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.EscapeForJS | 1
|
||||
} else {
|
||||
return jsonflags.EscapeForJS | 0
|
||||
}
|
||||
}
|
||||
|
||||
// PreserveRawStrings specifies that when encoding a raw JSON string in a
|
||||
// [Token] or [Value], pre-escaped sequences
|
||||
// in a JSON string are preserved to the output.
|
||||
// However, raw strings still respect [EscapeForHTML] and [EscapeForJS]
|
||||
// such that the relevant characters are escaped.
|
||||
// If [AllowInvalidUTF8] is enabled, bytes of invalid UTF-8
|
||||
// are preserved to the output.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func PreserveRawStrings(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.PreserveRawStrings | 1
|
||||
} else {
|
||||
return jsonflags.PreserveRawStrings | 0
|
||||
}
|
||||
}
|
||||
|
||||
// CanonicalizeRawInts specifies that when encoding a raw JSON
|
||||
// integer number (i.e., a number without a fraction and exponent) in a
|
||||
// [Token] or [Value], the number is canonicalized
|
||||
// according to RFC 8785, section 3.2.2.3. As a special case,
|
||||
// the number -0 is canonicalized as 0.
|
||||
//
|
||||
// JSON numbers are treated as IEEE 754 double precision numbers.
|
||||
// Any numbers with precision beyond what is representable by that form
|
||||
// will lose their precision when canonicalized. For example,
|
||||
// integer values beyond ±2⁵³ will lose their precision.
|
||||
// For example, 1234567890123456789 is formatted as 1234567890123456800.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func CanonicalizeRawInts(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.CanonicalizeRawInts | 1
|
||||
} else {
|
||||
return jsonflags.CanonicalizeRawInts | 0
|
||||
}
|
||||
}
|
||||
|
||||
// CanonicalizeRawFloats specifies that when encoding a raw JSON
|
||||
// floating-point number (i.e., a number with a fraction or exponent) in a
|
||||
// [Token] or [Value], the number is canonicalized
|
||||
// according to RFC 8785, section 3.2.2.3. As a special case,
|
||||
// the number -0 is canonicalized as 0.
|
||||
//
|
||||
// JSON numbers are treated as IEEE 754 double precision numbers.
|
||||
// It is safe to canonicalize a serialized single precision number and
|
||||
// parse it back as a single precision number and expect the same value.
|
||||
// If a number exceeds ±1.7976931348623157e+308, which is the maximum
|
||||
// finite number, then it saturated at that value and formatted as such.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func CanonicalizeRawFloats(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.CanonicalizeRawFloats | 1
|
||||
} else {
|
||||
return jsonflags.CanonicalizeRawFloats | 0
|
||||
}
|
||||
}
|
||||
|
||||
// ReorderRawObjects specifies that when encoding a raw JSON object in a
|
||||
// [Value], the object members are reordered according to
|
||||
// RFC 8785, section 3.2.3.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func ReorderRawObjects(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.ReorderRawObjects | 1
|
||||
} else {
|
||||
return jsonflags.ReorderRawObjects | 0
|
||||
}
|
||||
}
|
||||
|
||||
// SpaceAfterColon specifies that the JSON output should emit a space character
|
||||
// after each colon separator following a JSON object name.
|
||||
// If false, then no space character appears after the colon separator.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func SpaceAfterColon(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.SpaceAfterColon | 1
|
||||
} else {
|
||||
return jsonflags.SpaceAfterColon | 0
|
||||
}
|
||||
}
|
||||
|
||||
// SpaceAfterComma specifies that the JSON output should emit a space character
|
||||
// after each comma separator following a JSON object value or array element.
|
||||
// If false, then no space character appears after the comma separator.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func SpaceAfterComma(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.SpaceAfterComma | 1
|
||||
} else {
|
||||
return jsonflags.SpaceAfterComma | 0
|
||||
}
|
||||
}
|
||||
|
||||
// Multiline specifies that the JSON output should expand to multiple lines,
|
||||
// where every JSON object member or JSON array element appears on
|
||||
// a new, indented line according to the nesting depth.
|
||||
//
|
||||
// If [SpaceAfterColon] is not specified, then the default is true.
|
||||
// If [SpaceAfterComma] is not specified, then the default is false.
|
||||
// If [WithIndent] is not specified, then the default is "\t".
|
||||
//
|
||||
// If set to false, then the output is a single-line,
|
||||
// where the only whitespace emitted is determined by the current
|
||||
// values of [SpaceAfterColon] and [SpaceAfterComma].
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
func Multiline(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.Multiline | 1
|
||||
} else {
|
||||
return jsonflags.Multiline | 0
|
||||
}
|
||||
}
|
||||
|
||||
// WithIndent specifies that the encoder should emit multiline output
|
||||
// where each element in a JSON object or array begins on a new, indented line
|
||||
// beginning with the indent prefix (see [WithIndentPrefix])
|
||||
// followed by one or more copies of indent according to the nesting depth.
|
||||
// The indent must only be composed of space or tab characters.
|
||||
//
|
||||
// If the intent to emit indented output without a preference for
|
||||
// the particular indent string, then use [Multiline] instead.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
// Use of this option implies [Multiline] being set to true.
|
||||
func WithIndent(indent string) Options {
|
||||
// Fast-path: Return a constant for common indents, which avoids allocating.
|
||||
// These are derived from analyzing the Go module proxy on 2023-07-01.
|
||||
switch indent {
|
||||
case "\t":
|
||||
return jsonopts.Indent("\t") // ~14k usages
|
||||
case " ":
|
||||
return jsonopts.Indent(" ") // ~18k usages
|
||||
case " ":
|
||||
return jsonopts.Indent(" ") // ~1.7k usages
|
||||
case " ":
|
||||
return jsonopts.Indent(" ") // ~52k usages
|
||||
case " ":
|
||||
return jsonopts.Indent(" ") // ~12k usages
|
||||
case "":
|
||||
return jsonopts.Indent("") // ~1.5k usages
|
||||
}
|
||||
|
||||
// Otherwise, allocate for this unique value.
|
||||
if s := strings.Trim(indent, " \t"); len(s) > 0 {
|
||||
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent")
|
||||
}
|
||||
return jsonopts.Indent(indent)
|
||||
}
|
||||
|
||||
// WithIndentPrefix specifies that the encoder should emit multiline output
|
||||
// where each element in a JSON object or array begins on a new, indented line
|
||||
// beginning with the indent prefix followed by one or more copies of indent
|
||||
// (see [WithIndent]) according to the nesting depth.
|
||||
// The prefix must only be composed of space or tab characters.
|
||||
//
|
||||
// This only affects encoding and is ignored when decoding.
|
||||
// Use of this option implies [Multiline] being set to true.
|
||||
func WithIndentPrefix(prefix string) Options {
|
||||
if s := strings.Trim(prefix, " \t"); len(s) > 0 {
|
||||
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix")
|
||||
}
|
||||
return jsonopts.IndentPrefix(prefix)
|
||||
}
|
||||
|
||||
/*
|
||||
// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit.
|
||||
// Remember to also update the "Security Considerations" section.
|
||||
|
||||
// WithByteLimit sets a limit on the number of input or output bytes
|
||||
// that may be consumed or produced for each top-level JSON value.
|
||||
// If a [Decoder] or [Encoder] method call would need to consume/produce
|
||||
// more than a total of n bytes to make progress on the top-level JSON value,
|
||||
// then the call will report an error.
|
||||
// Whitespace before and within the top-level value are counted against the limit.
|
||||
// Whitespace after a top-level value is counted against the limit
|
||||
// for the next top-level value.
|
||||
//
|
||||
// A non-positive limit is equivalent to no limit at all.
|
||||
// If unspecified, the default limit is no limit at all.
|
||||
// This affects either encoding or decoding.
|
||||
func WithByteLimit(n int64) Options {
|
||||
return jsonopts.ByteLimit(max(n, 0))
|
||||
}
|
||||
|
||||
// WithDepthLimit sets a limit on the maximum depth of JSON nesting
|
||||
// that may be consumed or produced for each top-level JSON value.
|
||||
// If a [Decoder] or [Encoder] method call would need to consume or produce
|
||||
// a depth greater than n to make progress on the top-level JSON value,
|
||||
// then the call will report an error.
|
||||
//
|
||||
// A non-positive limit is equivalent to no limit at all.
|
||||
// If unspecified, the default limit is 10000.
|
||||
// This affects either encoding or decoding.
|
||||
func WithDepthLimit(n int) Options {
|
||||
return jsonopts.DepthLimit(max(n, 0))
|
||||
}
|
||||
*/
|
||||
@@ -1,152 +0,0 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"math/bits"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// TODO(https://go.dev/issue/47657): Use sync.PoolOf.
|
||||
|
||||
var (
|
||||
// This owns the internal buffer since there is no io.Writer to output to.
|
||||
// Since the buffer can get arbitrarily large in normal usage,
|
||||
// there is statistical tracking logic to determine whether to recycle
|
||||
// the internal buffer or not based on a history of utilization.
|
||||
bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
|
||||
|
||||
// This owns the internal buffer, but it is only used to temporarily store
|
||||
// buffered JSON before flushing it to the underlying io.Writer.
|
||||
// In a sufficiently efficient streaming mode, we do not expect the buffer
|
||||
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
|
||||
streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
|
||||
|
||||
// This does not own the internal buffer since
|
||||
// it is taken directly from the provided bytes.Buffer.
|
||||
bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
|
||||
)
|
||||
|
||||
// bufferStatistics records how well a buffer has been utilized.
// It informs the decision of whether a buffer should be recycled,
// in order to avoid https://go.dev/issue/23199.
type bufferStatistics struct {
	// strikes is the number of times the buffer was under-utilized.
	strikes int
	// prevLen is the length of the previous buffer.
	prevLen int
}
|
||||
|
||||
func getBufferedEncoder(opts ...Options) *Encoder {
|
||||
e := bufferedEncoderPool.Get().(*Encoder)
|
||||
if e.s.Buf == nil {
|
||||
// Round up to nearest 2ⁿ to make best use of malloc size classes.
|
||||
// See runtime/sizeclasses.go on Go1.15.
|
||||
// Logical OR with 63 to ensure 64 as the minimum buffer size.
|
||||
n := 1 << bits.Len(uint(e.s.bufStats.prevLen|63))
|
||||
e.s.Buf = make([]byte, 0, n)
|
||||
}
|
||||
e.s.reset(e.s.Buf[:0], nil, opts...)
|
||||
return e
|
||||
}
|
||||
func putBufferedEncoder(e *Encoder) {
|
||||
// Recycle large buffers only if sufficiently utilized.
|
||||
// If a buffer is under-utilized enough times sequentially,
|
||||
// then it is discarded, ensuring that a single large buffer
|
||||
// won't be kept alive by a continuous stream of small usages.
|
||||
//
|
||||
// The worst case utilization is computed as:
|
||||
// MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES)
|
||||
//
|
||||
// For the constants chosen below, this is (25%)/(1+4) ⇒ 5%.
|
||||
// This may seem low, but it ensures a lower bound on
|
||||
// the absolute worst-case utilization. Without this check,
|
||||
// this would be theoretically 0%, which is infinitely worse.
|
||||
//
|
||||
// See https://go.dev/issue/27735.
|
||||
switch {
|
||||
case cap(e.s.Buf) <= 4<<10: // always recycle buffers smaller than 4KiB
|
||||
e.s.bufStats.strikes = 0
|
||||
case cap(e.s.Buf)/4 <= len(e.s.Buf): // at least 25% utilization
|
||||
e.s.bufStats.strikes = 0
|
||||
case e.s.bufStats.strikes < 4: // at most 4 strikes
|
||||
e.s.bufStats.strikes++
|
||||
default: // discard the buffer; too large and too often under-utilized
|
||||
e.s.bufStats.strikes = 0
|
||||
e.s.bufStats.prevLen = len(e.s.Buf) // heuristic for size to allocate next time
|
||||
e.s.Buf = nil
|
||||
}
|
||||
bufferedEncoderPool.Put(e)
|
||||
}
|
||||
|
||||
func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder {
|
||||
if _, ok := w.(*bytes.Buffer); ok {
|
||||
e := bytesBufferEncoderPool.Get().(*Encoder)
|
||||
e.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer
|
||||
return e
|
||||
} else {
|
||||
e := streamingEncoderPool.Get().(*Encoder)
|
||||
e.s.reset(e.s.Buf[:0], w, opts...) // preserve existing buffer
|
||||
return e
|
||||
}
|
||||
}
|
||||
func putStreamingEncoder(e *Encoder) {
|
||||
if _, ok := e.s.wr.(*bytes.Buffer); ok {
|
||||
bytesBufferEncoderPool.Put(e)
|
||||
} else {
|
||||
if cap(e.s.Buf) > 64<<10 {
|
||||
e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
streamingEncoderPool.Put(e)
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// This does not own the internal buffer since it is externally provided.
|
||||
bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
|
||||
|
||||
// This owns the internal buffer, but it is only used to temporarily store
|
||||
// buffered JSON fetched from the underlying io.Reader.
|
||||
// In a sufficiently efficient streaming mode, we do not expect the buffer
|
||||
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
|
||||
streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
|
||||
|
||||
// This does not own the internal buffer since
|
||||
// it is taken directly from the provided bytes.Buffer.
|
||||
bytesBufferDecoderPool = bufferedDecoderPool
|
||||
)
|
||||
|
||||
func getBufferedDecoder(b []byte, opts ...Options) *Decoder {
|
||||
d := bufferedDecoderPool.Get().(*Decoder)
|
||||
d.s.reset(b, nil, opts...)
|
||||
return d
|
||||
}
|
||||
func putBufferedDecoder(d *Decoder) {
|
||||
bufferedDecoderPool.Put(d)
|
||||
}
|
||||
|
||||
func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder {
|
||||
if _, ok := r.(*bytes.Buffer); ok {
|
||||
d := bytesBufferDecoderPool.Get().(*Decoder)
|
||||
d.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer
|
||||
return d
|
||||
} else {
|
||||
d := streamingDecoderPool.Get().(*Decoder)
|
||||
d.s.reset(d.s.buf[:0], r, opts...) // preserve existing buffer
|
||||
return d
|
||||
}
|
||||
}
|
||||
func putStreamingDecoder(d *Decoder) {
|
||||
if _, ok := d.s.rd.(*bytes.Buffer); ok {
|
||||
bytesBufferDecoderPool.Put(d)
|
||||
} else {
|
||||
if cap(d.s.buf) > 64<<10 {
|
||||
d.s.buf = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
streamingDecoderPool.Put(d)
|
||||
}
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
"next.orly.dev/pkg/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// AppendQuote appends a double-quoted JSON string literal representing src
|
||||
// to dst and returns the extended buffer.
|
||||
// It uses the minimal string representation per RFC 8785, section 3.2.2.2.
|
||||
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
|
||||
// and an error is returned at the end indicating the presence of invalid UTF-8.
|
||||
// The dst must not overlap with the src.
|
||||
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
|
||||
dst, err := jsonwire.AppendQuote(dst, src, &jsonflags.Flags{})
|
||||
if err != nil {
|
||||
err = &SyntacticError{Err: err}
|
||||
}
|
||||
return dst, err
|
||||
}
|
||||
|
||||
// AppendUnquote appends the decoded interpretation of src as a
|
||||
// double-quoted JSON string literal to dst and returns the extended buffer.
|
||||
// The input src must be a JSON string without any surrounding whitespace.
|
||||
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
|
||||
// and an error is returned at the end indicating the presence of invalid UTF-8.
|
||||
// Any trailing bytes after the JSON string literal results in an error.
|
||||
// The dst must not overlap with the src.
|
||||
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
|
||||
dst, err := jsonwire.AppendUnquote(dst, src)
|
||||
if err != nil {
|
||||
err = &SyntacticError{Err: err}
|
||||
}
|
||||
return dst, err
|
||||
}
|
||||
@@ -1,828 +0,0 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"iter"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// ErrDuplicateName indicates that a JSON token could not be
// encoded or decoded because it results in a duplicate JSON object name.
// This error is directly wrapped within a [SyntacticError] when produced.
//
// The name of a duplicate JSON object member can be extracted as shown below.
// Errors are wrapped as *SyntacticError, so the [errors.As] target
// must be a pointer-typed variable:
//
//	err := ...
//	var serr *jsontext.SyntacticError
//	if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName {
//		ptr := serr.JSONPointer // JSON pointer to duplicate name
//		name := ptr.LastToken() // duplicate name itself
//		...
//	}
//
// This error is only returned if [AllowDuplicateNames] is false.
var ErrDuplicateName = errors.New("duplicate object member name")
|
||||
|
||||
// ErrNonStringName reports that a JSON token could not be encoded or
// decoded because it appears where an object member name is expected
// but is not a JSON string (RFC 8259, section 4 requires names to be strings).
// This error is directly wrapped within a [SyntacticError] when produced.
var ErrNonStringName = errors.New("object member name must be a string")
|
||||
|
||||
// Internal sentinel errors produced by the token state machine.
var (
	// errMissingValue: an object is being closed right after a member name.
	errMissingValue = errors.New("missing value after object name")
	// errMismatchDelim: a closing token does not match the open container.
	errMismatchDelim = errors.New("mismatching structural token for object or array")
	// errMaxDepth: nesting would exceed maxNestingDepth.
	errMaxDepth = errors.New("exceeded max depth")

	// errInvalidNamespace: the current container's namespace was invalidated.
	errInvalidNamespace = errors.New("object namespace is in an invalid state")
)
|
||||
|
||||
// maxNestingDepth bounds how deeply JSON objects and arrays may nest.
// RFC 8259, section 9 allows implementations to enforce such a limit,
// and one is needed here to prevent stack overflows.
const maxNestingDepth = 10_000
|
||||
|
||||
type state struct {
|
||||
// Tokens validates whether the next token kind is valid.
|
||||
Tokens stateMachine
|
||||
|
||||
// Names is a stack of object names.
|
||||
Names objectNameStack
|
||||
|
||||
// Namespaces is a stack of object namespaces.
|
||||
// For performance reasons, Encoder or Decoder may not update this
|
||||
// if Marshal or Unmarshal is able to track names in a more efficient way.
|
||||
// See makeMapArshaler and makeStructArshaler.
|
||||
// Not used if AllowDuplicateNames is true.
|
||||
Namespaces objectNamespaceStack
|
||||
}
|
||||
|
||||
// needObjectValue reports whether the next token should be an object value.
|
||||
// This method is used by [wrapSyntacticError].
|
||||
func (s *state) needObjectValue() bool {
|
||||
return s.Tokens.Last.needObjectValue()
|
||||
}
|
||||
|
||||
func (s *state) reset() {
|
||||
s.Tokens.reset()
|
||||
s.Names.reset()
|
||||
s.Namespaces.reset()
|
||||
}
|
||||
|
||||
// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value
// relative to the root of the top-level JSON value.
//
// A Pointer is a slash-separated list of tokens, where each token is
// either a JSON object name or an index to a JSON array element
// encoded as a base-10 integer value.
// It is impossible to distinguish between an array index and an object name
// (that happens to be a base-10 encoded integer) without also knowing
// the structure of the top-level JSON value that the pointer refers to.
//
// There is exactly one representation of a pointer to a particular value,
// so comparability of Pointer values is equivalent to checking whether
// they both point to the exact same value.
type Pointer string
|
||||
|
||||
// IsValid reports whether p is a valid JSON Pointer according to RFC 6901.
|
||||
// Note that the concatenation of two valid pointers produces a valid pointer.
|
||||
func (p Pointer) IsValid() bool {
|
||||
for i, r := range p {
|
||||
switch {
|
||||
case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')):
|
||||
return false // invalid escape
|
||||
case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"):
|
||||
return false // invalid UTF-8
|
||||
}
|
||||
}
|
||||
return len(p) == 0 || p[0] == '/'
|
||||
}
|
||||
|
||||
// Contains reports whether the JSON value that p points to
|
||||
// is equal to or contains the JSON value that pc points to.
|
||||
func (p Pointer) Contains(pc Pointer) bool {
|
||||
// Invariant: len(p) <= len(pc) if p.Contains(pc)
|
||||
suffix, ok := strings.CutPrefix(string(pc), string(p))
|
||||
return ok && (suffix == "" || suffix[0] == '/')
|
||||
}
|
||||
|
||||
// Parent strips off the last token and returns the remaining pointer.
|
||||
// The parent of an empty p is an empty string.
|
||||
func (p Pointer) Parent() Pointer {
|
||||
return p[:max(strings.LastIndexByte(string(p), '/'), 0)]
|
||||
}
|
||||
|
||||
// LastToken returns the last token in the pointer.
|
||||
// The last token of an empty p is an empty string.
|
||||
func (p Pointer) LastToken() string {
|
||||
last := p[max(strings.LastIndexByte(string(p), '/'), 0):]
|
||||
return unescapePointerToken(strings.TrimPrefix(string(last), "/"))
|
||||
}
|
||||
|
||||
// AppendToken appends a token to the end of p and returns the full pointer.
|
||||
func (p Pointer) AppendToken(tok string) Pointer {
|
||||
return Pointer(appendEscapePointerName([]byte(p+"/"), tok))
|
||||
}
|
||||
|
||||
// TODO: Add Pointer.AppendTokens,
|
||||
// but should this take in a ...string or an iter.Seq[string]?
|
||||
|
||||
// Tokens returns an iterator over the reference tokens in the JSON pointer,
|
||||
// starting from the first token until the last token (unless stopped early).
|
||||
func (p Pointer) Tokens() iter.Seq[string] {
|
||||
return func(yield func(string) bool) {
|
||||
for len(p) > 0 {
|
||||
p = Pointer(strings.TrimPrefix(string(p), "/"))
|
||||
i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p)))
|
||||
if !yield(unescapePointerToken(string(p)[:i])) {
|
||||
return
|
||||
}
|
||||
p = p[i:]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unescapePointerToken converts an escaped JSON Pointer reference token
// back to its raw form by replacing "~1" with "/" and then "~0" with "~"
// (RFC 6901, section 3). Tokens without '~' are returned unchanged.
func unescapePointerToken(token string) string {
	if strings.IndexByte(token, '~') < 0 {
		return token // fast path: nothing to unescape
	}
	// The order matters: "~1" must be handled before "~0" so that
	// "~01" decodes to "~1" rather than "/".
	slashed := strings.ReplaceAll(token, "~1", "/")
	return strings.ReplaceAll(slashed, "~0", "~")
}
|
||||
|
||||
// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
|
||||
//
|
||||
// - If where is -1, then it points to the previously processed token.
|
||||
//
|
||||
// - If where is 0, then it points to the parent JSON object or array,
|
||||
// or an object member if in-between an object member key and value.
|
||||
// This is useful when the position is ambiguous whether
|
||||
// we are interested in the previous or next token, or
|
||||
// when we are uncertain whether the next token
|
||||
// continues or terminates the current object or array.
|
||||
//
|
||||
// - If where is +1, then it points to the next expected value,
|
||||
// assuming that it continues the current JSON object or array.
|
||||
// As a special case, if the next token is a JSON object name,
|
||||
// then it points to the parent JSON object.
|
||||
//
|
||||
// Invariant: Must call s.names.copyQuotedBuffer beforehand.
|
||||
func (s state) appendStackPointer(b []byte, where int) []byte {
|
||||
var objectDepth int
|
||||
for i := 1; i < s.Tokens.Depth(); i++ {
|
||||
e := s.Tokens.index(i)
|
||||
arrayDelta := -1 // by default point to previous array element
|
||||
if isLast := i == s.Tokens.Depth()-1; isLast {
|
||||
switch {
|
||||
case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
|
||||
return b
|
||||
case where > 0 && e.isArray():
|
||||
arrayDelta = 0 // point to next array element
|
||||
}
|
||||
}
|
||||
switch {
|
||||
case e.isObject():
|
||||
b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
|
||||
objectDepth++
|
||||
case e.isArray():
|
||||
b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// appendEscapePointerName appends name to b as an escaped JSON Pointer
// reference token: '~' becomes "~0" and '/' becomes "~1"
// (RFC 6901, section 3). All other runes are appended as UTF-8.
func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
	for _, ch := range string(name) {
		switch ch {
		case '~':
			b = append(b, '~', '0')
		case '/':
			b = append(b, '~', '1')
		default:
			b = utf8.AppendRune(b, ch)
		}
	}
	return b
}
|
||||
|
||||
// stateMachine is a push-down automaton that validates whether
|
||||
// a sequence of tokens is valid or not according to the JSON grammar.
|
||||
// It is useful for both encoding and decoding.
|
||||
//
|
||||
// It is a stack where each entry represents a nested JSON object or array.
|
||||
// The stack has a minimum depth of 1 where the first level is a
|
||||
// virtual JSON array to handle a stream of top-level JSON values.
|
||||
// The top-level virtual JSON array is special in that it doesn't require commas
|
||||
// between each JSON value.
|
||||
//
|
||||
// For performance, most methods are carefully written to be inlinable.
|
||||
// The zero value is a valid state machine ready for use.
|
||||
type stateMachine struct {
|
||||
Stack []stateEntry
|
||||
Last stateEntry
|
||||
}
|
||||
|
||||
// reset resets the state machine.
|
||||
// The machine always starts with a minimum depth of 1.
|
||||
func (m *stateMachine) reset() {
|
||||
m.Stack = m.Stack[:0]
|
||||
if cap(m.Stack) > 1<<10 {
|
||||
m.Stack = nil
|
||||
}
|
||||
m.Last = stateTypeArray
|
||||
}
|
||||
|
||||
// Depth is the current nested depth of JSON objects and arrays.
|
||||
// It is one-indexed (i.e., top-level values have a depth of 1).
|
||||
func (m stateMachine) Depth() int {
|
||||
return len(m.Stack) + 1
|
||||
}
|
||||
|
||||
// index returns a reference to the ith entry.
|
||||
// It is only valid until the next push method call.
|
||||
func (m *stateMachine) index(i int) *stateEntry {
|
||||
if i == len(m.Stack) {
|
||||
return &m.Last
|
||||
}
|
||||
return &m.Stack[i]
|
||||
}
|
||||
|
||||
// DepthLength reports the current nested depth and
|
||||
// the length of the last JSON object or array.
|
||||
func (m stateMachine) DepthLength() (int, int64) {
|
||||
return m.Depth(), m.Last.Length()
|
||||
}
|
||||
|
||||
// appendLiteral appends a JSON literal as the next token in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) appendLiteral() error {
|
||||
switch {
|
||||
case m.Last.NeedObjectName():
|
||||
return ErrNonStringName
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
default:
|
||||
m.Last.Increment()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// appendString appends a JSON string as the next token in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) appendString() error {
|
||||
switch {
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
default:
|
||||
m.Last.Increment()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// appendNumber appends a JSON number as the next token in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) appendNumber() error {
|
||||
return m.appendLiteral()
|
||||
}
|
||||
|
||||
// pushObject appends a JSON begin object token as next in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) pushObject() error {
|
||||
switch {
|
||||
case m.Last.NeedObjectName():
|
||||
return ErrNonStringName
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
case len(m.Stack) == maxNestingDepth:
|
||||
return errMaxDepth
|
||||
default:
|
||||
m.Last.Increment()
|
||||
m.Stack = append(m.Stack, m.Last)
|
||||
m.Last = stateTypeObject
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// popObject appends a JSON end object token as next in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) popObject() error {
|
||||
switch {
|
||||
case !m.Last.isObject():
|
||||
return errMismatchDelim
|
||||
case m.Last.needObjectValue():
|
||||
return errMissingValue
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
default:
|
||||
m.Last = m.Stack[len(m.Stack)-1]
|
||||
m.Stack = m.Stack[:len(m.Stack)-1]
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// pushArray appends a JSON begin array token as next in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) pushArray() error {
|
||||
switch {
|
||||
case m.Last.NeedObjectName():
|
||||
return ErrNonStringName
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
case len(m.Stack) == maxNestingDepth:
|
||||
return errMaxDepth
|
||||
default:
|
||||
m.Last.Increment()
|
||||
m.Stack = append(m.Stack, m.Last)
|
||||
m.Last = stateTypeArray
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// popArray appends a JSON end array token as next in the sequence.
|
||||
// If an error is returned, the state is not mutated.
|
||||
func (m *stateMachine) popArray() error {
|
||||
switch {
|
||||
case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array
|
||||
return errMismatchDelim
|
||||
case !m.Last.isValidNamespace():
|
||||
return errInvalidNamespace
|
||||
default:
|
||||
m.Last = m.Stack[len(m.Stack)-1]
|
||||
m.Stack = m.Stack[:len(m.Stack)-1]
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// NeedIndent reports whether indent whitespace should be injected.
|
||||
// A zero value means that no whitespace should be injected.
|
||||
// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody
|
||||
// should be appended to the output immediately before the next token.
|
||||
func (m stateMachine) NeedIndent(next Kind) (n int) {
|
||||
willEnd := next == '}' || next == ']'
|
||||
switch {
|
||||
case m.Depth() == 1:
|
||||
return 0 // top-level values are never indented
|
||||
case m.Last.Length() == 0 && willEnd:
|
||||
return 0 // an empty object or array is never indented
|
||||
case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
|
||||
return m.Depth()
|
||||
case willEnd:
|
||||
return m.Depth() - 1
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// MayAppendDelim appends a colon or comma that may precede the next token.
|
||||
func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
|
||||
switch {
|
||||
case m.Last.needImplicitColon():
|
||||
return append(b, ':')
|
||||
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
|
||||
return append(b, ',')
|
||||
default:
|
||||
return b
|
||||
}
|
||||
}
|
||||
|
||||
// needDelim reports whether a colon or comma token should be implicitly emitted
|
||||
// before the next token of the specified kind.
|
||||
// A zero value means no delimiter should be emitted.
|
||||
func (m stateMachine) needDelim(next Kind) (delim byte) {
|
||||
switch {
|
||||
case m.Last.needImplicitColon():
|
||||
return ':'
|
||||
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
|
||||
return ','
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// InvalidateDisabledNamespaces marks all disabled namespaces as invalid.
|
||||
//
|
||||
// For efficiency, Marshal and Unmarshal may disable namespaces since there are
|
||||
// more efficient ways to track duplicate names. However, if an error occurs,
|
||||
// the namespaces in Encoder or Decoder will be left in an inconsistent state.
|
||||
// Mark the namespaces as invalid so that future method calls on
|
||||
// Encoder or Decoder will return an error.
|
||||
func (m *stateMachine) InvalidateDisabledNamespaces() {
|
||||
for i := range m.Depth() {
|
||||
e := m.index(i)
|
||||
if !e.isActiveNamespace() {
|
||||
e.invalidateNamespace()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// stateEntry packs several facts about one open JSON container
// into a single unsigned integer:
//   - whether this represents a JSON object or array,
//   - whether this object should check for duplicate names, and
//   - how many elements are in this JSON object or array.
type stateEntry uint64
|
||||
|
||||
const (
|
||||
// The type mask (1 bit) records whether this is a JSON object or array.
|
||||
stateTypeMask stateEntry = 0x8000_0000_0000_0000
|
||||
stateTypeObject stateEntry = 0x8000_0000_0000_0000
|
||||
stateTypeArray stateEntry = 0x0000_0000_0000_0000
|
||||
|
||||
// The name check mask (2 bit) records whether to update
|
||||
// the namespaces for the current JSON object and
|
||||
// whether the namespace is valid.
|
||||
stateNamespaceMask stateEntry = 0x6000_0000_0000_0000
|
||||
stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
|
||||
stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
|
||||
|
||||
// The count mask (61 bits) records the number of elements.
|
||||
stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff
|
||||
stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
|
||||
stateCountOdd stateEntry = 0x0000_0000_0000_0001
|
||||
stateCountEven stateEntry = 0x0000_0000_0000_0000
|
||||
)
|
||||
|
||||
// Length reports the number of elements in the JSON object or array.
|
||||
// Each name and value in an object entry is treated as a separate element.
|
||||
func (e stateEntry) Length() int64 {
|
||||
return int64(e & stateCountMask)
|
||||
}
|
||||
|
||||
// isObject reports whether this is a JSON object.
|
||||
func (e stateEntry) isObject() bool {
|
||||
return e&stateTypeMask == stateTypeObject
|
||||
}
|
||||
|
||||
// isArray reports whether this is a JSON array.
|
||||
func (e stateEntry) isArray() bool {
|
||||
return e&stateTypeMask == stateTypeArray
|
||||
}
|
||||
|
||||
// NeedObjectName reports whether the next token must be a JSON string,
|
||||
// which is necessary for JSON object names.
|
||||
func (e stateEntry) NeedObjectName() bool {
|
||||
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
|
||||
}
|
||||
|
||||
// needImplicitColon reports whether an colon should occur next,
|
||||
// which always occurs after JSON object names.
|
||||
func (e stateEntry) needImplicitColon() bool {
|
||||
return e.needObjectValue()
|
||||
}
|
||||
|
||||
// needObjectValue reports whether the next token must be a JSON value,
|
||||
// which is necessary after every JSON object name.
|
||||
func (e stateEntry) needObjectValue() bool {
|
||||
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
|
||||
}
|
||||
|
||||
// needImplicitComma reports whether an comma should occur next,
|
||||
// which always occurs after a value in a JSON object or array
|
||||
// before the next value (or name).
|
||||
func (e stateEntry) needImplicitComma(next Kind) bool {
|
||||
return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
|
||||
}
|
||||
|
||||
// Increment increments the number of elements for the current object or array.
|
||||
// This assumes that overflow won't practically be an issue since
|
||||
// 1<<bits.OnesCount(stateCountMask) is sufficiently large.
|
||||
func (e *stateEntry) Increment() {
|
||||
(*e)++
|
||||
}
|
||||
|
||||
// decrement decrements the number of elements for the current object or array.
|
||||
// It is the callers responsibility to ensure that e.length > 0.
|
||||
func (e *stateEntry) decrement() {
|
||||
(*e)--
|
||||
}
|
||||
|
||||
// DisableNamespace disables the JSON object namespace such that the
|
||||
// Encoder or Decoder no longer updates the namespace.
|
||||
func (e *stateEntry) DisableNamespace() {
|
||||
*e |= stateDisableNamespace
|
||||
}
|
||||
|
||||
// isActiveNamespace reports whether the JSON object namespace is actively
|
||||
// being updated and used for duplicate name checks.
|
||||
func (e stateEntry) isActiveNamespace() bool {
|
||||
return e&(stateDisableNamespace) == 0
|
||||
}
|
||||
|
||||
// invalidateNamespace marks the JSON object namespace as being invalid.
|
||||
func (e *stateEntry) invalidateNamespace() {
|
||||
*e |= stateInvalidNamespace
|
||||
}
|
||||
|
||||
// isValidNamespace reports whether the JSON object namespace is valid.
|
||||
func (e stateEntry) isValidNamespace() bool {
|
||||
return e&(stateInvalidNamespace) == 0
|
||||
}
|
||||
|
||||
// objectNameStack is a stack of names when descending into a JSON object.
// In contrast to objectNamespaceStack, this only has to remember a single name
// per JSON object.
//
// This data structure may contain offsets to encodeBuffer or decodeBuffer.
// It violates clean abstraction of layers, but is significantly more efficient.
// This ensures that popping and pushing in the common case is a trivial
// push/pop of an offset integer.
//
// The zero value is an empty names stack ready for use.
type objectNameStack struct {
	// offsets is a stack of offsets for each name.
	// A non-negative offset is the ending offset into the local names buffer.
	// A negative offset is the bit-wise inverse of a starting offset into
	// a remote buffer (e.g., encodeBuffer or decodeBuffer).
	// A math.MinInt offset at the end implies that the last object is empty.
	// Invariant: Positive offsets always occur before negative offsets.
	offsets []int
	// unquotedNames is a back-to-back concatenation of names.
	unquotedNames []byte
}
|
||||
|
||||
func (ns *objectNameStack) reset() {
|
||||
ns.offsets = ns.offsets[:0]
|
||||
ns.unquotedNames = ns.unquotedNames[:0]
|
||||
if cap(ns.offsets) > 1<<6 {
|
||||
ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
if cap(ns.unquotedNames) > 1<<10 {
|
||||
ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *objectNameStack) length() int {
|
||||
return len(ns.offsets)
|
||||
}
|
||||
|
||||
// getUnquoted retrieves the ith unquoted name in the stack.
|
||||
// It returns an empty string if the last object is empty.
|
||||
//
|
||||
// Invariant: Must call copyQuotedBuffer beforehand.
|
||||
func (ns *objectNameStack) getUnquoted(i int) []byte {
|
||||
ns.ensureCopiedBuffer()
|
||||
if i == 0 {
|
||||
return ns.unquotedNames[:ns.offsets[0]]
|
||||
} else {
|
||||
return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
|
||||
}
|
||||
}
|
||||
|
||||
// invalidOffset is the sentinel stored on the name stack to indicate
// that the last JSON object currently has no name.
const invalidOffset = math.MinInt
|
||||
|
||||
// push descends into a nested JSON object.
|
||||
func (ns *objectNameStack) push() {
|
||||
ns.offsets = append(ns.offsets, invalidOffset)
|
||||
}
|
||||
|
||||
// ReplaceLastQuotedOffset replaces the last name with the starting offset
|
||||
// to the quoted name in some remote buffer. All offsets provided must be
|
||||
// relative to the same buffer until copyQuotedBuffer is called.
|
||||
func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
|
||||
// Use bit-wise inversion instead of naive multiplication by -1 to avoid
|
||||
// ambiguity regarding zero (which is a valid offset into the names field).
|
||||
// Bit-wise inversion is mathematically equivalent to -i-1,
|
||||
// such that 0 becomes -1, 1 becomes -2, and so forth.
|
||||
// This ensures that remote offsets are always negative.
|
||||
ns.offsets[len(ns.offsets)-1] = ^i
|
||||
}
|
||||
|
||||
// replaceLastUnquotedName replaces the last name with the provided name.
|
||||
//
|
||||
// Invariant: Must call copyQuotedBuffer beforehand.
|
||||
func (ns *objectNameStack) replaceLastUnquotedName(s string) {
|
||||
ns.ensureCopiedBuffer()
|
||||
var startOffset int
|
||||
if len(ns.offsets) > 1 {
|
||||
startOffset = ns.offsets[len(ns.offsets)-2]
|
||||
}
|
||||
ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
|
||||
ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
|
||||
}
|
||||
|
||||
// clearLast removes any name in the last JSON object.
|
||||
// It is semantically equivalent to ns.push followed by ns.pop.
|
||||
func (ns *objectNameStack) clearLast() {
|
||||
ns.offsets[len(ns.offsets)-1] = invalidOffset
|
||||
}
|
||||
|
||||
// pop ascends out of a nested JSON object.
|
||||
func (ns *objectNameStack) pop() {
|
||||
ns.offsets = ns.offsets[:len(ns.offsets)-1]
|
||||
}
|
||||
|
||||
// copyQuotedBuffer copies names from the remote buffer into the local names
|
||||
// buffer so that there are no more offset references into the remote buffer.
|
||||
// This allows the remote buffer to change contents without affecting
|
||||
// the names that this data structure is trying to remember.
|
||||
func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
|
||||
// Find the first negative offset.
|
||||
var i int
|
||||
for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
|
||||
continue
|
||||
}
|
||||
|
||||
// Copy each name from the remote buffer into the local buffer.
|
||||
for i = i + 1; i < len(ns.offsets); i++ {
|
||||
if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
|
||||
if i == 0 {
|
||||
ns.offsets[i] = 0
|
||||
} else {
|
||||
ns.offsets[i] = ns.offsets[i-1]
|
||||
}
|
||||
break // last JSON object had a push without any names
|
||||
}
|
||||
|
||||
// As a form of Hyrum proofing, we write an invalid character into the
|
||||
// buffer to make misuse of Decoder.ReadToken more obvious.
|
||||
// We need to undo that mutation here.
|
||||
quotedName := b[^ns.offsets[i]:]
|
||||
if quotedName[0] == invalidateBufferByte {
|
||||
quotedName[0] = '"'
|
||||
}
|
||||
|
||||
// Append the unquoted name to the local buffer.
|
||||
var startOffset int
|
||||
if i > 0 {
|
||||
startOffset = ns.offsets[i-1]
|
||||
}
|
||||
if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
|
||||
ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
|
||||
} else {
|
||||
ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
|
||||
}
|
||||
ns.offsets[i] = len(ns.unquotedNames)
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *objectNameStack) ensureCopiedBuffer() {
|
||||
if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
|
||||
panic("BUG: copyQuotedBuffer not called beforehand")
|
||||
}
|
||||
}
|
||||
|
||||
// objectNamespaceStack is a stack of object namespaces.
|
||||
// This data structure assists in detecting duplicate names.
|
||||
type objectNamespaceStack []objectNamespace
|
||||
|
||||
// reset resets the object namespace stack.
|
||||
func (nss *objectNamespaceStack) reset() {
|
||||
if cap(*nss) > 1<<10 {
|
||||
*nss = nil
|
||||
}
|
||||
*nss = (*nss)[:0]
|
||||
}
|
||||
|
||||
// push starts a new namespace for a nested JSON object.
|
||||
func (nss *objectNamespaceStack) push() {
|
||||
if cap(*nss) > len(*nss) {
|
||||
*nss = (*nss)[:len(*nss)+1]
|
||||
nss.Last().reset()
|
||||
} else {
|
||||
*nss = append(*nss, objectNamespace{})
|
||||
}
|
||||
}
|
||||
|
||||
// Last returns a pointer to the last JSON object namespace.
|
||||
func (nss objectNamespaceStack) Last() *objectNamespace {
|
||||
return &nss[len(nss)-1]
|
||||
}
|
||||
|
||||
// pop terminates the namespace for a nested JSON object.
|
||||
func (nss *objectNamespaceStack) pop() {
|
||||
*nss = (*nss)[:len(*nss)-1]
|
||||
}
|
||||
|
||||
// objectNamespace is the namespace for a JSON object.
// In contrast to objectNameStack, this needs to remember all names
// per JSON object.
//
// The zero value is an empty namespace ready for use.
type objectNamespace struct {
	// It relies on a linear search over all the names before switching
	// to use a Go map for direct lookup.

	// endOffsets is a list of offsets to the end of each name in buffers.
	// The length of offsets is the number of names in the namespace.
	endOffsets []uint
	// allUnquotedNames is a back-to-back concatenation of every name in the namespace.
	allUnquotedNames []byte
	// mapNames is a Go map containing every name in the namespace.
	// Only valid if non-nil.
	mapNames map[string]struct{}
}
|
||||
|
||||
// reset resets the namespace to be empty.
|
||||
func (ns *objectNamespace) reset() {
|
||||
ns.endOffsets = ns.endOffsets[:0]
|
||||
ns.allUnquotedNames = ns.allUnquotedNames[:0]
|
||||
ns.mapNames = nil
|
||||
if cap(ns.endOffsets) > 1<<6 {
|
||||
ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
if cap(ns.allUnquotedNames) > 1<<10 {
|
||||
ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
|
||||
}
|
||||
}
|
||||
|
||||
// length reports the number of names in the namespace.
|
||||
func (ns *objectNamespace) length() int {
|
||||
return len(ns.endOffsets)
|
||||
}
|
||||
|
||||
// getUnquoted retrieves the ith unquoted name in the namespace.
|
||||
func (ns *objectNamespace) getUnquoted(i int) []byte {
|
||||
if i == 0 {
|
||||
return ns.allUnquotedNames[:ns.endOffsets[0]]
|
||||
} else {
|
||||
return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]]
|
||||
}
|
||||
}
|
||||
|
||||
// lastUnquoted retrieves the last name in the namespace.
|
||||
func (ns *objectNamespace) lastUnquoted() []byte {
|
||||
return ns.getUnquoted(ns.length() - 1)
|
||||
}
|
||||
|
||||
// insertQuoted inserts a name and reports whether it was inserted,
|
||||
// which only occurs if name is not already in the namespace.
|
||||
// The provided name must be a valid JSON string.
|
||||
func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
|
||||
if isVerbatim {
|
||||
name = name[len(`"`) : len(name)-len(`"`)]
|
||||
}
|
||||
return ns.insert(name, !isVerbatim)
|
||||
}
|
||||
func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
|
||||
return ns.insert(name, false)
|
||||
}
|
||||
func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
|
||||
var allNames []byte
|
||||
if quoted {
|
||||
allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
|
||||
} else {
|
||||
allNames = append(ns.allUnquotedNames, name...)
|
||||
}
|
||||
name = allNames[len(ns.allUnquotedNames):]
|
||||
|
||||
// Switch to a map if the buffer is too large for linear search.
|
||||
// This does not add the current name to the map.
|
||||
if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
|
||||
ns.mapNames = make(map[string]struct{})
|
||||
var startOffset uint
|
||||
for _, endOffset := range ns.endOffsets {
|
||||
name := ns.allUnquotedNames[startOffset:endOffset]
|
||||
ns.mapNames[string(name)] = struct{}{} // allocates a new string
|
||||
startOffset = endOffset
|
||||
}
|
||||
}
|
||||
|
||||
if ns.mapNames == nil {
|
||||
// Perform linear search over the buffer to find matching names.
|
||||
// It provides O(n) lookup, but does not require any allocations.
|
||||
var startOffset uint
|
||||
for _, endOffset := range ns.endOffsets {
|
||||
if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
|
||||
return false
|
||||
}
|
||||
startOffset = endOffset
|
||||
}
|
||||
} else {
|
||||
// Use the map if it is populated.
|
||||
// It provides O(1) lookup, but requires a string allocation per name.
|
||||
if _, ok := ns.mapNames[string(name)]; ok {
|
||||
return false
|
||||
}
|
||||
ns.mapNames[string(name)] = struct{}{} // allocates a new string
|
||||
}
|
||||
|
||||
ns.allUnquotedNames = allNames
|
||||
ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
|
||||
return true
|
||||
}
|
||||
|
||||
// removeLast removes the last name in the namespace.
|
||||
func (ns *objectNamespace) removeLast() {
|
||||
if ns.mapNames != nil {
|
||||
delete(ns.mapNames, string(ns.lastUnquoted()))
|
||||
}
|
||||
if ns.length()-1 == 0 {
|
||||
ns.endOffsets = ns.endOffsets[:0]
|
||||
ns.allUnquotedNames = ns.allUnquotedNames[:0]
|
||||
} else {
|
||||
ns.endOffsets = ns.endOffsets[:ns.length()-1]
|
||||
ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
|
||||
}
|
||||
}
|
||||
@@ -1,527 +0,0 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
"next.orly.dev/pkg/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// NOTE: Token is analogous to v1 json.Token.
|
||||
|
||||
// Integer bounds used when saturating JSON numbers to int64 or uint64.
const (
	minInt64, maxInt64   = math.MinInt64, math.MaxInt64
	minUint64, maxUint64 = 0, math.MaxUint64 // minUint64 is for consistency and readability purposes
)

// invalidTokenPanic is the panic message used when a raw Token is accessed
// although its recorded offset no longer matches its decode buffer.
const invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call"

// errInvalidToken is the error value describing an invalid Token.
var errInvalidToken = errors.New("invalid jsontext.Token")
|
||||
|
||||
// Token represents a lexical JSON token, which may be one of the following:
|
||||
// - a JSON literal (i.e., null, true, or false)
|
||||
// - a JSON string (e.g., "hello, world!")
|
||||
// - a JSON number (e.g., 123.456)
|
||||
// - a begin or end delimiter for a JSON object (i.e., { or } )
|
||||
// - a begin or end delimiter for a JSON array (i.e., [ or ] )
|
||||
//
|
||||
// A Token cannot represent entire array or object values, while a [Value] can.
|
||||
// There is no Token to represent commas and colons since
|
||||
// these structural tokens can be inferred from the surrounding context.
|
||||
type Token struct {
|
||||
nonComparable
|
||||
|
||||
// Tokens can exist in either a "raw" or an "exact" form.
|
||||
// Tokens produced by the Decoder are in the "raw" form.
|
||||
// Tokens returned by constructors are usually in the "exact" form.
|
||||
// The Encoder accepts Tokens in either the "raw" or "exact" form.
|
||||
//
|
||||
// The following chart shows the possible values for each Token type:
|
||||
// ╔═════════════════╦════════════╤════════════╤════════════╗
|
||||
// ║ Token type ║ raw field │ str field │ num field ║
|
||||
// ╠═════════════════╬════════════╪════════════╪════════════╣
|
||||
// ║ null (raw) ║ "null" │ "" │ 0 ║
|
||||
// ║ false (raw) ║ "false" │ "" │ 0 ║
|
||||
// ║ true (raw) ║ "true" │ "" │ 0 ║
|
||||
// ║ string (raw) ║ non-empty │ "" │ offset ║
|
||||
// ║ string (string) ║ nil │ non-empty │ 0 ║
|
||||
// ║ number (raw) ║ non-empty │ "" │ offset ║
|
||||
// ║ number (float) ║ nil │ "f" │ non-zero ║
|
||||
// ║ number (int64) ║ nil │ "i" │ non-zero ║
|
||||
// ║ number (uint64) ║ nil │ "u" │ non-zero ║
|
||||
// ║ object (delim) ║ "{" or "}" │ "" │ 0 ║
|
||||
// ║ array (delim) ║ "[" or "]" │ "" │ 0 ║
|
||||
// ╚═════════════════╩════════════╧════════════╧════════════╝
|
||||
//
|
||||
// Notes:
|
||||
// - For tokens stored in "raw" form, the num field contains the
|
||||
// absolute offset determined by raw.previousOffsetStart().
|
||||
// The buffer itself is stored in raw.previousBuffer().
|
||||
// - JSON literals and structural characters are always in the "raw" form.
|
||||
// - JSON strings and numbers can be in either "raw" or "exact" forms.
|
||||
// - The exact zero value of JSON strings and numbers in the "exact" forms
|
||||
// have ambiguous representation. Thus, they are always represented
|
||||
// in the "raw" form.
|
||||
|
||||
// raw contains a reference to the raw decode buffer.
|
||||
// If non-nil, then its value takes precedence over str and num.
|
||||
// It is only valid if num == raw.previousOffsetStart().
|
||||
raw *decodeBuffer
|
||||
|
||||
// str is the unescaped JSON string if num is zero.
|
||||
// Otherwise, it is "f", "i", or "u" if num should be interpreted
|
||||
// as a float64, int64, or uint64, respectively.
|
||||
str string
|
||||
|
||||
// num is a float64, int64, or uint64 stored as a uint64 value.
|
||||
// It is non-zero for any JSON number in the "exact" form.
|
||||
num uint64
|
||||
}
|
||||
|
||||
// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues?
|
||||
|
||||
var (
|
||||
Null Token = rawToken("null")
|
||||
False Token = rawToken("false")
|
||||
True Token = rawToken("true")
|
||||
|
||||
BeginObject Token = rawToken("{")
|
||||
EndObject Token = rawToken("}")
|
||||
BeginArray Token = rawToken("[")
|
||||
EndArray Token = rawToken("]")
|
||||
|
||||
zeroString Token = rawToken(`""`)
|
||||
zeroNumber Token = rawToken(`0`)
|
||||
|
||||
nanString Token = String("NaN")
|
||||
pinfString Token = String("Infinity")
|
||||
ninfString Token = String("-Infinity")
|
||||
)
|
||||
|
||||
func rawToken(s string) Token {
|
||||
return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}}
|
||||
}
|
||||
|
||||
// Bool constructs a Token representing a JSON boolean.
|
||||
func Bool(b bool) Token {
|
||||
if b {
|
||||
return True
|
||||
}
|
||||
return False
|
||||
}
|
||||
|
||||
// String constructs a Token representing a JSON string.
|
||||
// The provided string should contain valid UTF-8, otherwise invalid characters
|
||||
// may be mangled as the Unicode replacement character.
|
||||
func String(s string) Token {
|
||||
if len(s) == 0 {
|
||||
return zeroString
|
||||
}
|
||||
return Token{str: s}
|
||||
}
|
||||
|
||||
// Float constructs a Token representing a JSON number.
|
||||
// The values NaN, +Inf, and -Inf will be represented
|
||||
// as a JSON string with the values "NaN", "Infinity", and "-Infinity".
|
||||
func Float(n float64) Token {
|
||||
switch {
|
||||
case math.Float64bits(n) == 0:
|
||||
return zeroNumber
|
||||
case math.IsNaN(n):
|
||||
return nanString
|
||||
case math.IsInf(n, +1):
|
||||
return pinfString
|
||||
case math.IsInf(n, -1):
|
||||
return ninfString
|
||||
}
|
||||
return Token{str: "f", num: math.Float64bits(n)}
|
||||
}
|
||||
|
||||
// Int constructs a Token representing a JSON number from an int64.
|
||||
func Int(n int64) Token {
|
||||
if n == 0 {
|
||||
return zeroNumber
|
||||
}
|
||||
return Token{str: "i", num: uint64(n)}
|
||||
}
|
||||
|
||||
// Uint constructs a Token representing a JSON number from a uint64.
|
||||
func Uint(n uint64) Token {
|
||||
if n == 0 {
|
||||
return zeroNumber
|
||||
}
|
||||
return Token{str: "u", num: uint64(n)}
|
||||
}
|
||||
|
||||
// Clone makes a copy of the Token such that its value remains valid
|
||||
// even after a subsequent [Decoder.Read] call.
|
||||
func (t Token) Clone() Token {
|
||||
// TODO: Allow caller to avoid any allocations?
|
||||
if raw := t.raw; raw != nil {
|
||||
// Avoid copying globals.
|
||||
if t.raw.prevStart == 0 {
|
||||
switch t.raw {
|
||||
case Null.raw:
|
||||
return Null
|
||||
case False.raw:
|
||||
return False
|
||||
case True.raw:
|
||||
return True
|
||||
case BeginObject.raw:
|
||||
return BeginObject
|
||||
case EndObject.raw:
|
||||
return EndObject
|
||||
case BeginArray.raw:
|
||||
return BeginArray
|
||||
case EndArray.raw:
|
||||
return EndArray
|
||||
}
|
||||
}
|
||||
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
buf := bytes.Clone(raw.previousBuffer())
|
||||
return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}}
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// Bool returns the value for a JSON boolean.
|
||||
// It panics if the token kind is not a JSON boolean.
|
||||
func (t Token) Bool() bool {
|
||||
switch t.raw {
|
||||
case True.raw:
|
||||
return true
|
||||
case False.raw:
|
||||
return false
|
||||
default:
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
}
|
||||
|
||||
// appendString appends a JSON string to dst and returns it.
|
||||
// It panics if t is not a JSON string.
|
||||
func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw string value.
|
||||
buf := raw.previousBuffer()
|
||||
if Kind(buf[0]) == '"' {
|
||||
if jsonwire.ConsumeSimpleString(buf) == len(buf) {
|
||||
return append(dst, buf...), nil
|
||||
}
|
||||
dst, _, err := jsonwire.ReformatString(dst, buf, flags)
|
||||
return dst, err
|
||||
}
|
||||
} else if len(t.str) != 0 && t.num == 0 {
|
||||
// Handle exact string value.
|
||||
return jsonwire.AppendQuote(dst, t.str, flags)
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// String returns the unescaped string value for a JSON string.
|
||||
// For other JSON kinds, this returns the raw JSON representation.
|
||||
func (t Token) String() string {
|
||||
// This is inlinable to take advantage of "function outlining".
|
||||
// This avoids an allocation for the string(b) conversion
|
||||
// if the caller does not use the string in an escaping manner.
|
||||
// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
|
||||
s, b := t.string()
|
||||
if len(b) > 0 {
|
||||
return string(b)
|
||||
}
|
||||
return s
|
||||
}
|
||||
func (t Token) string() (string, []byte) {
|
||||
if raw := t.raw; raw != nil {
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
buf := raw.previousBuffer()
|
||||
if buf[0] == '"' {
|
||||
// TODO: Preserve ValueFlags in Token?
|
||||
isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf)
|
||||
return "", jsonwire.UnquoteMayCopy(buf, isVerbatim)
|
||||
}
|
||||
// Handle tokens that are not JSON strings for fmt.Stringer.
|
||||
return "", buf
|
||||
}
|
||||
if len(t.str) != 0 && t.num == 0 {
|
||||
return t.str, nil
|
||||
}
|
||||
// Handle tokens that are not JSON strings for fmt.Stringer.
|
||||
if t.num > 0 {
|
||||
switch t.str[0] {
|
||||
case 'f':
|
||||
return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil
|
||||
case 'i':
|
||||
return strconv.FormatInt(int64(t.num), 10), nil
|
||||
case 'u':
|
||||
return strconv.FormatUint(uint64(t.num), 10), nil
|
||||
}
|
||||
}
|
||||
return "<invalid jsontext.Token>", nil
|
||||
}
|
||||
|
||||
// appendNumber appends a JSON number to dst and returns it.
|
||||
// It panics if t is not a JSON number.
|
||||
func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw number value.
|
||||
buf := raw.previousBuffer()
|
||||
if Kind(buf[0]).normalize() == '0' {
|
||||
dst, _, err := jsonwire.ReformatNumber(dst, buf, flags)
|
||||
return dst, err
|
||||
}
|
||||
} else if t.num != 0 {
|
||||
// Handle exact number value.
|
||||
switch t.str[0] {
|
||||
case 'f':
|
||||
return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil
|
||||
case 'i':
|
||||
return strconv.AppendInt(dst, int64(t.num), 10), nil
|
||||
case 'u':
|
||||
return strconv.AppendUint(dst, uint64(t.num), 10), nil
|
||||
}
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// Float returns the floating-point value for a JSON number.
|
||||
// It returns a NaN, +Inf, or -Inf value for any JSON string
|
||||
// with the values "NaN", "Infinity", or "-Infinity".
|
||||
// It panics for all other cases.
|
||||
func (t Token) Float() float64 {
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw number value.
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
buf := raw.previousBuffer()
|
||||
if Kind(buf[0]).normalize() == '0' {
|
||||
fv, _ := jsonwire.ParseFloat(buf, 64)
|
||||
return fv
|
||||
}
|
||||
} else if t.num != 0 {
|
||||
// Handle exact number value.
|
||||
switch t.str[0] {
|
||||
case 'f':
|
||||
return math.Float64frombits(t.num)
|
||||
case 'i':
|
||||
return float64(int64(t.num))
|
||||
case 'u':
|
||||
return float64(uint64(t.num))
|
||||
}
|
||||
}
|
||||
|
||||
// Handle string values with "NaN", "Infinity", or "-Infinity".
|
||||
if t.Kind() == '"' {
|
||||
switch t.String() {
|
||||
case "NaN":
|
||||
return math.NaN()
|
||||
case "Infinity":
|
||||
return math.Inf(+1)
|
||||
case "-Infinity":
|
||||
return math.Inf(-1)
|
||||
}
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// Int returns the signed integer value for a JSON number.
|
||||
// The fractional component of any number is ignored (truncation toward zero).
|
||||
// Any number beyond the representation of an int64 will be saturated
|
||||
// to the closest representable value.
|
||||
// It panics if the token kind is not a JSON number.
|
||||
func (t Token) Int() int64 {
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw integer value.
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
neg := false
|
||||
buf := raw.previousBuffer()
|
||||
if len(buf) > 0 && buf[0] == '-' {
|
||||
neg, buf = true, buf[1:]
|
||||
}
|
||||
if numAbs, ok := jsonwire.ParseUint(buf); ok {
|
||||
if neg {
|
||||
if numAbs > -minInt64 {
|
||||
return minInt64
|
||||
}
|
||||
return -1 * int64(numAbs)
|
||||
} else {
|
||||
if numAbs > +maxInt64 {
|
||||
return maxInt64
|
||||
}
|
||||
return +1 * int64(numAbs)
|
||||
}
|
||||
}
|
||||
} else if t.num != 0 {
|
||||
// Handle exact integer value.
|
||||
switch t.str[0] {
|
||||
case 'i':
|
||||
return int64(t.num)
|
||||
case 'u':
|
||||
if t.num > maxInt64 {
|
||||
return maxInt64
|
||||
}
|
||||
return int64(t.num)
|
||||
}
|
||||
}
|
||||
|
||||
// Handle JSON number that is a floating-point value.
|
||||
if t.Kind() == '0' {
|
||||
switch fv := t.Float(); {
|
||||
case fv >= maxInt64:
|
||||
return maxInt64
|
||||
case fv <= minInt64:
|
||||
return minInt64
|
||||
default:
|
||||
return int64(fv) // truncation toward zero
|
||||
}
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// Uint returns the unsigned integer value for a JSON number.
|
||||
// The fractional component of any number is ignored (truncation toward zero).
|
||||
// Any number beyond the representation of an uint64 will be saturated
|
||||
// to the closest representable value.
|
||||
// It panics if the token kind is not a JSON number.
|
||||
func (t Token) Uint() uint64 {
|
||||
// NOTE: This accessor returns 0 for any negative JSON number,
|
||||
// which might be surprising, but is at least consistent with the behavior
|
||||
// of saturating out-of-bounds numbers to the closest representable number.
|
||||
|
||||
if raw := t.raw; raw != nil {
|
||||
// Handle raw integer value.
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
neg := false
|
||||
buf := raw.previousBuffer()
|
||||
if len(buf) > 0 && buf[0] == '-' {
|
||||
neg, buf = true, buf[1:]
|
||||
}
|
||||
if num, ok := jsonwire.ParseUint(buf); ok {
|
||||
if neg {
|
||||
return minUint64
|
||||
}
|
||||
return num
|
||||
}
|
||||
} else if t.num != 0 {
|
||||
// Handle exact integer value.
|
||||
switch t.str[0] {
|
||||
case 'u':
|
||||
return t.num
|
||||
case 'i':
|
||||
if int64(t.num) < minUint64 {
|
||||
return minUint64
|
||||
}
|
||||
return uint64(int64(t.num))
|
||||
}
|
||||
}
|
||||
|
||||
// Handle JSON number that is a floating-point value.
|
||||
if t.Kind() == '0' {
|
||||
switch fv := t.Float(); {
|
||||
case fv >= maxUint64:
|
||||
return maxUint64
|
||||
case fv <= minUint64:
|
||||
return minUint64
|
||||
default:
|
||||
return uint64(fv) // truncation toward zero
|
||||
}
|
||||
}
|
||||
|
||||
panic("invalid JSON token kind: " + t.Kind().String())
|
||||
}
|
||||
|
||||
// Kind returns the token kind.
|
||||
func (t Token) Kind() Kind {
|
||||
switch {
|
||||
case t.raw != nil:
|
||||
raw := t.raw
|
||||
if uint64(raw.previousOffsetStart()) != t.num {
|
||||
panic(invalidTokenPanic)
|
||||
}
|
||||
return Kind(t.raw.buf[raw.prevStart]).normalize()
|
||||
case t.num != 0:
|
||||
return '0'
|
||||
case len(t.str) != 0:
|
||||
return '"'
|
||||
default:
|
||||
return invalidKind
|
||||
}
|
||||
}
|
||||
|
||||
// Kind identifies a JSON token kind using a single byte.
// The byte is the first byte of the grammar for that kind,
// except that every number is represented as '0':
//
//   - 'n': null
//   - 'f': false
//   - 't': true
//   - '"': string
//   - '0': number
//   - '{': object begin
//   - '}': object end
//   - '[': array begin
//   - ']': array end
//
// The invalid kind is usually 0,
// but may be some other value when the JSON data is invalid.
type Kind byte

// invalidKind is the zero Kind, which represents no valid token kind.
const invalidKind = Kind(0)
|
||||
|
||||
// String prints the kind in a humanly readable fashion.
|
||||
func (k Kind) String() string {
|
||||
switch k {
|
||||
case 'n':
|
||||
return "null"
|
||||
case 'f':
|
||||
return "false"
|
||||
case 't':
|
||||
return "true"
|
||||
case '"':
|
||||
return "string"
|
||||
case '0':
|
||||
return "number"
|
||||
case '{':
|
||||
return "{"
|
||||
case '}':
|
||||
return "}"
|
||||
case '[':
|
||||
return "["
|
||||
case ']':
|
||||
return "]"
|
||||
default:
|
||||
return "<invalid jsontext.Kind: " + jsonwire.QuoteRune(string(k)) + ">"
|
||||
}
|
||||
}
|
||||
|
||||
// normalize coalesces all possible starting characters of a number as just '0'.
|
||||
func (k Kind) normalize() Kind {
|
||||
if k == '-' || ('0' <= k && k <= '9') {
|
||||
return '0'
|
||||
}
|
||||
return k
|
||||
}
|
||||
@@ -1,395 +0,0 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package jsontext
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
"next.orly.dev/pkg/json/internal/jsonwire"
|
||||
)
|
||||
|
||||
// NOTE: Value is analogous to v1 json.RawMessage.
|
||||
|
||||
// AppendFormat formats the JSON value in src and appends it to dst
|
||||
// according to the specified options.
|
||||
// See [Value.Format] for more details about the formatting behavior.
|
||||
//
|
||||
// The dst and src may overlap.
|
||||
// If an error is reported, then the entirety of src is appended to dst.
|
||||
func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) {
|
||||
e := getBufferedEncoder(opts...)
|
||||
defer putBufferedEncoder(e)
|
||||
e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
|
||||
if err := e.s.WriteValue(src); err != nil {
|
||||
return append(dst, src...), err
|
||||
}
|
||||
return append(dst, e.s.Buf...), nil
|
||||
}
|
||||
|
||||
// Value holds a single raw JSON value. It is one of:
//   - a JSON literal (i.e., null, true, or false)
//   - a JSON string (e.g., "hello, world!")
//   - a JSON number (e.g., 123.456)
//   - an entire JSON object (e.g., {"fizz":"buzz"} )
//   - an entire JSON array (e.g., [1,2,3] )
//
// Unlike a [Token], a Value can hold an entire array or object.
// A Value may contain leading and/or trailing whitespace.
type Value []byte

// Clone returns a copy of v.
func (v Value) Clone() Value {
	return Value(bytes.Clone([]byte(v)))
}

// String returns v as a string; a nil Value is formatted as "null".
func (v Value) String() string {
	switch {
	case v == nil:
		return "null"
	default:
		return string(v)
	}
}
|
||||
|
||||
// IsValid reports whether the raw JSON value is syntactically valid
|
||||
// according to the specified options.
|
||||
//
|
||||
// By default (if no options are specified), it validates according to RFC 7493.
|
||||
// It verifies whether the input is properly encoded as UTF-8,
|
||||
// that escape sequences within strings decode to valid Unicode codepoints, and
|
||||
// that all names in each object are unique.
|
||||
// It does not verify whether numbers are representable within the limits
|
||||
// of any common numeric type (e.g., float64, int64, or uint64).
|
||||
//
|
||||
// Relevant options include:
|
||||
// - [AllowDuplicateNames]
|
||||
// - [AllowInvalidUTF8]
|
||||
//
|
||||
// All other options are ignored.
|
||||
func (v Value) IsValid(opts ...Options) bool {
|
||||
// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
|
||||
d := getBufferedDecoder(v, opts...)
|
||||
defer putBufferedDecoder(d)
|
||||
_, errVal := d.ReadValue()
|
||||
_, errEOF := d.ReadToken()
|
||||
return errVal == nil && errEOF == io.EOF
|
||||
}
|
||||
|
||||
// Format formats the raw JSON value in place.
|
||||
//
|
||||
// By default (if no options are specified), it validates according to RFC 7493
|
||||
// and produces the minimal JSON representation, where
|
||||
// all whitespace is elided and JSON strings use the shortest encoding.
|
||||
//
|
||||
// Relevant options include:
|
||||
// - [AllowDuplicateNames]
|
||||
// - [AllowInvalidUTF8]
|
||||
// - [EscapeForHTML]
|
||||
// - [EscapeForJS]
|
||||
// - [PreserveRawStrings]
|
||||
// - [CanonicalizeRawInts]
|
||||
// - [CanonicalizeRawFloats]
|
||||
// - [ReorderRawObjects]
|
||||
// - [SpaceAfterColon]
|
||||
// - [SpaceAfterComma]
|
||||
// - [Multiline]
|
||||
// - [WithIndent]
|
||||
// - [WithIndentPrefix]
|
||||
//
|
||||
// All other options are ignored.
|
||||
//
|
||||
// It is guaranteed to succeed if the value is valid according to the same options.
|
||||
// If the value is already formatted, then the buffer is not mutated.
|
||||
func (v *Value) Format(opts ...Options) error {
|
||||
// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
|
||||
return v.format(opts, nil)
|
||||
}
|
||||
|
||||
// format accepts two []Options to avoid the allocation appending them together.
|
||||
// It is equivalent to v.Format(append(opts1, opts2...)...).
|
||||
func (v *Value) format(opts1, opts2 []Options) error {
|
||||
e := getBufferedEncoder(opts1...)
|
||||
defer putBufferedEncoder(e)
|
||||
e.s.Join(opts2...)
|
||||
e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
|
||||
if err := e.s.WriteValue(*v); err != nil {
|
||||
return err
|
||||
}
|
||||
if !bytes.Equal(*v, e.s.Buf) {
|
||||
*v = append((*v)[:0], e.s.Buf...)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Compact removes all whitespace from the raw JSON value.
|
||||
//
|
||||
// It does not reformat JSON strings or numbers to use any other representation.
|
||||
// To maximize the set of JSON values that can be formatted,
|
||||
// this permits values with duplicate names and invalid UTF-8.
|
||||
//
|
||||
// Compact is equivalent to calling [Value.Format] with the following options:
|
||||
// - [AllowDuplicateNames](true)
|
||||
// - [AllowInvalidUTF8](true)
|
||||
// - [PreserveRawStrings](true)
|
||||
//
|
||||
// Any options specified by the caller are applied after the initial set
|
||||
// and may deliberately override prior options.
|
||||
func (v *Value) Compact(opts ...Options) error {
|
||||
return v.format([]Options{
|
||||
AllowDuplicateNames(true),
|
||||
AllowInvalidUTF8(true),
|
||||
PreserveRawStrings(true),
|
||||
}, opts)
|
||||
}
|
||||
|
||||
// Indent reformats the whitespace in the raw JSON value so that each element
|
||||
// in a JSON object or array begins on a indented line according to the nesting.
|
||||
//
|
||||
// It does not reformat JSON strings or numbers to use any other representation.
|
||||
// To maximize the set of JSON values that can be formatted,
|
||||
// this permits values with duplicate names and invalid UTF-8.
|
||||
//
|
||||
// Indent is equivalent to calling [Value.Format] with the following options:
|
||||
// - [AllowDuplicateNames](true)
|
||||
// - [AllowInvalidUTF8](true)
|
||||
// - [PreserveRawStrings](true)
|
||||
// - [Multiline](true)
|
||||
//
|
||||
// Any options specified by the caller are applied after the initial set
|
||||
// and may deliberately override prior options.
|
||||
func (v *Value) Indent(opts ...Options) error {
|
||||
return v.format([]Options{
|
||||
AllowDuplicateNames(true),
|
||||
AllowInvalidUTF8(true),
|
||||
PreserveRawStrings(true),
|
||||
Multiline(true),
|
||||
}, opts)
|
||||
}
|
||||
|
||||
// Canonicalize canonicalizes the raw JSON value according to the
|
||||
// JSON Canonicalization Scheme (JCS) as defined by RFC 8785
|
||||
// where it produces a stable representation of a JSON value.
|
||||
//
|
||||
// JSON strings are formatted to use their minimal representation,
|
||||
// JSON numbers are formatted as double precision numbers according
|
||||
// to some stable serialization algorithm.
|
||||
// JSON object members are sorted in ascending order by name.
|
||||
// All whitespace is removed.
|
||||
//
|
||||
// The output stability is dependent on the stability of the application data
|
||||
// (see RFC 8785, Appendix E). It cannot produce stable output from
|
||||
// fundamentally unstable input. For example, if the JSON value
|
||||
// contains ephemeral data (e.g., a frequently changing timestamp),
|
||||
// then the value is still unstable regardless of whether this is called.
|
||||
//
|
||||
// Canonicalize is equivalent to calling [Value.Format] with the following options:
|
||||
// - [CanonicalizeRawInts](true)
|
||||
// - [CanonicalizeRawFloats](true)
|
||||
// - [ReorderRawObjects](true)
|
||||
//
|
||||
// Any options specified by the caller are applied after the initial set
|
||||
// and may deliberately override prior options.
|
||||
//
|
||||
// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
|
||||
// Any numbers with precision beyond what is representable by that form
|
||||
// will lose their precision when canonicalized. For example, integer values
|
||||
// beyond ±2⁵³ will lose their precision. To preserve the original representation
|
||||
// of JSON integers, additionally set [CanonicalizeRawInts] to false:
|
||||
//
|
||||
// v.Canonicalize(jsontext.CanonicalizeRawInts(false))
|
||||
func (v *Value) Canonicalize(opts ...Options) error {
|
||||
return v.format([]Options{
|
||||
CanonicalizeRawInts(true),
|
||||
CanonicalizeRawFloats(true),
|
||||
ReorderRawObjects(true),
|
||||
}, opts)
|
||||
}
|
||||
|
||||
// MarshalJSON returns v as the JSON encoding of v.
|
||||
// It returns the stored value as the raw JSON output without any validation.
|
||||
// If v is nil, then this returns a JSON null.
|
||||
func (v Value) MarshalJSON() ([]byte, error) {
|
||||
// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
|
||||
if v == nil {
|
||||
return []byte("null"), nil
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON sets v as the JSON encoding of b.
|
||||
// It stores a copy of the provided raw JSON input without any validation.
|
||||
func (v *Value) UnmarshalJSON(b []byte) error {
|
||||
// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
|
||||
if v == nil {
|
||||
return errors.New("jsontext.Value: UnmarshalJSON on nil pointer")
|
||||
}
|
||||
*v = append((*v)[:0], b...)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Kind returns the starting token kind.
|
||||
// For a valid value, this will never include '}' or ']'.
|
||||
func (v Value) Kind() Kind {
|
||||
if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 {
|
||||
return Kind(v[0]).normalize()
|
||||
}
|
||||
return invalidKind
|
||||
}
|
||||
|
||||
// commaAndWhitespace is the cutset of bytes that may separate
// one object member from the next: a comma and JSON whitespace.
const commaAndWhitespace = ", \n\r\t"

// objectMember describes one member of a JSON object
// while the members of that object are being reordered.
type objectMember struct {
	// name is the member name in unquoted form.
	name []byte // e.g., "name"

	// buffer is the whole raw JSON object member. It begins immediately
	// after the previous member (or after the opening '{')
	// and ends immediately after the member value.
	buffer []byte // e.g., `, \n\r\t"name": "value"`
}
|
||||
|
||||
func (x objectMember) Compare(y objectMember) int {
|
||||
if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 {
|
||||
return c
|
||||
}
|
||||
// With [AllowDuplicateNames] or [AllowInvalidUTF8],
|
||||
// names could be identical, so also sort using the member value.
|
||||
return jsonwire.CompareUTF16(
|
||||
bytes.TrimLeft(x.buffer, commaAndWhitespace),
|
||||
bytes.TrimLeft(y.buffer, commaAndWhitespace))
|
||||
}
|
||||
|
||||
// objectMemberPool recycles the []objectMember slices handed out by
// getObjectMembers. New entries are pointers to a nil slice.
var objectMemberPool = sync.Pool{
	New: func() any {
		var members []objectMember
		return &members
	},
}
|
||||
|
||||
func getObjectMembers() *[]objectMember {
|
||||
ns := objectMemberPool.Get().(*[]objectMember)
|
||||
*ns = (*ns)[:0]
|
||||
return ns
|
||||
}
|
||||
func putObjectMembers(ns *[]objectMember) {
|
||||
if cap(*ns) < 1<<10 {
|
||||
clear(*ns) // avoid pinning name and buffer
|
||||
objectMemberPool.Put(ns)
|
||||
}
|
||||
}
|
||||
|
||||
// mustReorderObjects reorders in-place all object members in a JSON value,
|
||||
// which must be valid otherwise it panics.
|
||||
func mustReorderObjects(b []byte) {
|
||||
// Obtain a buffered encoder just to use its internal buffer as
|
||||
// a scratch buffer for reordering object members.
|
||||
e2 := getBufferedEncoder()
|
||||
defer putBufferedEncoder(e2)
|
||||
|
||||
// Disable unnecessary checks to syntactically parse the JSON value.
|
||||
d := getBufferedDecoder(b)
|
||||
defer putBufferedDecoder(d)
|
||||
d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
|
||||
mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3
|
||||
}
|
||||
|
||||
// mustReorderObjectsFromDecoder recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// Pre-conditions:
//   - The value is valid (i.e., no decoder errors should ever occur).
//   - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
//   - Exactly one JSON value is read from the Decoder.
//   - All fully-parsed JSON objects are reordered by directly moving
//     the members in the value buffer.
//
// The scratch argument points at a reusable byte slice in which the reordered
// members of an object are assembled before being copied back over the
// original bytes; it is replaced whenever a longer result has been built.
//
// The function panics if the first token read yields an error on a
// non-composite value, or if the reordered members do not occupy exactly the
// same number of bytes as the original object body.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
	switch tok, err := d.ReadToken(); tok.Kind() {
	case '{':
		// Iterate and collect the name and offsets for every object member.
		members := getObjectMembers()
		defer putObjectMembers(members)
		var prevMember objectMember
		isSorted := true

		beforeBody := d.InputOffset() // offset after '{'
		for d.PeekKind() != '}' {
			beforeName := d.InputOffset()
			var flags jsonwire.ValueFlags
			// The error is ignored: the pre-condition states the value is valid.
			name, _ := d.s.ReadValue(&flags)
			name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
			// Reorder the member's value first so nested objects are already
			// in their final form when this member's bytes are captured.
			mustReorderObjectsFromDecoder(d, scratch)
			afterValue := d.InputOffset()

			// The member buffer aliases the decoder buffer from the offset
			// before the name up to the offset after the value.
			currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
			if isSorted && len(*members) > 0 {
				isSorted = objectMember.Compare(prevMember, currMember) < 0
			}
			*members = append(*members, currMember)
			prevMember = currMember
		}
		afterBody := d.InputOffset() // offset before '}'
		d.ReadToken()                // consume the closing '}'

		// Sort the members; return early if it's already sorted.
		if isSorted {
			return
		}
		firstBufferBeforeSorting := (*members)[0].buffer
		slices.SortFunc(*members, objectMember.Compare)
		firstBufferAfterSorting := (*members)[0].buffer

		// Append the reordered members to a new buffer,
		// then copy the reordered members back over the original members.
		// Avoid swapping in place since each member may be a different size
		// where moving a member over a smaller member may corrupt the data
		// for subsequent members before they have been moved.
		//
		// The following invariant must hold:
		//	sum([m.after-m.before for m in members]) == afterBody-beforeBody
		commaAndWhitespacePrefix := func(b []byte) []byte {
			return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
		}
		sorted := (*scratch)[:0]
		for i, member := range *members {
			// Members are identified by the address of their first byte,
			// since every buffer is a distinct sub-slice of d.s.buf.
			switch {
			case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
				// First member after sorting is not the first member before sorting,
				// so use the prefix of the first member before sorting.
				sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
				sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
			case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
				// Later member after sorting is the first member before sorting,
				// so use the prefix of the first member after sorting.
				sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
				sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
			default:
				sorted = append(sorted, member.buffer...)
			}
		}
		if int(afterBody-beforeBody) != len(sorted) {
			panic("BUG: length invariant violated")
		}
		copy(d.s.buf[beforeBody:afterBody], sorted)

		// Update scratch buffer to the largest amount ever used.
		if len(sorted) > len(*scratch) {
			*scratch = sorted
		}
	case '[':
		// Arrays keep their element order; only recurse into each element.
		for d.PeekKind() != ']' {
			mustReorderObjectsFromDecoder(d, scratch)
		}
		d.ReadToken() // consume the closing ']'
	default:
		// Any other token needs no reordering; an error here contradicts
		// the validity pre-condition.
		if err != nil {
			panic("BUG: " + err.Error())
		}
	}
}
|
||||
@@ -1,612 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
// JSON value parser state machine.
|
||||
// Just about at the limit of what is reasonable to write by hand.
|
||||
// Some parts are a bit tedious, but overall it nicely factors out the
|
||||
// otherwise common code from the multiple scanning functions
|
||||
// in this package (Compact, Indent, checkValid, etc).
|
||||
//
|
||||
// This file starts with two simple examples using the scanner
|
||||
// before diving into the scanner itself.
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Valid reports whether data is a valid JSON encoding.
|
||||
func Valid(data []byte) bool {
|
||||
scan := newScanner()
|
||||
defer freeScanner(scan)
|
||||
return checkValid(data, scan) == nil
|
||||
}
|
||||
|
||||
// checkValid verifies that data is valid JSON-encoded data.
|
||||
// scan is passed in for use by checkValid to avoid an allocation.
|
||||
// checkValid returns nil or a SyntaxError.
|
||||
func checkValid(data []byte, scan *scanner) error {
|
||||
scan.reset()
|
||||
for _, c := range data {
|
||||
scan.bytes++
|
||||
if scan.step(scan, c) == scanError {
|
||||
return scan.err
|
||||
}
|
||||
}
|
||||
if scan.eof() == scanError {
|
||||
return scan.err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// A SyntaxError describes a JSON syntax error.
// [Unmarshal] will return a SyntaxError if the JSON can't be parsed.
type SyntaxError struct {
	msg    string // description of error
	Offset int64  // error occurred after reading Offset bytes
}

// Error returns the description of the syntax error.
func (e *SyntaxError) Error() string {
	return e.msg
}
|
||||
|
||||
// A scanner is a JSON scanning state machine.
// Callers call scan.reset and then pass bytes in one at a time
// by calling scan.step(&scan, c) for each byte.
// The return value, referred to as an opcode, tells the
// caller about significant parsing events like beginning
// and ending literals, objects, and arrays, so that the
// caller can follow along if it wishes.
// The return value scanEnd indicates that a single top-level
// JSON value has been completed, *before* the byte that
// just got passed in. (The indication must be delayed in order
// to recognize the end of numbers: is 123 a whole value or
// the beginning of 12345e+6?).
type scanner struct {
	// The step is a func to be called to execute the next transition.
	// Also tried using an integer constant and a single func
	// with a switch, but using the func directly was 10% faster
	// on a 64-bit Mac Mini, and it's nicer to read.
	step func(*scanner, byte) int

	// Reached end of top-level value.
	endTop bool

	// Stack of what we're in the middle of - array values, object keys, object values.
	// Entries are the parse* constants; the outermost value is at index 0.
	parseState []int

	// Error that happened, if any.
	err error

	// total bytes consumed, updated by decoder.Decode (and deliberately
	// not set to zero by scan.reset)
	bytes int64
}
|
||||
|
||||
var scannerPool = sync.Pool{
|
||||
New: func() any {
|
||||
return &scanner{}
|
||||
},
|
||||
}
|
||||
|
||||
func newScanner() *scanner {
|
||||
scan := scannerPool.Get().(*scanner)
|
||||
// scan.reset by design doesn't set bytes to zero
|
||||
scan.bytes = 0
|
||||
scan.reset()
|
||||
return scan
|
||||
}
|
||||
|
||||
func freeScanner(scan *scanner) {
|
||||
// Avoid hanging on to too much memory in extreme cases.
|
||||
if len(scan.parseState) > 1024 {
|
||||
scan.parseState = nil
|
||||
}
|
||||
scannerPool.Put(scan)
|
||||
}
|
||||
|
||||
// These values are returned by the state transition functions
// assigned to scanner.step and the method scanner.eof.
// They give details about the current state of the scan that
// callers might be interested to know about.
// It is okay to ignore the return value of any particular
// call to scanner.step: if one call returns scanError,
// every subsequent call will return scanError too.
const (
	// Continue.
	scanContinue     = iota // uninteresting byte
	scanBeginLiteral        // end implied by next result != scanContinue
	scanBeginObject         // begin object
	scanObjectKey           // just finished object key (string)
	scanObjectValue         // just finished non-last object value
	scanEndObject           // end object (implies scanObjectValue if possible)
	scanBeginArray          // begin array
	scanArrayValue          // just finished array value
	scanEndArray            // end array (implies scanArrayValue if possible)
	scanSkipSpace           // space byte; can skip; known to be last "continue" result

	// Stop.
	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
	scanError // hit an error, scanner.err.
)
|
||||
|
||||
// These values are stored in the parseState stack.
// They give the current state of a composite value
// being scanned. If the parser is inside a nested value
// the parseState describes the nested state, outermost at entry 0.
const (
	parseObjectKey   = iota // parsing object key (before colon)
	parseObjectValue        // parsing object value (after colon)
	parseArrayValue         // parsing array value
)
|
||||
|
||||
// This limits the max nesting depth to prevent stack overflow.
// This is permitted by https://tools.ietf.org/html/rfc7159#section-9
// pushParseState reports an "exceeded max depth" error once the parse stack
// holds more than this many entries.
const maxNestingDepth = 10000
|
||||
|
||||
// reset prepares the scanner for use.
|
||||
// It must be called before calling s.step.
|
||||
func (s *scanner) reset() {
|
||||
s.step = stateBeginValue
|
||||
s.parseState = s.parseState[0:0]
|
||||
s.err = nil
|
||||
s.endTop = false
|
||||
}
|
||||
|
||||
// eof tells the scanner that the end of input has been reached.
|
||||
// It returns a scan status just as s.step does.
|
||||
func (s *scanner) eof() int {
|
||||
if s.err != nil {
|
||||
return scanError
|
||||
}
|
||||
if s.endTop {
|
||||
return scanEnd
|
||||
}
|
||||
s.step(s, ' ')
|
||||
if s.endTop {
|
||||
return scanEnd
|
||||
}
|
||||
if s.err == nil {
|
||||
s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
|
||||
}
|
||||
return scanError
|
||||
}
|
||||
|
||||
// pushParseState pushes a new parse state newParseState onto the parse stack.
|
||||
// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned.
|
||||
func (s *scanner) pushParseState(c byte, newParseState int, successState int) int {
|
||||
s.parseState = append(s.parseState, newParseState)
|
||||
if len(s.parseState) <= maxNestingDepth {
|
||||
return successState
|
||||
}
|
||||
return s.error(c, "exceeded max depth")
|
||||
}
|
||||
|
||||
// popParseState pops a parse state (already obtained) off the stack
|
||||
// and updates s.step accordingly.
|
||||
func (s *scanner) popParseState() {
|
||||
n := len(s.parseState) - 1
|
||||
s.parseState = s.parseState[0:n]
|
||||
if n == 0 {
|
||||
s.step = stateEndTop
|
||||
s.endTop = true
|
||||
} else {
|
||||
s.step = stateEndValue
|
||||
}
|
||||
}
|
||||
|
||||
// isSpace reports whether c is one of the four JSON whitespace bytes:
// space, horizontal tab, carriage return, or line feed.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
|
||||
|
||||
// stateBeginValueOrEmpty is the state after reading `[`.
|
||||
func stateBeginValueOrEmpty(s *scanner, c byte) int {
|
||||
if isSpace(c) {
|
||||
return scanSkipSpace
|
||||
}
|
||||
if c == ']' {
|
||||
return stateEndValue(s, c)
|
||||
}
|
||||
return stateBeginValue(s, c)
|
||||
}
|
||||
|
||||
// stateBeginValue is the state at the beginning of the input.
|
||||
func stateBeginValue(s *scanner, c byte) int {
|
||||
if isSpace(c) {
|
||||
return scanSkipSpace
|
||||
}
|
||||
switch c {
|
||||
case '{':
|
||||
s.step = stateBeginStringOrEmpty
|
||||
return s.pushParseState(c, parseObjectKey, scanBeginObject)
|
||||
case '[':
|
||||
s.step = stateBeginValueOrEmpty
|
||||
return s.pushParseState(c, parseArrayValue, scanBeginArray)
|
||||
case '"':
|
||||
s.step = stateInString
|
||||
return scanBeginLiteral
|
||||
case '-':
|
||||
s.step = stateNeg
|
||||
return scanBeginLiteral
|
||||
case '0': // beginning of 0.123
|
||||
s.step = state0
|
||||
return scanBeginLiteral
|
||||
case 't': // beginning of true
|
||||
s.step = stateT
|
||||
return scanBeginLiteral
|
||||
case 'f': // beginning of false
|
||||
s.step = stateF
|
||||
return scanBeginLiteral
|
||||
case 'n': // beginning of null
|
||||
s.step = stateN
|
||||
return scanBeginLiteral
|
||||
}
|
||||
if '1' <= c && c <= '9' { // beginning of 1234.5
|
||||
s.step = state1
|
||||
return scanBeginLiteral
|
||||
}
|
||||
return s.error(c, "looking for beginning of value")
|
||||
}
|
||||
|
||||
// stateBeginStringOrEmpty is the state after reading `{`.
|
||||
func stateBeginStringOrEmpty(s *scanner, c byte) int {
|
||||
if isSpace(c) {
|
||||
return scanSkipSpace
|
||||
}
|
||||
if c == '}' {
|
||||
n := len(s.parseState)
|
||||
s.parseState[n-1] = parseObjectValue
|
||||
return stateEndValue(s, c)
|
||||
}
|
||||
return stateBeginString(s, c)
|
||||
}
|
||||
|
||||
// stateBeginString is the state after reading `{"key": value,`.
|
||||
func stateBeginString(s *scanner, c byte) int {
|
||||
if isSpace(c) {
|
||||
return scanSkipSpace
|
||||
}
|
||||
if c == '"' {
|
||||
s.step = stateInString
|
||||
return scanBeginLiteral
|
||||
}
|
||||
return s.error(c, "looking for beginning of object key string")
|
||||
}
|
||||
|
||||
// stateEndValue is the state after completing a value,
|
||||
// such as after reading `{}` or `true` or `["x"`.
|
||||
func stateEndValue(s *scanner, c byte) int {
|
||||
n := len(s.parseState)
|
||||
if n == 0 {
|
||||
// Completed top-level before the current byte.
|
||||
s.step = stateEndTop
|
||||
s.endTop = true
|
||||
return stateEndTop(s, c)
|
||||
}
|
||||
if isSpace(c) {
|
||||
s.step = stateEndValue
|
||||
return scanSkipSpace
|
||||
}
|
||||
ps := s.parseState[n-1]
|
||||
switch ps {
|
||||
case parseObjectKey:
|
||||
if c == ':' {
|
||||
s.parseState[n-1] = parseObjectValue
|
||||
s.step = stateBeginValue
|
||||
return scanObjectKey
|
||||
}
|
||||
return s.error(c, "after object key")
|
||||
case parseObjectValue:
|
||||
if c == ',' {
|
||||
s.parseState[n-1] = parseObjectKey
|
||||
s.step = stateBeginString
|
||||
return scanObjectValue
|
||||
}
|
||||
if c == '}' {
|
||||
s.popParseState()
|
||||
return scanEndObject
|
||||
}
|
||||
return s.error(c, "after object key:value pair")
|
||||
case parseArrayValue:
|
||||
if c == ',' {
|
||||
s.step = stateBeginValue
|
||||
return scanArrayValue
|
||||
}
|
||||
if c == ']' {
|
||||
s.popParseState()
|
||||
return scanEndArray
|
||||
}
|
||||
return s.error(c, "after array element")
|
||||
}
|
||||
return s.error(c, "")
|
||||
}
|
||||
|
||||
// stateEndTop is the state after finishing the top-level value,
|
||||
// such as after reading `{}` or `[1,2,3]`.
|
||||
// Only space characters should be seen now.
|
||||
func stateEndTop(s *scanner, c byte) int {
|
||||
if !isSpace(c) {
|
||||
// Complain about non-space byte on next call.
|
||||
s.error(c, "after top-level value")
|
||||
}
|
||||
return scanEnd
|
||||
}
|
||||
|
||||
// stateInString is the state after reading `"`.
|
||||
func stateInString(s *scanner, c byte) int {
|
||||
if c == '"' {
|
||||
s.step = stateEndValue
|
||||
return scanContinue
|
||||
}
|
||||
if c == '\\' {
|
||||
s.step = stateInStringEsc
|
||||
return scanContinue
|
||||
}
|
||||
if c < 0x20 {
|
||||
return s.error(c, "in string literal")
|
||||
}
|
||||
return scanContinue
|
||||
}
|
||||
|
||||
// stateInStringEsc is the state after reading `"\` during a quoted string.
|
||||
func stateInStringEsc(s *scanner, c byte) int {
|
||||
switch c {
|
||||
case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
|
||||
s.step = stateInString
|
||||
return scanContinue
|
||||
case 'u':
|
||||
s.step = stateInStringEscU
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in string escape code")
|
||||
}
|
||||
|
||||
// stateInStringEscU is the state after reading `"\u` during a quoted string.
|
||||
func stateInStringEscU(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||||
s.step = stateInStringEscU1
|
||||
return scanContinue
|
||||
}
|
||||
// numbers
|
||||
return s.error(c, "in \\u hexadecimal character escape")
|
||||
}
|
||||
|
||||
// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
|
||||
func stateInStringEscU1(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||||
s.step = stateInStringEscU12
|
||||
return scanContinue
|
||||
}
|
||||
// numbers
|
||||
return s.error(c, "in \\u hexadecimal character escape")
|
||||
}
|
||||
|
||||
// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
|
||||
func stateInStringEscU12(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||||
s.step = stateInStringEscU123
|
||||
return scanContinue
|
||||
}
|
||||
// numbers
|
||||
return s.error(c, "in \\u hexadecimal character escape")
|
||||
}
|
||||
|
||||
// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
|
||||
func stateInStringEscU123(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||||
s.step = stateInString
|
||||
return scanContinue
|
||||
}
|
||||
// numbers
|
||||
return s.error(c, "in \\u hexadecimal character escape")
|
||||
}
|
||||
|
||||
// stateNeg is the state after reading `-` during a number.
|
||||
func stateNeg(s *scanner, c byte) int {
|
||||
if c == '0' {
|
||||
s.step = state0
|
||||
return scanContinue
|
||||
}
|
||||
if '1' <= c && c <= '9' {
|
||||
s.step = state1
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in numeric literal")
|
||||
}
|
||||
|
||||
// state1 is the state after reading a non-zero integer during a number,
|
||||
// such as after reading `1` or `100` but not `0`.
|
||||
func state1(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' {
|
||||
s.step = state1
|
||||
return scanContinue
|
||||
}
|
||||
return state0(s, c)
|
||||
}
|
||||
|
||||
// state0 is the state after reading `0` during a number.
|
||||
func state0(s *scanner, c byte) int {
|
||||
if c == '.' {
|
||||
s.step = stateDot
|
||||
return scanContinue
|
||||
}
|
||||
if c == 'e' || c == 'E' {
|
||||
s.step = stateE
|
||||
return scanContinue
|
||||
}
|
||||
return stateEndValue(s, c)
|
||||
}
|
||||
|
||||
// stateDot is the state after reading the integer and decimal point in a number,
|
||||
// such as after reading `1.`.
|
||||
func stateDot(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' {
|
||||
s.step = stateDot0
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "after decimal point in numeric literal")
|
||||
}
|
||||
|
||||
// stateDot0 is the state after reading the integer, decimal point, and subsequent
|
||||
// digits of a number, such as after reading `3.14`.
|
||||
func stateDot0(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' {
|
||||
return scanContinue
|
||||
}
|
||||
if c == 'e' || c == 'E' {
|
||||
s.step = stateE
|
||||
return scanContinue
|
||||
}
|
||||
return stateEndValue(s, c)
|
||||
}
|
||||
|
||||
// stateE is the state after reading the mantissa and e in a number,
|
||||
// such as after reading `314e` or `0.314e`.
|
||||
func stateE(s *scanner, c byte) int {
|
||||
if c == '+' || c == '-' {
|
||||
s.step = stateESign
|
||||
return scanContinue
|
||||
}
|
||||
return stateESign(s, c)
|
||||
}
|
||||
|
||||
// stateESign is the state after reading the mantissa, e, and sign in a number,
|
||||
// such as after reading `314e-` or `0.314e+`.
|
||||
func stateESign(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' {
|
||||
s.step = stateE0
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in exponent of numeric literal")
|
||||
}
|
||||
|
||||
// stateE0 is the state after reading the mantissa, e, optional sign,
|
||||
// and at least one digit of the exponent in a number,
|
||||
// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
|
||||
func stateE0(s *scanner, c byte) int {
|
||||
if '0' <= c && c <= '9' {
|
||||
return scanContinue
|
||||
}
|
||||
return stateEndValue(s, c)
|
||||
}
|
||||
|
||||
// stateT is the state after reading `t`.
|
||||
func stateT(s *scanner, c byte) int {
|
||||
if c == 'r' {
|
||||
s.step = stateTr
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal true (expecting 'r')")
|
||||
}
|
||||
|
||||
// stateTr is the state after reading `tr`.
|
||||
func stateTr(s *scanner, c byte) int {
|
||||
if c == 'u' {
|
||||
s.step = stateTru
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal true (expecting 'u')")
|
||||
}
|
||||
|
||||
// stateTru is the state after reading `tru`.
|
||||
func stateTru(s *scanner, c byte) int {
|
||||
if c == 'e' {
|
||||
s.step = stateEndValue
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal true (expecting 'e')")
|
||||
}
|
||||
|
||||
// stateF is the state after reading `f`.
|
||||
func stateF(s *scanner, c byte) int {
|
||||
if c == 'a' {
|
||||
s.step = stateFa
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal false (expecting 'a')")
|
||||
}
|
||||
|
||||
// stateFa is the state after reading `fa`.
|
||||
func stateFa(s *scanner, c byte) int {
|
||||
if c == 'l' {
|
||||
s.step = stateFal
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal false (expecting 'l')")
|
||||
}
|
||||
|
||||
// stateFal is the state after reading `fal`.
|
||||
func stateFal(s *scanner, c byte) int {
|
||||
if c == 's' {
|
||||
s.step = stateFals
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal false (expecting 's')")
|
||||
}
|
||||
|
||||
// stateFals is the state after reading `fals`.
|
||||
func stateFals(s *scanner, c byte) int {
|
||||
if c == 'e' {
|
||||
s.step = stateEndValue
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal false (expecting 'e')")
|
||||
}
|
||||
|
||||
// stateN is the state after reading `n`.
|
||||
func stateN(s *scanner, c byte) int {
|
||||
if c == 'u' {
|
||||
s.step = stateNu
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal null (expecting 'u')")
|
||||
}
|
||||
|
||||
// stateNu is the state after reading `nu`.
|
||||
func stateNu(s *scanner, c byte) int {
|
||||
if c == 'l' {
|
||||
s.step = stateNul
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal null (expecting 'l')")
|
||||
}
|
||||
|
||||
// stateNul is the state after reading `nul`.
|
||||
func stateNul(s *scanner, c byte) int {
|
||||
if c == 'l' {
|
||||
s.step = stateEndValue
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in literal null (expecting 'l')")
|
||||
}
|
||||
|
||||
// stateError is the state after reaching a syntax error,
// such as after reading `[1}` or `5.1.2`.
// It ignores its arguments and keeps returning scanError; scanner.error
// installs it as the step function.
func stateError(s *scanner, c byte) int {
	return scanError
}
|
||||
|
||||
// error records an error and switches to the error state.
|
||||
func (s *scanner) error(c byte, context string) int {
|
||||
s.step = stateError
|
||||
s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
|
||||
return scanError
|
||||
}
|
||||
|
||||
// quoteChar formats c as a quoted character literal.
// The two quote characters are special-cased because their spelling differs
// from the one used inside double-quoted strings; every other byte is
// rendered with strconv.Quote and re-wrapped in single quotes.
func quoteChar(c byte) string {
	switch c {
	case '\'':
		return `'\''`
	case '"':
		return `'"'`
	}

	// Quote as a one-character string, then swap the surrounding
	// double quotes for single quotes.
	quoted := strconv.Quote(string(rune(c)))
	return "'" + quoted[1:len(quoted)-1] + "'"
}
|
||||
@@ -1,514 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // source that refill reads from
	buf     []byte      // buffered input; grown and compacted by refill
	d       decodeState // state used to unmarshal each value read by Decode
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned
	scan    scanner     // scanner used by readValue to find the end of a value
	err     error       // sticky error; once set, Decode keeps returning it

	tokenState int   // one of the token* constants
	tokenStack []int // NOTE(review): presumably saved tokenState values for enclosing arrays/objects — confirm against Token
}
|
||||
|
||||
// NewDecoder returns a new decoder that reads from r.
|
||||
//
|
||||
// The decoder introduces its own buffering and may
|
||||
// read data from r beyond the JSON values requested.
|
||||
func NewDecoder(r io.Reader) *Decoder {
|
||||
return &Decoder{r: r}
|
||||
}
|
||||
|
||||
// UseNumber causes the Decoder to unmarshal a number into an
// interface value as a [Number] instead of as a float64.
// It sets a flag on the decoder's decode state and therefore affects
// subsequent calls to Decode.
func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
|
||||
|
||||
// DisallowUnknownFields causes the Decoder to return an error when the destination
// is a struct and the input contains object keys which do not match any
// non-ignored, exported fields in the destination.
// It sets a flag on the decoder's decode state and therefore affects
// subsequent calls to Decode.
func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
|
||||
|
||||
// Decode reads the next JSON-encoded value from its
|
||||
// input and stores it in the value pointed to by v.
|
||||
//
|
||||
// See the documentation for [Unmarshal] for details about
|
||||
// the conversion of JSON into a Go value.
|
||||
func (dec *Decoder) Decode(v any) error {
|
||||
if dec.err != nil {
|
||||
return dec.err
|
||||
}
|
||||
|
||||
if err := dec.tokenPrepareForDecode(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !dec.tokenValueAllowed() {
|
||||
return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
|
||||
}
|
||||
|
||||
// Read whole value into buffer.
|
||||
n, err := dec.readValue()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
|
||||
dec.scanp += n
|
||||
|
||||
// Don't save err from unmarshal into dec.err:
|
||||
// the connection is still usable since we read a complete JSON
|
||||
// object from it before the error happened.
|
||||
err = dec.d.unmarshal(v)
|
||||
|
||||
// fixup token streaming state
|
||||
dec.tokenValueEnd()
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Buffered returns a reader of the data remaining in the Decoder's
|
||||
// buffer. The reader is valid until the next call to [Decoder.Decode].
|
||||
func (dec *Decoder) Buffered() io.Reader {
|
||||
return bytes.NewReader(dec.buf[dec.scanp:])
|
||||
}
|
||||
|
||||
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding, counted from dec.scanp.
//
// Scanner errors and read errors are stored in dec.err before being returned.
// A read that ends with io.EOF in the middle of a value is reported as
// io.ErrUnexpectedEOF if the buffer holds any non-space byte.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	scanp := dec.scanp
	var err error // error from the most recent refill, examined after scanning
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Include the closing delimiter in the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// A pending literal (such as a number) may be completed
				// by the end of input; feed a space to find out.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the buffer down and reset dec.scanp, so keep the
		// position relative to dec.scanp across the call.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
|
||||
|
||||
func (dec *Decoder) refill() error {
|
||||
// Make room to read more into the buffer.
|
||||
// First slide down data already consumed.
|
||||
if dec.scanp > 0 {
|
||||
dec.scanned += int64(dec.scanp)
|
||||
n := copy(dec.buf, dec.buf[dec.scanp:])
|
||||
dec.buf = dec.buf[:n]
|
||||
dec.scanp = 0
|
||||
}
|
||||
|
||||
// Grow buffer if not large enough.
|
||||
const minRead = 512
|
||||
if cap(dec.buf)-len(dec.buf) < minRead {
|
||||
newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
|
||||
copy(newBuf, dec.buf)
|
||||
dec.buf = newBuf
|
||||
}
|
||||
|
||||
// Read. Delay error for next iteration (after scan).
|
||||
n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
|
||||
dec.buf = dec.buf[0 : len(dec.buf)+n]
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func nonSpace(b []byte) bool {
|
||||
for _, c := range b {
|
||||
if !isSpace(c) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination of every encoded value
	err        error     // sticky write error; once set, Encode keeps returning it
	escapeHTML bool      // passed to the marshaler as the escapeHTML option

	indentBuf    []byte // reused output buffer for indented encodings
	indentPrefix string // prefix given to SetIndent
	indentValue  string // indent given to SetIndent
}
|
||||
|
||||
// NewEncoder returns a new encoder that writes to w.
|
||||
func NewEncoder(w io.Writer) *Encoder {
|
||||
return &Encoder{w: w, escapeHTML: true}
|
||||
}
|
||||
|
||||
// Encode writes the JSON encoding of v to the stream,
|
||||
// with insignificant space characters elided,
|
||||
// followed by a newline character.
|
||||
//
|
||||
// See the documentation for [Marshal] for details about the
|
||||
// conversion of Go values to JSON.
|
||||
func (enc *Encoder) Encode(v any) error {
|
||||
if enc.err != nil {
|
||||
return enc.err
|
||||
}
|
||||
|
||||
e := newEncodeState()
|
||||
defer encodeStatePool.Put(e)
|
||||
|
||||
err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Terminate each value with a newline.
|
||||
// This makes the output look a little nicer
|
||||
// when debugging, and some kind of space
|
||||
// is required if the encoded value was a number,
|
||||
// so that the reader knows there aren't more
|
||||
// digits coming.
|
||||
e.WriteByte('\n')
|
||||
|
||||
b := e.Bytes()
|
||||
if enc.indentPrefix != "" || enc.indentValue != "" {
|
||||
enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b = enc.indentBuf
|
||||
}
|
||||
if _, err = enc.w.Write(b); err != nil {
|
||||
enc.err = err
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// SetIndent instructs the encoder to format each subsequent encoded
|
||||
// value as if indented by the package-level function Indent(dst, src, prefix, indent).
|
||||
// Calling SetIndent("", "") disables indentation.
|
||||
func (enc *Encoder) SetIndent(prefix, indent string) {
|
||||
enc.indentPrefix = prefix
|
||||
enc.indentValue = indent
|
||||
}
|
||||
|
||||
// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
// to avoid certain safety problems that can arise when embedding JSON in HTML.
//
// In non-HTML settings where the escaping interferes with the readability
// of the output, SetEscapeHTML(false) disables this behavior.
//
// The setting applies to subsequent calls to Encode.
func (enc *Encoder) SetEscapeHTML(on bool) {
	enc.escapeHTML = on
}
|
||||
|
||||
// RawMessage is a raw encoded JSON value.
// It implements [Marshaler] and [Unmarshaler] and can
// be used to delay JSON decoding or precompute a JSON encoding.
type RawMessage []byte

// MarshalJSON returns m as the JSON encoding of m.
// A nil RawMessage encodes as the JSON value null.
func (m RawMessage) MarshalJSON() ([]byte, error) {
	if m != nil {
		return m, nil
	}
	return []byte("null"), nil
}

// UnmarshalJSON sets *m to a copy of data, reusing the existing storage of
// *m where possible. It fails when called on a nil pointer.
func (m *RawMessage) UnmarshalJSON(data []byte) error {
	if m == nil {
		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
	}
	*m = append((*m)[:0], data...)
	return nil
}
|
||||
|
||||
var _ Marshaler = (*RawMessage)(nil)
|
||||
var _ Unmarshaler = (*RawMessage)(nil)
|
||||
|
||||
// A Token holds a value of one of these types:
//
//   - [Delim], for the four JSON delimiters [ ] { }
//   - bool, for JSON booleans
//   - float64, for JSON numbers
//   - [Number], for JSON numbers
//   - string, for JSON string literals
//   - nil, for JSON null
type Token any
|
||||
|
||||
// Positions tracked by the Token API while walking a JSON stream.
// The numeric values follow declaration order, starting at zero.
const (
	tokenTopValue    = iota // not inside any array or object
	tokenArrayStart         // just after '['
	tokenArrayValue         // just after a ',' inside an array
	tokenArrayComma         // just after an array element
	tokenObjectStart        // just after '{'
	tokenObjectKey          // just after a ',' inside an object
	tokenObjectColon        // just after an object key
	tokenObjectValue        // just after a ':' inside an object
	tokenObjectComma        // just after an object value
)
|
||||
|
||||
// advance tokenstate from a separator state to a value state
|
||||
func (dec *Decoder) tokenPrepareForDecode() error {
|
||||
// Note: Not calling peek before switch, to avoid
|
||||
// putting peek into the standard Decode path.
|
||||
// peek is only called when using the Token API.
|
||||
switch dec.tokenState {
|
||||
case tokenArrayComma:
|
||||
c, err := dec.peek()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if c != ',' {
|
||||
return &SyntaxError{"expected comma after array element", dec.InputOffset()}
|
||||
}
|
||||
dec.scanp++
|
||||
dec.tokenState = tokenArrayValue
|
||||
case tokenObjectColon:
|
||||
c, err := dec.peek()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if c != ':' {
|
||||
return &SyntaxError{"expected colon after object key", dec.InputOffset()}
|
||||
}
|
||||
dec.scanp++
|
||||
dec.tokenState = tokenObjectValue
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (dec *Decoder) tokenValueAllowed() bool {
|
||||
switch dec.tokenState {
|
||||
case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (dec *Decoder) tokenValueEnd() {
|
||||
switch dec.tokenState {
|
||||
case tokenArrayStart, tokenArrayValue:
|
||||
dec.tokenState = tokenArrayComma
|
||||
case tokenObjectValue:
|
||||
dec.tokenState = tokenObjectComma
|
||||
}
|
||||
}
|
||||
|
||||
// A Delim is one of the JSON array or object delimiters: [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	return string(rune(d))
}
|
||||
|
||||
// Token returns the next JSON token in the input stream.
|
||||
// At the end of the input stream, Token returns nil, [io.EOF].
|
||||
//
|
||||
// Token guarantees that the delimiters [ ] { } it returns are
|
||||
// properly nested and matched: if Token encounters an unexpected
|
||||
// delimiter in the input, it will return an error.
|
||||
//
|
||||
// The input stream consists of basic JSON values—bool, string,
|
||||
// number, and null—along with delimiters [ ] { } of type [Delim]
|
||||
// to mark the start and end of arrays and objects.
|
||||
// Commas and colons are elided.
|
||||
func (dec *Decoder) Token() (Token, error) {
|
||||
for {
|
||||
c, err := dec.peek()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch c {
|
||||
case '[':
|
||||
if !dec.tokenValueAllowed() {
|
||||
return dec.tokenError(c)
|
||||
}
|
||||
dec.scanp++
|
||||
dec.tokenStack = append(dec.tokenStack, dec.tokenState)
|
||||
dec.tokenState = tokenArrayStart
|
||||
return Delim('['), nil
|
||||
|
||||
case ']':
|
||||
if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
|
||||
return dec.tokenError(c)
|
||||
}
|
||||
dec.scanp++
|
||||
dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
|
||||
dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
|
||||
dec.tokenValueEnd()
|
||||
return Delim(']'), nil
|
||||
|
||||
case '{':
|
||||
if !dec.tokenValueAllowed() {
|
||||
return dec.tokenError(c)
|
||||
}
|
||||
dec.scanp++
|
||||
dec.tokenStack = append(dec.tokenStack, dec.tokenState)
|
||||
dec.tokenState = tokenObjectStart
|
||||
return Delim('{'), nil
|
||||
|
||||
case '}':
|
||||
if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
|
||||
return dec.tokenError(c)
|
||||
}
|
||||
dec.scanp++
|
||||
dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
|
||||
dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
|
||||
dec.tokenValueEnd()
|
||||
return Delim('}'), nil
|
||||
|
||||
case ':':
|
||||
if dec.tokenState != tokenObjectColon {
|
||||
return dec.tokenError(c)
|
||||
}
|
||||
dec.scanp++
|
||||
dec.tokenState = tokenObjectValue
|
||||
continue
|
||||
|
||||
case ',':
|
||||
if dec.tokenState == tokenArrayComma {
|
||||
dec.scanp++
|
||||
dec.tokenState = tokenArrayValue
|
||||
continue
|
||||
}
|
||||
if dec.tokenState == tokenObjectComma {
|
||||
dec.scanp++
|
||||
dec.tokenState = tokenObjectKey
|
||||
continue
|
||||
}
|
||||
return dec.tokenError(c)
|
||||
|
||||
case '"':
|
||||
if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
|
||||
var x string
|
||||
old := dec.tokenState
|
||||
dec.tokenState = tokenTopValue
|
||||
err := dec.Decode(&x)
|
||||
dec.tokenState = old
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dec.tokenState = tokenObjectColon
|
||||
return x, nil
|
||||
}
|
||||
fallthrough
|
||||
|
||||
default:
|
||||
if !dec.tokenValueAllowed() {
|
||||
return dec.tokenError(c)
|
||||
}
|
||||
var x any
|
||||
if err := dec.Decode(&x); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return x, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (dec *Decoder) tokenError(c byte) (Token, error) {
|
||||
var context string
|
||||
switch dec.tokenState {
|
||||
case tokenTopValue:
|
||||
context = " looking for beginning of value"
|
||||
case tokenArrayStart, tokenArrayValue, tokenObjectValue:
|
||||
context = " looking for beginning of value"
|
||||
case tokenArrayComma:
|
||||
context = " after array element"
|
||||
case tokenObjectKey:
|
||||
context = " looking for beginning of object key string"
|
||||
case tokenObjectColon:
|
||||
context = " after object key"
|
||||
case tokenObjectComma:
|
||||
context = " after object key:value pair"
|
||||
}
|
||||
return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
|
||||
}
|
||||
|
||||
// More reports whether there is another element in the
|
||||
// current array or object being parsed.
|
||||
func (dec *Decoder) More() bool {
|
||||
c, err := dec.peek()
|
||||
return err == nil && c != ']' && c != '}'
|
||||
}
|
||||
|
||||
func (dec *Decoder) peek() (byte, error) {
|
||||
var err error
|
||||
for {
|
||||
for i := dec.scanp; i < len(dec.buf); i++ {
|
||||
c := dec.buf[i]
|
||||
if isSpace(c) {
|
||||
continue
|
||||
}
|
||||
dec.scanp = i
|
||||
return c, nil
|
||||
}
|
||||
// buffer has been scanned, now report any error
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
err = dec.refill()
|
||||
}
|
||||
}
|
||||
|
||||
// InputOffset returns the input stream byte offset of the current decoder position.
|
||||
// The offset gives the location of the end of the most recently returned token
|
||||
// and the beginning of the next token.
|
||||
func (dec *Decoder) InputOffset() int64 {
|
||||
return dec.scanned + int64(dec.scanp)
|
||||
}
|
||||
@@ -1,220 +0,0 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// safeSet reports, for each ASCII character used as an index, whether that
// character may appear inside a JSON string without further escaping.
//
// Every entry is true except those for the ASCII control characters (0-31),
// the double quote ("), and the backslash ("\").
var safeSet = func() (set [utf8.RuneSelf]bool) {
	// Entries below ' ' keep their zero value (false).
	for c := ' '; c < utf8.RuneSelf; c++ {
		set[c] = c != '"' && c != '\\'
	}
	return set
}()
|
||||
|
||||
// htmlSafeSet reports, for each ASCII character used as an index, whether
// that character may appear without additional escaping inside a JSON
// string that is embedded in HTML <script> tags.
//
// Every entry is true except those for the ASCII control characters (0-31),
// the double quote ("), the backslash ("\"), the HTML tag delimiters
// ("<" and ">"), and the ampersand ("&").
var htmlSafeSet = func() (set [utf8.RuneSelf]bool) {
	// Entries below ' ' keep their zero value (false).
	for c := ' '; c < utf8.RuneSelf; c++ {
		switch c {
		case '"', '\\', '&', '<', '>':
			set[c] = false
		default:
			set[c] = true
		}
	}
	return set
}()
|
||||
@@ -1,40 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// tagOptions is the part of a struct field's "json" tag that follows the
// first comma, without that comma. It is empty when the tag has no options.
type tagOptions string

// parseTag separates a struct field's json tag into the field name and
// the comma-separated options that follow it.
func parseTag(tag string) (string, tagOptions) {
	name, opts, _ := strings.Cut(tag, ",")
	return name, tagOptions(opts)
}

// Contains reports whether optionName appears as a complete entry in the
// comma-separated option list: it must be delimited by commas or by the
// ends of the string.
func (o tagOptions) Contains(optionName string) bool {
	// An empty list never enters the loop, so it contains nothing.
	for rest := string(o); rest != ""; {
		var entry string
		entry, rest, _ = strings.Cut(rest, ",")
		if entry == optionName {
			return true
		}
	}
	return false
}
|
||||
@@ -1,253 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
// Represents JSON data structure using native Go types: booleans, floats,
|
||||
// strings, arrays, and maps.
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strconv"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonwire"
|
||||
"next.orly.dev/pkg/json/jsontext"
|
||||
jsonv2 "next.orly.dev/pkg/json/v2"
|
||||
)
|
||||
|
||||
// Unmarshal parses the JSON-encoded data and stores the result
|
||||
// in the value pointed to by v. If v is nil or not a pointer,
|
||||
// Unmarshal returns an [InvalidUnmarshalError].
|
||||
//
|
||||
// Unmarshal uses the inverse of the encodings that
|
||||
// [Marshal] uses, allocating maps, slices, and pointers as necessary,
|
||||
// with the following additional rules:
|
||||
//
|
||||
// To unmarshal JSON into a pointer, Unmarshal first handles the case of
|
||||
// the JSON being the JSON literal null. In that case, Unmarshal sets
|
||||
// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into
|
||||
// the value pointed at by the pointer. If the pointer is nil, Unmarshal
|
||||
// allocates a new value for it to point to.
|
||||
//
|
||||
// To unmarshal JSON into a value implementing [Unmarshaler],
|
||||
// Unmarshal calls that value's [Unmarshaler.UnmarshalJSON] method, including
|
||||
// when the input is a JSON null.
|
||||
// Otherwise, if the value implements [encoding.TextUnmarshaler]
|
||||
// and the input is a JSON quoted string, Unmarshal calls
|
||||
// [encoding.TextUnmarshaler.UnmarshalText] with the unquoted form of the string.
|
||||
//
|
||||
// To unmarshal JSON into a struct, Unmarshal matches incoming object
|
||||
// keys to the keys used by [Marshal] (either the struct field name or its tag),
|
||||
// preferring an exact match but also accepting a case-insensitive match. By
|
||||
// default, object keys which don't have a corresponding struct field are
|
||||
// ignored (see [Decoder.DisallowUnknownFields] for an alternative).
|
||||
//
|
||||
// To unmarshal JSON into an interface value,
|
||||
// Unmarshal stores one of these in the interface value:
|
||||
//
|
||||
// - bool, for JSON booleans
|
||||
// - float64, for JSON numbers
|
||||
// - string, for JSON strings
|
||||
// - []any, for JSON arrays
|
||||
// - map[string]any, for JSON objects
|
||||
// - nil for JSON null
|
||||
//
|
||||
// To unmarshal a JSON array into a slice, Unmarshal resets the slice length
|
||||
// to zero and then appends each element to the slice.
|
||||
// As a special case, to unmarshal an empty JSON array into a slice,
|
||||
// Unmarshal replaces the slice with a new empty slice.
|
||||
//
|
||||
// To unmarshal a JSON array into a Go array, Unmarshal decodes
|
||||
// JSON array elements into corresponding Go array elements.
|
||||
// If the Go array is smaller than the JSON array,
|
||||
// the additional JSON array elements are discarded.
|
||||
// If the JSON array is smaller than the Go array,
|
||||
// the additional Go array elements are set to zero values.
|
||||
//
|
||||
// To unmarshal a JSON object into a map, Unmarshal first establishes a map to
|
||||
// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal
|
||||
// reuses the existing map, keeping existing entries. Unmarshal then stores
|
||||
// key-value pairs from the JSON object into the map. The map's key type must
|
||||
// either be any string type, an integer, or implement [encoding.TextUnmarshaler].
|
||||
//
|
||||
// If the JSON-encoded data contain a syntax error, Unmarshal returns a [SyntaxError].
|
||||
//
|
||||
// If a JSON value is not appropriate for a given target type,
|
||||
// or if a JSON number overflows the target type, Unmarshal
|
||||
// skips that field and completes the unmarshaling as best it can.
|
||||
// If no more serious errors are encountered, Unmarshal returns
|
||||
// an [UnmarshalTypeError] describing the earliest such error. In any
|
||||
// case, it's not guaranteed that all the remaining fields following
|
||||
// the problematic one will be unmarshaled into the target object.
|
||||
//
|
||||
// The JSON null value unmarshals into an interface, map, pointer, or slice
|
||||
// by setting that Go value to nil. Because null is often used in JSON to mean
|
||||
// “not present,” unmarshaling a JSON null into any other Go type has no effect
|
||||
// on the value and produces no error.
|
||||
//
|
||||
// When unmarshaling quoted strings, invalid UTF-8 or
|
||||
// invalid UTF-16 surrogate pairs are not treated as an error.
|
||||
// Instead, they are replaced by the Unicode replacement
|
||||
// character U+FFFD.
|
||||
func Unmarshal(data []byte, v any) error {
|
||||
return jsonv2.Unmarshal(data, v, DefaultOptionsV1())
|
||||
}
|
||||
|
||||
// Unmarshaler is the interface implemented by types
|
||||
// that can unmarshal a JSON description of themselves.
|
||||
// The input can be assumed to be a valid encoding of
|
||||
// a JSON value. UnmarshalJSON must copy the JSON data
|
||||
// if it wishes to retain the data after returning.
|
||||
type Unmarshaler = jsonv2.Unmarshaler
|
||||
|
||||
// An UnmarshalTypeError reports a JSON value that could not be stored
// in a value of a particular Go type.
type UnmarshalTypeError struct {
	Value  string       // description of JSON value - "bool", "array", "number -5"
	Type   reflect.Type // type of Go value it could not be assigned to
	Offset int64        // error occurred after reading Offset bytes
	Struct string       // name of the root type containing the field
	Field  string       // the full path from root node to the value
	Err    error        // may be nil
}

// Error assembles the message from whichever of Value, Struct/Field,
// Type, and Err are set; unset parts are simply left out.
func (e *UnmarshalTypeError) Error() string {
	msg := "json: cannot unmarshal"
	if e.Value != "" {
		msg += " JSON " + e.Value
	}
	msg += " into"
	hasField := e.Field != ""
	if hasField {
		msg += " " + e.Struct + "." + e.Field
	}
	if e.Type != nil {
		// "of" links the field path to its Go type and is used only
		// when a field path was written.
		if hasField {
			msg += " of"
		}
		msg += " Go type " + e.Type.String()
	}
	if e.Err != nil {
		msg += ": " + e.Err.Error()
	}
	return msg
}

// Unwrap returns the underlying error, which may be nil.
func (e *UnmarshalTypeError) Unwrap() error {
	return e.Err
}
|
||||
|
||||
// An UnmarshalFieldError reports a JSON object key that mapped to an
// unexported (and therefore unwritable) struct field.
//
// Deprecated: No longer used; kept for compatibility.
type UnmarshalFieldError struct {
	Key   string
	Type  reflect.Type
	Field reflect.StructField
}

// Error names the quoted object key, the unexported field, and the type.
func (e *UnmarshalFieldError) Error() string {
	key := strconv.Quote(e.Key)
	return "json: cannot unmarshal object key " + key +
		" into unexported field " + e.Field.Name +
		" of type " + e.Type.String()
}
|
||||
|
||||
// An InvalidUnmarshalError reports an invalid argument passed to [Unmarshal].
// (The argument to [Unmarshal] must be a non-nil pointer.)
type InvalidUnmarshalError struct {
	Type reflect.Type
}

// Error distinguishes three cases: no type at all, a non-pointer type,
// and a pointer type (whose value must therefore have been nil).
func (e *InvalidUnmarshalError) Error() string {
	switch {
	case e.Type == nil:
		return "json: Unmarshal(nil)"
	case e.Type.Kind() != reflect.Pointer:
		return "json: Unmarshal(non-pointer " + e.Type.String() + ")"
	default:
		return "json: Unmarshal(nil " + e.Type.String() + ")"
	}
}
|
||||
|
||||
// A Number represents a JSON number literal.
|
||||
type Number string
|
||||
|
||||
// String returns the literal text of the number.
|
||||
func (n Number) String() string { return string(n) }
|
||||
|
||||
// Float64 returns the number as a float64.
|
||||
func (n Number) Float64() (float64, error) {
|
||||
return strconv.ParseFloat(string(n), 64)
|
||||
}
|
||||
|
||||
// Int64 returns the number as an int64.
|
||||
func (n Number) Int64() (int64, error) {
|
||||
return strconv.ParseInt(string(n), 10, 64)
|
||||
}
|
||||
|
||||
var numberType = reflect.TypeFor[Number]()
|
||||
|
||||
// MarshalJSONTo implements [jsonv2.MarshalerTo].
|
||||
func (n Number) MarshalJSONTo(enc *jsontext.Encoder) error {
|
||||
opts := enc.Options()
|
||||
stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers)
|
||||
if k, n := enc.StackIndex(enc.StackDepth()); k == '{' && n%2 == 0 {
|
||||
stringify = true // expecting a JSON object name
|
||||
}
|
||||
n = cmp.Or(n, "0")
|
||||
var num []byte
|
||||
val := enc.AvailableBuffer()
|
||||
if stringify {
|
||||
val = append(val, '"')
|
||||
val = append(val, n...)
|
||||
val = append(val, '"')
|
||||
num = val[len(`"`) : len(val)-len(`"`)]
|
||||
} else {
|
||||
val = append(val, n...)
|
||||
num = val
|
||||
}
|
||||
if n, err := jsonwire.ConsumeNumber(num); n != len(num) || err != nil {
|
||||
return fmt.Errorf("cannot parse %q as JSON number: %w", val, strconv.ErrSyntax)
|
||||
}
|
||||
return enc.WriteValue(val)
|
||||
}
|
||||
|
||||
// UnmarshalJSONFrom implements [jsonv2.UnmarshalerFrom].
|
||||
func (n *Number) UnmarshalJSONFrom(dec *jsontext.Decoder) error {
|
||||
opts := dec.Options()
|
||||
stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers)
|
||||
if k, n := dec.StackIndex(dec.StackDepth()); k == '{' && n%2 == 0 {
|
||||
stringify = true // expecting a JSON object name
|
||||
}
|
||||
val, err := dec.ReadValue()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
val0 := val
|
||||
k := val.Kind()
|
||||
switch k {
|
||||
case 'n':
|
||||
if legacy, _ := jsonv2.GetOption(opts, MergeWithLegacySemantics); !legacy {
|
||||
*n = ""
|
||||
}
|
||||
return nil
|
||||
case '"':
|
||||
verbatim := jsonwire.ConsumeSimpleString(val) == len(val)
|
||||
val = jsonwire.UnquoteMayCopy(val, verbatim)
|
||||
if n, err := jsonwire.ConsumeNumber(val); n != len(val) || err != nil {
|
||||
return &jsonv2.SemanticError{JSONKind: val0.Kind(), JSONValue: val0.Clone(), GoType: numberType, Err: strconv.ErrSyntax}
|
||||
}
|
||||
*n = Number(val)
|
||||
return nil
|
||||
case '0':
|
||||
if stringify {
|
||||
break
|
||||
}
|
||||
*n = Number(val)
|
||||
return nil
|
||||
}
|
||||
return &jsonv2.SemanticError{JSONKind: k, GoType: numberType}
|
||||
}
|
||||
@@ -1,251 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
// Package json implements encoding and decoding of JSON as defined in
|
||||
// RFC 7159. The mapping between JSON and Go values is described
|
||||
// in the documentation for the Marshal and Unmarshal functions.
|
||||
//
|
||||
// See "JSON and Go" for an introduction to this package:
|
||||
// https://golang.org/doc/articles/json_and_go.html
|
||||
//
|
||||
// # Security Considerations
|
||||
//
|
||||
// See the "Security Considerations" section in [encoding/json/v2].
|
||||
//
|
||||
// For historical reasons, the default behavior of v1 [encoding/json]
|
||||
// unfortunately operates with less secure defaults.
|
||||
// New usages of JSON in Go are encouraged to use [encoding/json/v2] instead.
|
||||
package json
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strconv"
|
||||
|
||||
jsonv2 "next.orly.dev/pkg/json/v2"
|
||||
)
|
||||
|
||||
// Marshal returns the JSON encoding of v.
|
||||
//
|
||||
// Marshal traverses the value v recursively.
|
||||
// If an encountered value implements [Marshaler]
|
||||
// and is not a nil pointer, Marshal calls [Marshaler.MarshalJSON]
|
||||
// to produce JSON. If no [Marshaler.MarshalJSON] method is present but the
|
||||
// value implements [encoding.TextMarshaler] instead, Marshal calls
|
||||
// [encoding.TextMarshaler.MarshalText] and encodes the result as a JSON string.
|
||||
// The nil pointer exception is not strictly necessary
|
||||
// but mimics a similar, necessary exception in the behavior of
|
||||
// [Unmarshaler.UnmarshalJSON].
|
||||
//
|
||||
// Otherwise, Marshal uses the following type-dependent default encodings:
|
||||
//
|
||||
// Boolean values encode as JSON booleans.
|
||||
//
|
||||
// Floating point, integer, and [Number] values encode as JSON numbers.
|
||||
// NaN and +/-Inf values will return an [UnsupportedValueError].
|
||||
//
|
||||
// String values encode as JSON strings coerced to valid UTF-8,
|
||||
// replacing invalid bytes with the Unicode replacement rune.
|
||||
// So that the JSON will be safe to embed inside HTML <script> tags,
|
||||
// the string is encoded using [HTMLEscape],
|
||||
// which escapes "<", ">", "&", U+2028, and U+2029
|
||||
// to "\u003c","\u003e", "\u0026", "\u2028", and "\u2029".
|
||||
// This replacement can be disabled when using an [Encoder],
|
||||
// by calling [Encoder.SetEscapeHTML](false).
|
||||
//
|
||||
// Array and slice values encode as JSON arrays, except that
|
||||
// []byte encodes as a base64-encoded string, and a nil slice
|
||||
// encodes as the null JSON value.
|
||||
//
|
||||
// Struct values encode as JSON objects.
|
||||
// Each exported struct field becomes a member of the object, using the
|
||||
// field name as the object key, unless the field is omitted for one of the
|
||||
// reasons given below.
|
||||
//
|
||||
// The encoding of each struct field can be customized by the format string
|
||||
// stored under the "json" key in the struct field's tag.
|
||||
// The format string gives the name of the field, possibly followed by a
|
||||
// comma-separated list of options. The name may be empty in order to
|
||||
// specify options without overriding the default field name.
|
||||
//
|
||||
// The "omitempty" option specifies that the field should be omitted
|
||||
// from the encoding if the field has an empty value, defined as
|
||||
// false, 0, a nil pointer, a nil interface value, and any array,
|
||||
// slice, map, or string of length zero.
|
||||
//
|
||||
// As a special case, if the field tag is "-", the field is always omitted.
|
||||
// JSON names containing commas or quotes, or names identical to "" or "-",
|
||||
// can be specified using a single-quoted string literal, where the syntax
|
||||
// is identical to the Go grammar for a double-quoted string literal,
|
||||
// but instead uses single quotes as the delimiters.
|
||||
//
|
||||
// Examples of struct field tags and their meanings:
|
||||
//
|
||||
// // Field appears in JSON as key "myName".
|
||||
// Field int `json:"myName"`
|
||||
//
|
||||
// // Field appears in JSON as key "myName" and
|
||||
// // the field is omitted from the object if its value is empty,
|
||||
// // as defined above.
|
||||
// Field int `json:"myName,omitempty"`
|
||||
//
|
||||
// // Field appears in JSON as key "Field" (the default), but
|
||||
// // the field is skipped if empty.
|
||||
// // Note the leading comma.
|
||||
// Field int `json:",omitempty"`
|
||||
//
|
||||
// // Field is ignored by this package.
|
||||
// Field int `json:"-"`
|
||||
//
|
||||
// // Field appears in JSON as key "-".
|
||||
// Field int `json:"'-'"`
|
||||
//
|
||||
// The "omitzero" option specifies that the field should be omitted
|
||||
// from the encoding if the field has a zero value, according to rules:
|
||||
//
|
||||
// 1) If the field type has an "IsZero() bool" method, that will be used to
|
||||
// determine whether the value is zero.
|
||||
//
|
||||
// 2) Otherwise, the value is zero if it is the zero value for its type.
|
||||
//
|
||||
// If both "omitempty" and "omitzero" are specified, the field will be omitted
|
||||
// if the value is either empty or zero (or both).
|
||||
//
|
||||
// The "string" option signals that a field is stored as JSON inside a
|
||||
// JSON-encoded string. It applies only to fields of string, floating point,
|
||||
// integer, or boolean types. This extra level of encoding is sometimes used
|
||||
// when communicating with JavaScript programs:
|
||||
//
|
||||
// Int64String int64 `json:",string"`
|
||||
//
|
||||
// The key name will be used if it's a non-empty string consisting of
|
||||
// only Unicode letters, digits, and ASCII punctuation except quotation
|
||||
// marks, backslash, and comma.
|
||||
//
|
||||
// Embedded struct fields are usually marshaled as if their inner exported fields
|
||||
// were fields in the outer struct, subject to the usual Go visibility rules amended
|
||||
// as described in the next paragraph.
|
||||
// An anonymous struct field with a name given in its JSON tag is treated as
|
||||
// having that name, rather than being anonymous.
|
||||
// An anonymous struct field of interface type is treated the same as having
|
||||
// that type as its name, rather than being anonymous.
|
||||
//
|
||||
// The Go visibility rules for struct fields are amended for JSON when
|
||||
// deciding which field to marshal or unmarshal. If there are
|
||||
// multiple fields at the same level, and that level is the least
|
||||
// nested (and would therefore be the nesting level selected by the
|
||||
// usual Go rules), the following extra rules apply:
|
||||
//
|
||||
// 1) Of those fields, if any are JSON-tagged, only tagged fields are considered,
|
||||
// even if there are multiple untagged fields that would otherwise conflict.
|
||||
//
|
||||
// 2) If there is exactly one field (tagged or not according to the first rule), that is selected.
|
||||
//
|
||||
// 3) Otherwise there are multiple fields, and all are ignored; no error occurs.
|
||||
//
|
||||
// Handling of anonymous struct fields is new in Go 1.1.
|
||||
// Prior to Go 1.1, anonymous struct fields were ignored. To force ignoring of
|
||||
// an anonymous struct field in both current and earlier versions, give the field
|
||||
// a JSON tag of "-".
|
||||
//
|
||||
// Map values encode as JSON objects. The map's key type must either be a
|
||||
// string, an integer type, or implement [encoding.TextMarshaler]. The map keys
|
||||
// are sorted and used as JSON object keys by applying the following rules,
|
||||
// subject to the UTF-8 coercion described for string values above:
|
||||
// - keys of any string type are used directly
|
||||
// - keys that implement [encoding.TextMarshaler] are marshaled
|
||||
// - integer keys are converted to strings
|
||||
//
|
||||
// Pointer values encode as the value pointed to.
|
||||
// A nil pointer encodes as the null JSON value.
|
||||
//
|
||||
// Interface values encode as the value contained in the interface.
|
||||
// A nil interface value encodes as the null JSON value.
|
||||
//
|
||||
// Channel, complex, and function values cannot be encoded in JSON.
|
||||
// Attempting to encode such a value causes Marshal to return
|
||||
// an [UnsupportedTypeError].
|
||||
//
|
||||
// JSON cannot represent cyclic data structures and Marshal does not
|
||||
// handle them. Passing cyclic structures to Marshal will result in
|
||||
// an error.
|
||||
func Marshal(v any) ([]byte, error) {
|
||||
return jsonv2.Marshal(v, DefaultOptionsV1())
|
||||
}
|
||||
|
||||
// MarshalIndent is like [Marshal] but applies [Indent] to format the output.
|
||||
// Each JSON element in the output will begin on a new line beginning with prefix
|
||||
// followed by one or more copies of indent according to the indentation nesting.
|
||||
func MarshalIndent(v any, prefix, indent string) ([]byte, error) {
|
||||
b, err := Marshal(v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b, err = appendIndent(nil, b, prefix, indent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// Marshaler is the interface implemented by types that
|
||||
// can marshal themselves into valid JSON.
|
||||
type Marshaler = jsonv2.Marshaler // alias declaration: Marshaler and jsonv2.Marshaler denote the same type
|
||||
|
||||
// UnsupportedTypeError is the error [Marshal] returns when asked to encode
// a value whose type cannot be represented.
type UnsupportedTypeError struct {
	Type reflect.Type // the type that could not be encoded
}

// Error reports the unsupported type by name.
// It calls e.Type.String(), so Type must be non-nil.
func (e *UnsupportedTypeError) Error() string {
	const msgPrefix = "json: unsupported type: "
	return msgPrefix + e.Type.String()
}
|
||||
|
||||
// UnsupportedValueError is the error [Marshal] returns when asked to encode
// a value that cannot be represented.
type UnsupportedValueError struct {
	Value reflect.Value
	Str   string // description of the value; used verbatim in the error message
}

// Error reports the unsupported value using the Str description.
func (e *UnsupportedValueError) Error() string {
	const msgPrefix = "json: unsupported value: "
	return msgPrefix + e.Str
}
|
||||
|
||||
// InvalidUTF8Error was returned by [Marshal] before Go 1.2 when a string
// value contained invalid UTF-8 sequences.
// Since Go 1.2, [Marshal] instead coerces such a string to valid UTF-8 by
// replacing each invalid byte with the Unicode replacement rune U+FFFD.
//
// Deprecated: No longer used; kept for compatibility.
type InvalidUTF8Error struct {
	S string // the whole string value that caused the error
}

// Error reports the offending string in Go-quoted form.
func (e *InvalidUTF8Error) Error() string {
	quoted := strconv.Quote(e.S)
	return "json: invalid UTF-8 in string: " + quoted
}
|
||||
|
||||
// MarshalerError wraps an error produced by a call to a
// [Marshaler.MarshalJSON] or [encoding.TextMarshaler.MarshalText] method.
type MarshalerError struct {
	Type       reflect.Type // type whose marshal method failed
	Err        error        // error returned by that method
	sourceFunc string       // name of the failing method; empty means "MarshalJSON"
}

// Error names the failing method and type, followed by the underlying error.
// It dereferences both Type and Err, so neither may be nil.
func (e *MarshalerError) Error() string {
	method := "MarshalJSON"
	if e.sourceFunc != "" {
		method = e.sourceFunc
	}
	return "json: error calling " + method + " for type " + e.Type.String() + ": " + e.Err.Error()
}

// Unwrap returns the underlying error.
func (e *MarshalerError) Unwrap() error {
	return e.Err
}
|
||||
@@ -1,133 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
|
||||
"next.orly.dev/pkg/json/jsontext"
|
||||
)
|
||||
|
||||
// HTMLEscape appends to dst the JSON-encoded src with the characters
// <, >, &, U+2028 and U+2029 rewritten as \u003c, \u003e, \u0026, \u2028
// and \u2029, so that the JSON is safe to embed inside HTML <script> tags.
// For historical reasons, web browsers don't honor standard HTML
// escaping within <script> tags, so an alternative JSON encoding must be used.
func HTMLEscape(dst *bytes.Buffer, src []byte) {
	dst.Grow(len(src))
	escaped := appendHTMLEscape(dst.AvailableBuffer(), src)
	dst.Write(escaped)
}

// appendHTMLEscape appends src to dst, replacing each '<', '>' and '&' byte
// and each UTF-8 encoded U+2028 / U+2029 with its \uXXXX escape.
func appendHTMLEscape(dst, src []byte) []byte {
	const hexDigits = "0123456789abcdef"
	// The characters can only appear in string literals,
	// so a plain byte-by-byte scan is sufficient.
	flushed := 0 // index of the first byte of src not yet copied to dst
	for pos := 0; pos < len(src); pos++ {
		ch := src[pos]
		switch {
		case ch == '<' || ch == '>' || ch == '&':
			dst = append(dst, src[flushed:pos]...)
			dst = append(dst, '\\', 'u', '0', '0', hexDigits[ch>>4], hexDigits[ch&0xF])
			flushed = pos + 1
		case ch == 0xE2 && pos+2 < len(src) && src[pos+1] == 0x80 && src[pos+2]&^1 == 0xA8:
			// U+2028 and U+2029 are encoded as E2 80 A8 and E2 80 A9;
			// the low nibble of the last byte is the final hex digit.
			dst = append(dst, src[flushed:pos]...)
			dst = append(dst, '\\', 'u', '2', '0', '2', hexDigits[src[pos+2]&0xF])
			flushed = pos + len("\u2029")
		}
	}
	return append(dst, src[flushed:]...)
}
|
||||
|
||||
// Compact appends to dst the JSON-encoded src with
|
||||
// insignificant space characters elided.
|
||||
func Compact(dst *bytes.Buffer, src []byte) error {
|
||||
dst.Grow(len(src))
|
||||
b := dst.AvailableBuffer()
|
||||
b, err := jsontext.AppendFormat(b, src,
|
||||
jsontext.AllowDuplicateNames(true),
|
||||
jsontext.AllowInvalidUTF8(true),
|
||||
jsontext.PreserveRawStrings(true))
|
||||
if err != nil {
|
||||
return transformSyntacticError(err)
|
||||
}
|
||||
dst.Write(b)
|
||||
return nil
|
||||
}
|
||||
|
||||
// indentGrowthFactor specifies the growth factor of indenting JSON input.
|
||||
// Empirically, the growth factor was measured to be between 1.4x to 1.8x
|
||||
// for some set of compacted JSON with the indent being a single tab.
|
||||
// Specify a growth factor slightly larger than what is observed
|
||||
// to reduce probability of allocation in appendIndent.
|
||||
// A factor no higher than 2 ensures that wasted space never exceeds 50%.
|
||||
const indentGrowthFactor = 2 // Indent pre-grows dst by this multiple of len(src)
|
||||
|
||||
// Indent appends to dst an indented form of the JSON-encoded src.
|
||||
// Each element in a JSON object or array begins on a new,
|
||||
// indented line beginning with prefix followed by one or more
|
||||
// copies of indent according to the indentation nesting.
|
||||
// The data appended to dst does not begin with the prefix nor
|
||||
// any indentation, to make it easier to embed inside other formatted JSON data.
|
||||
// Although leading space characters (space, tab, carriage return, newline)
|
||||
// at the beginning of src are dropped, trailing space characters
|
||||
// at the end of src are preserved and copied to dst.
|
||||
// For example, if src has no trailing spaces, neither will dst;
|
||||
// if src ends in a trailing newline, so will dst.
|
||||
func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
|
||||
dst.Grow(indentGrowthFactor * len(src))
|
||||
b := dst.AvailableBuffer()
|
||||
b, err := appendIndent(b, src, prefix, indent)
|
||||
dst.Write(b)
|
||||
return err
|
||||
}
|
||||
|
||||
func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) {
|
||||
// In v2, trailing whitespace is discarded, while v1 preserved it.
|
||||
dstLen := len(dst)
|
||||
if n := len(src) - len(bytes.TrimRight(src, " \n\r\t")); n > 0 {
|
||||
// Append the trailing whitespace afterwards.
|
||||
defer func() {
|
||||
if len(dst) > dstLen {
|
||||
dst = append(dst, src[len(src)-n:]...)
|
||||
}
|
||||
}()
|
||||
}
|
||||
// In v2, only spaces and tabs are allowed, while v1 allowed any character.
|
||||
if len(strings.Trim(prefix, " \t"))+len(strings.Trim(indent, " \t")) > 0 {
|
||||
// Use placeholder spaces of correct length, and replace afterwards.
|
||||
invalidPrefix, invalidIndent := prefix, indent
|
||||
prefix = strings.Repeat(" ", len(prefix))
|
||||
indent = strings.Repeat(" ", len(indent))
|
||||
defer func() {
|
||||
b := dst[dstLen:]
|
||||
for i := bytes.IndexByte(b, '\n'); i >= 0; i = bytes.IndexByte(b, '\n') {
|
||||
b = b[i+len("\n"):]
|
||||
n := len(b) - len(bytes.TrimLeft(b, " ")) // len(prefix)+n*len(indent)
|
||||
spaces := b[:n]
|
||||
spaces = spaces[copy(spaces, invalidPrefix):]
|
||||
for len(spaces) > 0 {
|
||||
spaces = spaces[copy(spaces, invalidIndent):]
|
||||
}
|
||||
b = b[n:]
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
dst, err := jsontext.AppendFormat(dst, src,
|
||||
jsontext.AllowDuplicateNames(true),
|
||||
jsontext.AllowInvalidUTF8(true),
|
||||
jsontext.PreserveRawStrings(true),
|
||||
jsontext.Multiline(true),
|
||||
jsontext.WithIndentPrefix(prefix),
|
||||
jsontext.WithIndent(indent))
|
||||
if err != nil {
|
||||
return dst[:dstLen], transformSyntacticError(err)
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
@@ -1,153 +0,0 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"next.orly.dev/pkg/json/internal"
|
||||
"next.orly.dev/pkg/json/jsontext"
|
||||
jsonv2 "next.orly.dev/pkg/json/v2"
|
||||
)
|
||||
|
||||
// Inject functionality into v2 to properly handle v1 types.
|
||||
func init() {
|
||||
internal.TransformMarshalError = transformMarshalError
|
||||
internal.TransformUnmarshalError = transformUnmarshalError
|
||||
internal.NewMarshalerError = func(val any, err error, funcName string) error {
|
||||
return &MarshalerError{reflect.TypeOf(val), err, funcName}
|
||||
}
|
||||
|
||||
internal.NewRawNumber = func() any { return new(Number) }
|
||||
internal.RawNumberOf = func(b []byte) any { return Number(b) }
|
||||
}
|
||||
|
||||
func transformMarshalError(root any, err error) error {
|
||||
// Historically, errors returned from Marshal methods were wrapped
|
||||
// in a [MarshalerError]. This is directly performed by the v2 package
|
||||
// via the injected [internal.NewMarshalerError] constructor
|
||||
// while operating under [ReportErrorsWithLegacySemantics].
|
||||
// Note that errors from a Marshal method were always wrapped,
|
||||
// even if wrapped for multiple layers.
|
||||
if err, ok := err.(*jsonv2.SemanticError); err != nil {
|
||||
if err.Err == nil {
|
||||
// Historically, this was only reported for unserializable types
|
||||
// like complex numbers, channels, functions, and unsafe.Pointers.
|
||||
return &UnsupportedTypeError{Type: err.GoType}
|
||||
} else {
|
||||
// Historically, this was only reported for NaN or ±Inf values
|
||||
// and cycles detected in the value.
|
||||
// The Val used to be populated with the reflect.Value,
|
||||
// but this is no longer supported.
|
||||
errStr := err.Err.Error()
|
||||
if err.Err == internal.ErrCycle && err.GoType != nil {
|
||||
errStr += " via " + err.GoType.String()
|
||||
}
|
||||
errStr = strings.TrimPrefix(errStr, "unsupported value: ")
|
||||
return &UnsupportedValueError{Str: errStr}
|
||||
}
|
||||
} else if ok {
|
||||
return (*UnsupportedValueError)(nil)
|
||||
}
|
||||
if err, _ := err.(*MarshalerError); err != nil {
|
||||
err.Err = transformSyntacticError(err.Err)
|
||||
return err
|
||||
}
|
||||
return transformSyntacticError(err)
|
||||
}
|
||||
|
||||
func transformUnmarshalError(root any, err error) error {
|
||||
// Historically, errors from Unmarshal methods were never wrapped and
|
||||
// returned verbatim while operating under [ReportErrorsWithLegacySemantics].
|
||||
if err, ok := err.(*jsonv2.SemanticError); err != nil {
|
||||
if err.Err == internal.ErrNonNilReference {
|
||||
return &InvalidUnmarshalError{err.GoType}
|
||||
}
|
||||
if err.Err == jsonv2.ErrUnknownName {
|
||||
return fmt.Errorf("json: unknown field %q", err.JSONPointer.LastToken())
|
||||
}
|
||||
|
||||
// Historically, UnmarshalTypeError has always been inconsistent
|
||||
// about how it reported position information.
|
||||
//
|
||||
// The Struct field now points to the root type,
|
||||
// rather than some intermediate struct in the path.
|
||||
// This better matches the original intent of the field based
|
||||
// on how the Error message was formatted.
|
||||
//
|
||||
// For a representation closer to the historical representation,
|
||||
// we switch the '/'-delimited representation of a JSON pointer
|
||||
// to use a '.'-delimited representation. This may be ambiguous,
|
||||
// but the prior representation was always ambiguous as well.
|
||||
// Users that care about precise positions should use v2 errors
|
||||
// by disabling [ReportErrorsWithLegacySemantics].
|
||||
//
|
||||
// The introduction of a Err field is new to the v1-to-v2 migration
|
||||
// and allows us to preserve stronger error information
|
||||
// that may be surfaced by the v2 package.
|
||||
//
|
||||
// See https://go.dev/issue/43126
|
||||
var value string
|
||||
switch err.JSONKind {
|
||||
case 'n', '"', '0':
|
||||
value = err.JSONKind.String()
|
||||
case 'f', 't':
|
||||
value = "bool"
|
||||
case '[', ']':
|
||||
value = "array"
|
||||
case '{', '}':
|
||||
value = "object"
|
||||
}
|
||||
if len(err.JSONValue) > 0 {
|
||||
isStrconvError := err.Err == strconv.ErrRange || err.Err == strconv.ErrSyntax
|
||||
isNumericKind := func(t reflect.Type) bool {
|
||||
if t == nil {
|
||||
return false
|
||||
}
|
||||
switch t.Kind() {
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
|
||||
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
|
||||
reflect.Float32, reflect.Float64:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
if isStrconvError && isNumericKind(err.GoType) {
|
||||
value = "number"
|
||||
if err.JSONKind == '"' {
|
||||
err.JSONValue, _ = jsontext.AppendUnquote(nil, err.JSONValue)
|
||||
}
|
||||
err.Err = nil
|
||||
}
|
||||
value += " " + string(err.JSONValue)
|
||||
}
|
||||
var rootName string
|
||||
if t := reflect.TypeOf(root); t != nil && err.JSONPointer != "" {
|
||||
if t.Kind() == reflect.Pointer {
|
||||
t = t.Elem()
|
||||
}
|
||||
rootName = t.Name()
|
||||
}
|
||||
fieldPath := string(err.JSONPointer)
|
||||
fieldPath = strings.TrimPrefix(fieldPath, "/")
|
||||
fieldPath = strings.ReplaceAll(fieldPath, "/", ".")
|
||||
return &UnmarshalTypeError{
|
||||
Value: value,
|
||||
Type: err.GoType,
|
||||
Offset: err.ByteOffset,
|
||||
Struct: rootName,
|
||||
Field: fieldPath,
|
||||
Err: transformSyntacticError(err.Err),
|
||||
}
|
||||
} else if ok {
|
||||
return (*UnmarshalTypeError)(nil)
|
||||
}
|
||||
return transformSyntacticError(err)
|
||||
}
|
||||
@@ -1,546 +0,0 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
// Migrating to v2
|
||||
//
|
||||
// This package (i.e., [encoding/json]) is now formally known as the v1 package
|
||||
// since a v2 package now exists at [encoding/json/v2].
|
||||
// All the behavior of the v1 package is implemented in terms of
|
||||
// the v2 package with the appropriate set of options specified that
|
||||
// preserve the historical behavior of v1.
|
||||
//
|
||||
// The [jsonv2.Marshal] function is the newer equivalent of v1 [Marshal].
|
||||
// The [jsonv2.Unmarshal] function is the newer equivalent of v1 [Unmarshal].
|
||||
// The v2 functions have the same calling signature as the v1 equivalent
|
||||
// except that they take in variadic [Options] arguments that can be specified
|
||||
// to alter the behavior of marshal or unmarshal. Both v1 and v2 generally
|
||||
// behave in similar ways, but there are some notable differences.
|
||||
//
|
||||
// The following is a list of differences between v1 and v2:
|
||||
//
|
||||
// - In v1, JSON object members are unmarshaled into a Go struct using a
|
||||
// case-insensitive name match with the JSON name of the fields.
|
||||
// In contrast, v2 matches fields using an exact, case-sensitive match.
|
||||
// The [jsonv2.MatchCaseInsensitiveNames] and [MatchCaseSensitiveDelimiter]
|
||||
// options control this behavior difference. To explicitly specify a Go struct
|
||||
// field to use a particular name matching scheme, either the `case:ignore`
|
||||
// or the `case:strict` field option can be specified.
|
||||
// Field-specified options take precedence over caller-specified options.
|
||||
//
|
||||
// - In v1, when marshaling a Go struct, a field marked as `omitempty`
|
||||
// is omitted if the field value is an "empty" Go value, which is defined as
|
||||
// false, 0, a nil pointer, a nil interface value, and
|
||||
// any empty array, slice, map, or string. In contrast, v2 redefines
|
||||
// `omitempty` to omit a field if it encodes as an "empty" JSON value,
|
||||
// which is defined as a JSON null, or an empty JSON string, object, or array.
|
||||
// The [OmitEmptyWithLegacySemantics] option controls this behavior difference.
|
||||
// Note that `omitempty` behaves identically in both v1 and v2 for a
|
||||
// Go array, slice, map, or string (assuming no user-defined MarshalJSON method
|
||||
// overrides the default representation). Existing usages of `omitempty` on a
|
||||
// Go bool, number, pointer, or interface value should migrate to specifying
|
||||
// `omitzero` instead (which is identically supported in both v1 and v2).
|
||||
//
|
||||
// - In v1, a Go struct field marked as `string` can be used to quote a
|
||||
// Go string, bool, or number as a JSON string. It does not recursively
|
||||
// take effect on composite Go types. In contrast, v2 restricts
|
||||
// the `string` option to only quote a Go number as a JSON string.
|
||||
// It does recursively take effect on Go numbers within a composite Go type.
|
||||
// The [StringifyWithLegacySemantics] option controls this behavior difference.
|
||||
//
|
||||
// - In v1, a nil Go slice or Go map is marshaled as a JSON null.
|
||||
// In contrast, v2 marshals a nil Go slice or Go map as
|
||||
// an empty JSON array or JSON object, respectively.
|
||||
// The [jsonv2.FormatNilSliceAsNull] and [jsonv2.FormatNilMapAsNull] options
|
||||
// control this behavior difference. To explicitly specify a Go struct field
|
||||
// to use a particular representation for nil, either the `format:emitempty`
|
||||
// or `format:emitnull` field option can be specified.
|
||||
// Field-specified options take precedence over caller-specified options.
|
||||
//
|
||||
// - In v1, a Go array may be unmarshaled from a JSON array of any length.
|
||||
// In contrast, in v2 a Go array must be unmarshaled from a JSON array
|
||||
// of the same length, otherwise it results in an error.
|
||||
// The [UnmarshalArrayFromAnyLength] option controls this behavior difference.
|
||||
//
|
||||
// - In v1, a Go byte array is represented as a JSON array of JSON numbers.
|
||||
// In contrast, in v2 a Go byte array is represented as a Base64-encoded JSON string.
|
||||
// The [FormatByteArrayAsArray] option controls this behavior difference.
|
||||
// To explicitly specify a Go struct field to use a particular representation,
|
||||
// either the `format:array` or `format:base64` field option can be specified.
|
||||
// Field-specified options take precedence over caller-specified options.
|
||||
//
|
||||
// - In v1, MarshalJSON methods declared on a pointer receiver are only called
|
||||
// if the Go value is addressable. In contrast, in v2 a MarshalJSON method
|
||||
// is always callable regardless of addressability.
|
||||
// The [CallMethodsWithLegacySemantics] option controls this behavior difference.
|
||||
//
|
||||
// - In v1, MarshalJSON and UnmarshalJSON methods are never called for Go map keys.
|
||||
// In contrast, in v2 a MarshalJSON or UnmarshalJSON method is eligible for
|
||||
// being called for Go map keys.
|
||||
// The [CallMethodsWithLegacySemantics] option controls this behavior difference.
|
||||
//
|
||||
// - In v1, a Go map is marshaled in a deterministic order.
|
||||
// In contrast, in v2 a Go map is marshaled in a non-deterministic order.
|
||||
// The [jsonv2.Deterministic] option controls this behavior difference.
|
||||
//
|
||||
// - In v1, JSON strings are encoded with HTML-specific or JavaScript-specific
|
||||
// characters being escaped. In contrast, in v2 JSON strings use the minimal
|
||||
// encoding and only escape if required by the JSON grammar.
|
||||
// The [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] options
|
||||
// control this behavior difference.
|
||||
//
|
||||
// - In v1, bytes of invalid UTF-8 within a string are silently replaced with
|
||||
// the Unicode replacement character. In contrast, in v2 the presence of
|
||||
// invalid UTF-8 results in an error. The [jsontext.AllowInvalidUTF8] option
|
||||
// controls this behavior difference.
|
||||
//
|
||||
// - In v1, a JSON object with duplicate names is permitted.
|
||||
// In contrast, in v2 a JSON object with duplicate names results in an error.
|
||||
// The [jsontext.AllowDuplicateNames] option controls this behavior difference.
|
||||
//
|
||||
// - In v1, when unmarshaling a JSON null into a non-empty Go value it will
|
||||
// inconsistently either zero out the value or do nothing.
|
||||
// In contrast, in v2 unmarshaling a JSON null will consistently and always
|
||||
// zero out the underlying Go value. The [MergeWithLegacySemantics] option
|
||||
// controls this behavior difference.
|
||||
//
|
||||
// - In v1, when unmarshaling a JSON value into a non-zero Go value,
|
||||
// it merges into the original Go value for array elements, slice elements,
|
||||
// struct fields (but not map values),
|
||||
// pointer values, and interface values (only if a non-nil pointer).
|
||||
// In contrast, in v2 unmarshal merges into the Go value
|
||||
// for struct fields, map values, pointer values, and interface values.
|
||||
// In general, the v2 semantic merges when unmarshaling a JSON object,
|
||||
// otherwise it replaces the value. The [MergeWithLegacySemantics] option
|
||||
// controls this behavior difference.
|
||||
//
|
||||
// - In v1, a [time.Duration] is represented as a JSON number containing
|
||||
// the decimal number of nanoseconds. In contrast, in v2 a [time.Duration]
|
||||
// has no default representation and results in a runtime error.
|
||||
// The [FormatDurationAsNano] option controls this behavior difference.
|
||||
// To explicitly specify a Go struct field to use a particular representation,
|
||||
// either the `format:nano` or `format:units` field option can be specified.
|
||||
// Field-specified options take precedence over caller-specified options.
|
||||
//
|
||||
// - In v1, errors are never reported at runtime for Go struct types
|
||||
// that have some form of structural error (e.g., a malformed tag option).
|
||||
// In contrast, v2 reports a runtime error for Go types that are invalid
|
||||
// as they relate to JSON serialization. For example, a Go struct
|
||||
// with only unexported fields cannot be serialized.
|
||||
// The [ReportErrorsWithLegacySemantics] option controls this behavior difference.
|
||||
//
|
||||
// As mentioned, the entirety of v1 is implemented in terms of v2,
|
||||
// where options are implicitly specified to opt into legacy behavior.
|
||||
// For example, [Marshal] directly calls [jsonv2.Marshal] with [DefaultOptionsV1].
|
||||
// Similarly, [Unmarshal] directly calls [jsonv2.Unmarshal] with [DefaultOptionsV1].
|
||||
// The [DefaultOptionsV1] option represents the set of all options that specify
|
||||
// default v1 behavior.
|
||||
//
|
||||
// For many of the behavior differences, there are Go struct field options
|
||||
// that the author of a Go type can specify to control the behavior such that
|
||||
// the type is represented identically in JSON under either v1 or v2 semantics.
|
||||
//
|
||||
// The availability of [DefaultOptionsV1] and [jsonv2.DefaultOptionsV2],
|
||||
// where later options take precedence over former options allows for
|
||||
// a gradual migration from v1 to v2. For example:
|
||||
//
|
||||
// - jsonv1.Marshal(v)
|
||||
// uses default v1 semantics.
|
||||
//
|
||||
// - jsonv2.Marshal(v, jsonv1.DefaultOptionsV1())
|
||||
// is semantically equivalent to jsonv1.Marshal
|
||||
// and thus uses default v1 semantics.
|
||||
//
|
||||
// - jsonv2.Marshal(v, jsonv1.DefaultOptionsV1(), jsontext.AllowDuplicateNames(false))
|
||||
// uses mostly v1 semantics, but opts into one particular v2-specific behavior.
|
||||
//
|
||||
// - jsonv2.Marshal(v, jsonv1.CallMethodsWithLegacySemantics(true))
|
||||
// uses mostly v2 semantics, but opts into one particular v1-specific behavior.
|
||||
//
|
||||
// - jsonv2.Marshal(v, ..., jsonv2.DefaultOptionsV2())
|
||||
// is semantically equivalent to jsonv2.Marshal since
|
||||
// jsonv2.DefaultOptionsV2 overrides any options specified earlier
|
||||
// and thus uses default v2 semantics.
|
||||
//
|
||||
// - jsonv2.Marshal(v)
|
||||
// uses default v2 semantics.
|
||||
//
|
||||
// All new usages of "json" in Go should use the v2 package,
|
||||
// but the v1 package will forever remain supported.
|
||||
package json
|
||||
|
||||
// TODO(https://go.dev/issue/71631): Update the "Migrating to v2" documentation
|
||||
// with default v2 behavior for [time.Duration].
|
||||
|
||||
import (
|
||||
"encoding"
|
||||
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
"next.orly.dev/pkg/json/internal/jsonopts"
|
||||
"next.orly.dev/pkg/json/jsontext"
|
||||
jsonv2 "next.orly.dev/pkg/json/v2"
|
||||
)
|
||||
|
||||
// Reference encoding, jsonv2, and jsontext packages to assist pkgsite
|
||||
// in being able to hotlink references to those packages.
|
||||
var (
|
||||
_ encoding.TextMarshaler
|
||||
_ encoding.TextUnmarshaler
|
||||
_ jsonv2.Options
|
||||
_ jsontext.Options
|
||||
)
|
||||
|
||||
// Options are a set of options to configure the v2 "json" package
|
||||
// to operate with v1 semantics for particular features.
|
||||
// Values of this type can be passed to v2 functions like
|
||||
// [jsonv2.Marshal] or [jsonv2.Unmarshal].
|
||||
// Instead of referencing this type, use [jsonv2.Options].
|
||||
//
|
||||
// See the "Migrating to v2" section for guidance on how to migrate usage
|
||||
// of "json" from using v1 to using v2 instead.
|
||||
type Options = jsonopts.Options // alias declaration: Options and jsonopts.Options denote the same type
|
||||
|
||||
// DefaultOptionsV1 is the full set of all options that define v1 semantics.
|
||||
// It is equivalent to the following boolean options being set to true:
|
||||
//
|
||||
// - [CallMethodsWithLegacySemantics]
|
||||
// - [FormatByteArrayAsArray]
|
||||
// - [FormatBytesWithLegacySemantics]
|
||||
// - [FormatDurationAsNano]
|
||||
// - [MatchCaseSensitiveDelimiter]
|
||||
// - [MergeWithLegacySemantics]
|
||||
// - [OmitEmptyWithLegacySemantics]
|
||||
// - [ParseBytesWithLooseRFC4648]
|
||||
// - [ParseTimeWithLooseRFC3339]
|
||||
// - [ReportErrorsWithLegacySemantics]
|
||||
// - [StringifyWithLegacySemantics]
|
||||
// - [UnmarshalArrayFromAnyLength]
|
||||
// - [jsonv2.Deterministic]
|
||||
// - [jsonv2.FormatNilMapAsNull]
|
||||
// - [jsonv2.FormatNilSliceAsNull]
|
||||
// - [jsonv2.MatchCaseInsensitiveNames]
|
||||
// - [jsontext.AllowDuplicateNames]
|
||||
// - [jsontext.AllowInvalidUTF8]
|
||||
// - [jsontext.EscapeForHTML]
|
||||
// - [jsontext.EscapeForJS]
|
||||
// - [jsontext.PreserveRawStrings]
|
||||
//
|
||||
// All other boolean options are set to false.
|
||||
// All non-boolean options are set to the zero value,
|
||||
// except for [jsontext.WithIndent], which defaults to "\t".
|
||||
//
|
||||
// The [Marshal] and [Unmarshal] functions in this package are
|
||||
// semantically identical to calling the v2 equivalents with this option:
|
||||
//
|
||||
// jsonv2.Marshal(v, jsonv1.DefaultOptionsV1())
|
||||
// jsonv2.Unmarshal(b, v, jsonv1.DefaultOptionsV1())
|
||||
func DefaultOptionsV1() Options {
|
||||
return &jsonopts.DefaultOptionsV1
|
||||
}
|
||||
|
||||
// CallMethodsWithLegacySemantics specifies that calling of type-provided
|
||||
// marshal and unmarshal methods follow legacy semantics:
|
||||
//
|
||||
// - When marshaling, a marshal method declared on a pointer receiver
|
||||
// is only called if the Go value is addressable.
|
||||
// Values obtained from an interface or map element are not addressable.
|
||||
// Values obtained from a pointer or slice element are addressable.
|
||||
// Values obtained from an array element or struct field inherit
|
||||
// the addressability of the parent. In contrast, the v2 semantic
|
||||
// is to always call marshal methods regardless of addressability.
|
||||
//
|
||||
// - When marshaling or unmarshaling, the [Marshaler] or [Unmarshaler]
|
||||
// methods are ignored for map keys. However, [encoding.TextMarshaler]
|
||||
// or [encoding.TextUnmarshaler] are still callable.
|
||||
// In contrast, the v2 semantic is to serialize map keys
|
||||
// like any other value (with regard to calling methods),
|
||||
// which may include calling [Marshaler] or [Unmarshaler] methods,
|
||||
// where it is the implementation's responsibility to represent the
|
||||
// Go value as a JSON string (as required for JSON object names).
|
||||
//
|
||||
// - When marshaling, if a map key value implements a marshal method
|
||||
// and is a nil pointer, then it is serialized as an empty JSON string.
|
||||
// In contrast, the v2 semantic is to report an error.
|
||||
//
|
||||
// - When marshaling, if an interface type implements a marshal method
|
||||
// and the interface value is a nil pointer to a concrete type,
|
||||
// then the marshal method is always called.
|
||||
// In contrast, the v2 semantic is to never directly call methods
|
||||
// on interface values and to instead defer evaluation based upon
|
||||
// the underlying concrete value. Similar to non-interface values,
|
||||
// marshal methods are not called on nil pointers and
|
||||
// are instead serialized as a JSON null.
|
||||
//
|
||||
// This affects either marshaling or unmarshaling.
|
||||
// The v1 default is true.
|
||||
func CallMethodsWithLegacySemantics(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.CallMethodsWithLegacySemantics | 1
|
||||
} else {
|
||||
return jsonflags.CallMethodsWithLegacySemantics | 0
|
||||
}
|
||||
}
|
||||
|
||||
// FormatByteArrayAsArray specifies that a Go [N]byte is
|
||||
// formatted as a normal Go array in contrast to the v2 default of
|
||||
// formatting [N]byte as using binary data encoding (RFC 4648).
|
||||
// If a struct field has a `format` tag option,
|
||||
// then the specified formatting takes precedence.
|
||||
//
|
||||
// This affects either marshaling or unmarshaling.
|
||||
// The v1 default is true.
|
||||
func FormatByteArrayAsArray(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.FormatByteArrayAsArray | 1
|
||||
} else {
|
||||
return jsonflags.FormatByteArrayAsArray | 0
|
||||
}
|
||||
}
|
||||
|
||||
// FormatBytesWithLegacySemantics specifies that handling of
|
||||
// []~byte and [N]~byte types follow legacy semantics:
|
||||
//
|
||||
// - A Go []~byte is to be treated as using some form of
|
||||
// binary data encoding (RFC 4648) in contrast to the v2 default
|
||||
// of only treating []byte as such. In particular, v2 does not
|
||||
// treat slices of named byte types as representing binary data.
|
||||
//
|
||||
// - When marshaling, if a named byte implements a marshal method,
|
||||
// then the slice is serialized as a JSON array of elements,
|
||||
// each of which call the marshal method.
|
||||
//
|
||||
// - When unmarshaling, if the input is a JSON array,
|
||||
// then unmarshal into the []~byte as if it were a normal Go slice.
|
||||
// In contrast, the v2 default is to report an error unmarshaling
|
||||
// a JSON array when expecting some form of binary data encoding.
|
||||
//
|
||||
// This affects either marshaling or unmarshaling.
|
||||
// The v1 default is true.
|
||||
func FormatBytesWithLegacySemantics(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.FormatBytesWithLegacySemantics | 1
|
||||
} else {
|
||||
return jsonflags.FormatBytesWithLegacySemantics | 0
|
||||
}
|
||||
}
|
||||
|
||||
// FormatDurationAsNano specifies that a [time.Duration] is
|
||||
// formatted as a JSON number representing the number of nanoseconds
|
||||
// in contrast to the v2 default of reporting an error.
|
||||
// If a duration field has a `format` tag option,
|
||||
// then the specified formatting takes precedence.
|
||||
//
|
||||
// This affects either marshaling or unmarshaling.
|
||||
// The v1 default is true.
|
||||
func FormatDurationAsNano(v bool) Options {
|
||||
// TODO(https://go.dev/issue/71631): Update documentation with v2 behavior.
|
||||
if v {
|
||||
return jsonflags.FormatDurationAsNano | 1
|
||||
} else {
|
||||
return jsonflags.FormatDurationAsNano | 0
|
||||
}
|
||||
}
|
||||
|
||||
// MatchCaseSensitiveDelimiter specifies that underscores and dashes are
|
||||
// not to be ignored when performing case-insensitive name matching which
|
||||
// occurs under [jsonv2.MatchCaseInsensitiveNames] or the `case:ignore` tag option.
|
||||
// Thus, case-insensitive name matching is identical to [strings.EqualFold].
|
||||
// Use of this option diminishes the ability of case-insensitive matching
|
||||
// to be able to match common case variants (e.g., "foo_bar" with "fooBar").
|
||||
//
|
||||
// This affects either marshaling or unmarshaling.
|
||||
// The v1 default is true.
|
||||
func MatchCaseSensitiveDelimiter(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.MatchCaseSensitiveDelimiter | 1
|
||||
} else {
|
||||
return jsonflags.MatchCaseSensitiveDelimiter | 0
|
||||
}
|
||||
}
|
||||
|
||||
// MergeWithLegacySemantics specifies that unmarshaling into a non-zero
|
||||
// Go value follows legacy semantics:
|
||||
//
|
||||
// - When unmarshaling a JSON null, this preserves the original Go value
|
||||
// if the kind is a bool, int, uint, float, string, array, or struct.
|
||||
// Otherwise, it zeros the Go value.
|
||||
// In contrast, the default v2 behavior is to consistently and always
|
||||
// zero the Go value when unmarshaling a JSON null into it.
|
||||
//
|
||||
// - When unmarshaling a JSON value other than null, this merges into
|
||||
// the original Go value for array elements, slice elements,
|
||||
// struct fields (but not map values),
|
||||
// pointer values, and interface values (only if a non-nil pointer).
|
||||
// In contrast, the default v2 behavior is to merge into the Go value
|
||||
// for struct fields, map values, pointer values, and interface values.
|
||||
// In general, the v2 semantic merges when unmarshaling a JSON object,
|
||||
// otherwise it replaces the original value.
|
||||
//
|
||||
// This only affects unmarshaling and is ignored when marshaling.
|
||||
// The v1 default is true.
|
||||
func MergeWithLegacySemantics(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.MergeWithLegacySemantics | 1
|
||||
} else {
|
||||
return jsonflags.MergeWithLegacySemantics | 0
|
||||
}
|
||||
}
|
||||
|
||||
// OmitEmptyWithLegacySemantics specifies that the `omitempty` tag option
|
||||
// follows a definition of empty where a field is omitted if the Go value is
|
||||
// false, 0, a nil pointer, a nil interface value,
|
||||
// or any empty array, slice, map, or string.
|
||||
// This overrides the v2 semantic where a field is empty if the value
|
||||
// marshals as a JSON null or an empty JSON string, object, or array.
|
||||
//
|
||||
// The v1 and v2 definitions of `omitempty` are practically the same for
|
||||
// Go strings, slices, arrays, and maps. Usages of `omitempty` on
|
||||
// Go bools, ints, uints, floats, pointers, and interfaces should migrate to use
|
||||
// the `omitzero` tag option, which omits a field if it is the zero Go value.
|
||||
//
|
||||
// This only affects marshaling and is ignored when unmarshaling.
|
||||
// The v1 default is true.
|
||||
func OmitEmptyWithLegacySemantics(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.OmitEmptyWithLegacySemantics | 1
|
||||
} else {
|
||||
return jsonflags.OmitEmptyWithLegacySemantics | 0
|
||||
}
|
||||
}
|
||||
|
||||
// ParseBytesWithLooseRFC4648 specifies that when parsing
|
||||
// binary data encoded as "base32" or "base64",
|
||||
// to ignore the presence of '\r' and '\n' characters.
|
||||
// In contrast, the v2 default is to report an error in order to be
|
||||
// strictly compliant with RFC 4648, section 3.3,
|
||||
// which specifies that non-alphabet characters must be rejected.
|
||||
//
|
||||
// This only affects unmarshaling and is ignored when marshaling.
|
||||
// The v1 default is true.
|
||||
func ParseBytesWithLooseRFC4648(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.ParseBytesWithLooseRFC4648 | 1
|
||||
} else {
|
||||
return jsonflags.ParseBytesWithLooseRFC4648 | 0
|
||||
}
|
||||
}
|
||||
|
||||
// ParseTimeWithLooseRFC3339 specifies that a [time.Time]
|
||||
// parses according to loose adherence to RFC 3339.
|
||||
// In particular, it permits historically incorrect representations,
|
||||
// allowing for deviations in hour format, sub-second separator,
|
||||
// and timezone representation. In contrast, the default v2 behavior
|
||||
// is to strictly comply with the grammar specified in RFC 3339.
|
||||
//
|
||||
// This only affects unmarshaling and is ignored when marshaling.
|
||||
// The v1 default is true.
|
||||
func ParseTimeWithLooseRFC3339(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.ParseTimeWithLooseRFC3339 | 1
|
||||
} else {
|
||||
return jsonflags.ParseTimeWithLooseRFC3339 | 0
|
||||
}
|
||||
}
|
||||
|
||||
// ReportErrorsWithLegacySemantics specifies that Marshal and Unmarshal
|
||||
// should report errors with legacy semantics:
|
||||
//
|
||||
// - When marshaling or unmarshaling, the returned error values are
|
||||
// usually of types such as [SyntaxError], [MarshalerError],
|
||||
// [UnsupportedTypeError], [UnsupportedValueError],
|
||||
// [InvalidUnmarshalError], or [UnmarshalTypeError].
|
||||
// In contrast, the v2 semantic is to always return errors as either
|
||||
// [jsonv2.SemanticError] or [jsontext.SyntacticError].
|
||||
//
|
||||
// - When marshaling, if a user-defined marshal method reports an error,
|
||||
// it is always wrapped in a [MarshalerError], even if the error itself
|
||||
// is already a [MarshalerError], which may lead to multiple redundant
|
||||
// layers of wrapping. In contrast, the v2 semantic is to
|
||||
// always wrap an error within [jsonv2.SemanticError]
|
||||
// unless it is already a semantic error.
|
||||
//
|
||||
// - When unmarshaling, if a user-defined unmarshal method reports an error,
|
||||
// it is never wrapped and reported verbatim. In contrast, the v2 semantic
|
||||
// is to always wrap an error within [jsonv2.SemanticError]
|
||||
// unless it is already a semantic error.
|
||||
//
|
||||
// - When marshaling or unmarshaling, if a Go struct contains type errors
|
||||
// (e.g., conflicting names or malformed field tags), then such errors
|
||||
// are ignored and the Go struct uses a best-effort representation.
|
||||
// In contrast, the v2 semantic is to report a runtime error.
|
||||
//
|
||||
// - When unmarshaling, the syntactic structure of the JSON input
|
||||
// is fully validated before performing the semantic unmarshaling
|
||||
// of the JSON data into the Go value. Practically speaking,
|
||||
// this means that JSON input with syntactic errors does not result
|
||||
// in any mutations of the target Go value. In contrast, the v2 semantic
|
||||
// is to perform a streaming decode and gradually unmarshal the JSON input
|
||||
// into the target Go value, which means that the Go value may be
|
||||
// partially mutated when a syntactic error is encountered.
|
||||
//
|
||||
// - When unmarshaling, a semantic error does not immediately terminate the
|
||||
// unmarshal procedure, but rather evaluation continues.
|
||||
// When unmarshal returns, only the first semantic error is reported.
|
||||
// In contrast, the v2 semantic is to terminate unmarshal the moment
|
||||
// an error is encountered.
|
||||
//
|
||||
// This affects either marshaling or unmarshaling.
|
||||
// The v1 default is true.
|
||||
func ReportErrorsWithLegacySemantics(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.ReportErrorsWithLegacySemantics | 1
|
||||
} else {
|
||||
return jsonflags.ReportErrorsWithLegacySemantics | 0
|
||||
}
|
||||
}
|
||||
|
||||
// StringifyWithLegacySemantics specifies that the `string` tag option
|
||||
// may stringify bools and string values. It only takes effect on fields
|
||||
// where the top-level type is a bool, string, numeric kind, or a pointer to
|
||||
// such a kind. Specifically, `string` will not stringify bool, string,
|
||||
// or numeric kinds within a composite data type
|
||||
// (e.g., array, slice, struct, map, or interface).
|
||||
//
|
||||
// When marshaling, such Go values are serialized as their usual
|
||||
// JSON representation, but quoted within a JSON string.
|
||||
// When unmarshaling, such Go values must be deserialized from
|
||||
// a JSON string containing their usual JSON representation.
|
||||
// A JSON null quoted in a JSON string is a valid substitute for JSON null
|
||||
// while unmarshaling into a Go value that `string` takes effect on.
|
||||
//
|
||||
// This affects either marshaling or unmarshaling.
|
||||
// The v1 default is true.
|
||||
func StringifyWithLegacySemantics(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.StringifyWithLegacySemantics | 1
|
||||
} else {
|
||||
return jsonflags.StringifyWithLegacySemantics | 0
|
||||
}
|
||||
}
|
||||
|
||||
// UnmarshalArrayFromAnyLength specifies that Go arrays can be unmarshaled
|
||||
// from input JSON arrays of any length. If the JSON array is too short,
|
||||
// then the remaining Go array elements are zeroed. If the JSON array
|
||||
// is too long, then the excess JSON array elements are skipped over.
|
||||
//
|
||||
// This only affects unmarshaling and is ignored when marshaling.
|
||||
// The v1 default is true.
|
||||
func UnmarshalArrayFromAnyLength(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.UnmarshalArrayFromAnyLength | 1
|
||||
} else {
|
||||
return jsonflags.UnmarshalArrayFromAnyLength | 0
|
||||
}
|
||||
}
|
||||
|
||||
// unmarshalAnyWithRawNumber specifies that unmarshaling a JSON number into
|
||||
// an empty Go interface should use the Number type instead of a float64.
|
||||
func unmarshalAnyWithRawNumber(v bool) Options {
|
||||
if v {
|
||||
return jsonflags.UnmarshalAnyWithRawNumber | 1
|
||||
} else {
|
||||
return jsonflags.UnmarshalAnyWithRawNumber | 0
|
||||
}
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"next.orly.dev/pkg/json/internal"
|
||||
"next.orly.dev/pkg/json/internal/jsonflags"
|
||||
"next.orly.dev/pkg/json/jsontext"
|
||||
)
|
||||
|
||||
// export exposes internal functionality of the "jsontext" package.
|
||||
var export = jsontext.Internal.Export(&internal.AllowInternalUse)
|
||||
|
||||
// Valid reports whether data is a valid JSON encoding.
|
||||
func Valid(data []byte) bool {
|
||||
return checkValid(data) == nil
|
||||
}
|
||||
|
||||
func checkValid(data []byte) error {
|
||||
d := export.GetBufferedDecoder(data)
|
||||
defer export.PutBufferedDecoder(d)
|
||||
xd := export.Decoder(d)
|
||||
xd.Struct.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
|
||||
if _, err := d.ReadValue(); err != nil {
|
||||
if err == io.EOF {
|
||||
offset := d.InputOffset() + int64(len(d.UnreadBuffer()))
|
||||
err = &jsontext.SyntacticError{ByteOffset: offset, Err: io.ErrUnexpectedEOF}
|
||||
}
|
||||
return transformSyntacticError(err)
|
||||
}
|
||||
if err := xd.CheckEOF(); err != nil {
|
||||
return transformSyntacticError(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// A SyntaxError is a description of a JSON syntax error.
|
||||
// [Unmarshal] will return a SyntaxError if the JSON can't be parsed.
|
||||
type SyntaxError struct {
|
||||
msg string // description of error
|
||||
Offset int64 // error occurred after reading Offset bytes
|
||||
}
|
||||
|
||||
func (e *SyntaxError) Error() string { return e.msg }
|
||||
|
||||
var errUnexpectedEnd = errors.New("unexpected end of JSON input")
|
||||
|
||||
func transformSyntacticError(err error) error {
|
||||
switch serr, ok := err.(*jsontext.SyntacticError); {
|
||||
case serr != nil:
|
||||
if serr.Err == io.ErrUnexpectedEOF {
|
||||
serr.Err = errUnexpectedEnd
|
||||
}
|
||||
msg := serr.Err.Error()
|
||||
if i := strings.Index(msg, " (expecting"); i >= 0 && !strings.Contains(msg, " in literal") {
|
||||
msg = msg[:i]
|
||||
}
|
||||
return &SyntaxError{Offset: serr.ByteOffset, msg: syntaxErrorReplacer.Replace(msg)}
|
||||
case ok:
|
||||
return (*SyntaxError)(nil)
|
||||
case export.IsIOError(err):
|
||||
return errors.Unwrap(err) // v1 historically did not wrap IO errors
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// syntaxErrorReplacer replaces certain string literals in the v2 error
|
||||
// to better match the historical string rendering of syntax errors.
|
||||
// In particular, v2 uses the terminology "object name" to match RFC 8259,
|
||||
// while v1 uses "object key", which is not a term found in JSON literature.
|
||||
var syntaxErrorReplacer = strings.NewReplacer(
|
||||
"object name", "object key",
|
||||
"at start of value", "looking for beginning of value",
|
||||
"at start of string", "looking for beginning of object key string",
|
||||
"after object value", "after object key:value pair",
|
||||
"in number", "in numeric literal",
|
||||
)
|
||||
@@ -1,231 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.jsonv2
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
|
||||
"next.orly.dev/pkg/json/jsontext"
|
||||
jsonv2 "next.orly.dev/pkg/json/v2"
|
||||
)
|
||||
|
||||
// A Decoder reads and decodes JSON values from an input stream.
|
||||
type Decoder struct {
|
||||
dec *jsontext.Decoder
|
||||
opts jsonv2.Options
|
||||
err error
|
||||
}
|
||||
|
||||
// NewDecoder returns a new decoder that reads from r.
|
||||
//
|
||||
// The decoder introduces its own buffering and may
|
||||
// read data from r beyond the JSON values requested.
|
||||
func NewDecoder(r io.Reader) *Decoder {
|
||||
// Hide bytes.Buffer from jsontext since it implements optimizations that
|
||||
// also limit certain ways it could be used. For example, one cannot write
|
||||
// to the bytes.Buffer while it is in use by jsontext.Decoder.
|
||||
if _, ok := r.(*bytes.Buffer); ok {
|
||||
r = struct{ io.Reader }{r}
|
||||
}
|
||||
|
||||
dec := new(Decoder)
|
||||
dec.opts = DefaultOptionsV1()
|
||||
dec.dec = jsontext.NewDecoder(r, dec.opts)
|
||||
return dec
|
||||
}
|
||||
|
||||
// UseNumber causes the Decoder to unmarshal a number into an
|
||||
// interface value as a [Number] instead of as a float64.
|
||||
func (dec *Decoder) UseNumber() {
|
||||
if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); !useNumber {
|
||||
dec.opts = jsonv2.JoinOptions(dec.opts, unmarshalAnyWithRawNumber(true))
|
||||
}
|
||||
}
|
||||
|
||||
// DisallowUnknownFields causes the Decoder to return an error when the destination
|
||||
// is a struct and the input contains object keys which do not match any
|
||||
// non-ignored, exported fields in the destination.
|
||||
func (dec *Decoder) DisallowUnknownFields() {
|
||||
if reject, _ := jsonv2.GetOption(dec.opts, jsonv2.RejectUnknownMembers); !reject {
|
||||
dec.opts = jsonv2.JoinOptions(dec.opts, jsonv2.RejectUnknownMembers(true))
|
||||
}
|
||||
}
|
||||
|
||||
// Decode reads the next JSON-encoded value from its
|
||||
// input and stores it in the value pointed to by v.
|
||||
//
|
||||
// See the documentation for [Unmarshal] for details about
|
||||
// the conversion of JSON into a Go value.
|
||||
func (dec *Decoder) Decode(v any) error {
|
||||
if dec.err != nil {
|
||||
return dec.err
|
||||
}
|
||||
b, err := dec.dec.ReadValue()
|
||||
if err != nil {
|
||||
dec.err = transformSyntacticError(err)
|
||||
if dec.err.Error() == errUnexpectedEnd.Error() {
|
||||
// NOTE: Decode has always been inconsistent with Unmarshal
|
||||
// with regard to the exact error value for truncated input.
|
||||
dec.err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return dec.err
|
||||
}
|
||||
return jsonv2.Unmarshal(b, v, dec.opts)
|
||||
}
|
||||
|
||||
// Buffered returns a reader of the data remaining in the Decoder's
|
||||
// buffer. The reader is valid until the next call to [Decoder.Decode].
|
||||
func (dec *Decoder) Buffered() io.Reader {
|
||||
return bytes.NewReader(dec.dec.UnreadBuffer())
|
||||
}
|
||||
|
||||
// An Encoder writes JSON values to an output stream.
|
||||
type Encoder struct {
|
||||
w io.Writer
|
||||
opts jsonv2.Options
|
||||
err error
|
||||
|
||||
buf bytes.Buffer
|
||||
indentBuf bytes.Buffer
|
||||
|
||||
indentPrefix string
|
||||
indentValue string
|
||||
}
|
||||
|
||||
// NewEncoder returns a new encoder that writes to w.
|
||||
func NewEncoder(w io.Writer) *Encoder {
|
||||
enc := new(Encoder)
|
||||
enc.w = w
|
||||
enc.opts = DefaultOptionsV1()
|
||||
return enc
|
||||
}
|
||||
|
||||
// Encode writes the JSON encoding of v to the stream,
|
||||
// followed by a newline character.
|
||||
//
|
||||
// See the documentation for [Marshal] for details about the
|
||||
// conversion of Go values to JSON.
|
||||
func (enc *Encoder) Encode(v any) error {
|
||||
if enc.err != nil {
|
||||
return enc.err
|
||||
}
|
||||
|
||||
buf := &enc.buf
|
||||
buf.Reset()
|
||||
if err := jsonv2.MarshalWrite(buf, v, enc.opts); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(enc.indentPrefix)+len(enc.indentValue) > 0 {
|
||||
enc.indentBuf.Reset()
|
||||
if err := Indent(&enc.indentBuf, buf.Bytes(), enc.indentPrefix, enc.indentValue); err != nil {
|
||||
return err
|
||||
}
|
||||
buf = &enc.indentBuf
|
||||
}
|
||||
buf.WriteByte('\n')
|
||||
|
||||
if _, err := enc.w.Write(buf.Bytes()); err != nil {
|
||||
enc.err = err
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetIndent instructs the encoder to format each subsequent encoded
|
||||
// value as if indented by the package-level function Indent(dst, src, prefix, indent).
|
||||
// Calling SetIndent("", "") disables indentation.
|
||||
func (enc *Encoder) SetIndent(prefix, indent string) {
|
||||
enc.indentPrefix = prefix
|
||||
enc.indentValue = indent
|
||||
}
|
||||
|
||||
// SetEscapeHTML specifies whether problematic HTML characters
|
||||
// should be escaped inside JSON quoted strings.
|
||||
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
|
||||
// to avoid certain safety problems that can arise when embedding JSON in HTML.
|
||||
//
|
||||
// In non-HTML settings where the escaping interferes with the readability
|
||||
// of the output, SetEscapeHTML(false) disables this behavior.
|
||||
func (enc *Encoder) SetEscapeHTML(on bool) {
|
||||
if escape, _ := jsonv2.GetOption(enc.opts, jsontext.EscapeForHTML); escape != on {
|
||||
enc.opts = jsonv2.JoinOptions(enc.opts, jsontext.EscapeForHTML(on))
|
||||
}
|
||||
}
|
||||
|
||||
// RawMessage is a raw encoded JSON value.
|
||||
// It implements [Marshaler] and [Unmarshaler] and can
|
||||
// be used to delay JSON decoding or precompute a JSON encoding.
|
||||
type RawMessage = jsontext.Value
|
||||
|
||||
// A Token holds a value of one of these types:
|
||||
//
|
||||
// - [Delim], for the four JSON delimiters [ ] { }
|
||||
// - bool, for JSON booleans
|
||||
// - float64, for JSON numbers
|
||||
// - [Number], for JSON numbers
|
||||
// - string, for JSON string literals
|
||||
// - nil, for JSON null
|
||||
type Token any
|
||||
|
||||
// A Delim is a JSON array or object delimiter, one of [ ] { or }.
|
||||
type Delim rune
|
||||
|
||||
func (d Delim) String() string {
|
||||
return string(d)
|
||||
}
|
||||
|
||||
// Token returns the next JSON token in the input stream.
|
||||
// At the end of the input stream, Token returns nil, [io.EOF].
|
||||
//
|
||||
// Token guarantees that the delimiters [ ] { } it returns are
|
||||
// properly nested and matched: if Token encounters an unexpected
|
||||
// delimiter in the input, it will return an error.
|
||||
//
|
||||
// The input stream consists of basic JSON values—bool, string,
|
||||
// number, and null—along with delimiters [ ] { } of type [Delim]
|
||||
// to mark the start and end of arrays and objects.
|
||||
// Commas and colons are elided.
|
||||
func (dec *Decoder) Token() (Token, error) {
|
||||
tok, err := dec.dec.ReadToken()
|
||||
if err != nil {
|
||||
return nil, transformSyntacticError(err)
|
||||
}
|
||||
switch k := tok.Kind(); k {
|
||||
case 'n':
|
||||
return nil, nil
|
||||
case 'f':
|
||||
return false, nil
|
||||
case 't':
|
||||
return true, nil
|
||||
case '"':
|
||||
return tok.String(), nil
|
||||
case '0':
|
||||
if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); useNumber {
|
||||
return Number(tok.String()), nil
|
||||
}
|
||||
return tok.Float(), nil
|
||||
case '{', '}', '[', ']':
|
||||
return Delim(k), nil
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
// More reports whether there is another element in the
|
||||
// current array or object being parsed.
|
||||
func (dec *Decoder) More() bool {
|
||||
k := dec.dec.PeekKind()
|
||||
return k > 0 && k != ']' && k != '}'
|
||||
}
|
||||
|
||||
// InputOffset returns the input stream byte offset of the current decoder position.
|
||||
// The offset gives the location of the end of the most recently returned token
|
||||
// and the beginning of the next token.
|
||||
func (dec *Decoder) InputOffset() int64 {
|
||||
return dec.dec.InputOffset()
|
||||
}
|
||||
@@ -101,7 +101,6 @@ done
|
||||
configDir: tempDir,
|
||||
scriptPath: scriptPath,
|
||||
enabled: true,
|
||||
disabled: false,
|
||||
responseChan: make(chan PolicyResponse, 100),
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,9 @@ import (
|
||||
"next.orly.dev/pkg/encoders/hex"
|
||||
)
|
||||
|
||||
// Kinds defines the filter for events by kind; the whitelist overrides the blacklist if it has any fields, and the blacklist is ignored (implicitly all not-whitelisted are blacklisted)
|
||||
// Kinds defines whitelist and blacklist policies for event kinds.
|
||||
// Whitelist takes precedence over blacklist - if whitelist is present, only whitelisted kinds are allowed.
|
||||
// If only blacklist is present, all kinds except blacklisted ones are allowed.
|
||||
type Kinds struct {
|
||||
// Whitelist is a list of event kinds that are allowed to be written to the relay. If any are present, implicitly all others are denied.
|
||||
Whitelist []int `json:"whitelist,omitempty"`
|
||||
@@ -28,13 +30,16 @@ type Kinds struct {
|
||||
Blacklist []int `json:"blacklist,omitempty"`
|
||||
}
|
||||
|
||||
// Rule is a rule for an event kind.
|
||||
// Rule defines policy criteria for a specific event kind.
|
||||
//
|
||||
// If Script is present, it overrides all other criteria.
|
||||
// Rules are evaluated in the following order:
|
||||
// 1. If Script is present and running, it determines the outcome
|
||||
// 2. If Script fails or is not running, falls back to default_policy
|
||||
// 3. Otherwise, all specified criteria are evaluated as AND operations
|
||||
//
|
||||
// The criteria have mutual exclude semantics on pubkey white/blacklists, if whitelist has any fields, blacklist is ignored (implicitly all not-whitelisted are blacklisted).
|
||||
//
|
||||
// The other criteria are evaluated as AND operations, everything specified must match for the event to be allowed to be written to the relay.
|
||||
// For pubkey allow/deny lists: whitelist takes precedence over blacklist.
|
||||
// If whitelist has entries, only whitelisted pubkeys are allowed.
|
||||
// If only blacklist has entries, all pubkeys except blacklisted ones are allowed.
|
||||
type Rule struct {
|
||||
// Description is a human-readable description of the rule.
|
||||
Description string `json:"description"`
|
||||
@@ -66,14 +71,16 @@ type Rule struct {
|
||||
MaxAgeEventInFuture *int64 `json:"max_age_event_in_future,omitempty"`
|
||||
}
|
||||
|
||||
// PolicyEvent represents an event with additional context for policy scripts
|
||||
// PolicyEvent represents an event with additional context for policy scripts.
|
||||
// It embeds the Nostr event and adds authentication and network context.
|
||||
type PolicyEvent struct {
|
||||
*event.E
|
||||
LoggedInPubkey string `json:"logged_in_pubkey,omitempty"`
|
||||
IPAddress string `json:"ip_address,omitempty"`
|
||||
}
|
||||
|
||||
// MarshalJSON implements custom JSON marshaling for PolicyEvent
|
||||
// MarshalJSON implements custom JSON marshaling for PolicyEvent.
|
||||
// It safely serializes the embedded event and additional context fields.
|
||||
func (pe *PolicyEvent) MarshalJSON() ([]byte, error) {
|
||||
if pe.E == nil {
|
||||
return json.Marshal(map[string]interface{}{
|
||||
@@ -104,14 +111,17 @@ func (pe *PolicyEvent) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(safeEvent)
|
||||
}
|
||||
|
||||
// PolicyResponse represents a response from the policy script
|
||||
// PolicyResponse represents a response from the policy script.
|
||||
// The script should return JSON with these fields to indicate its decision.
|
||||
type PolicyResponse struct {
|
||||
ID string `json:"id"`
|
||||
Action string `json:"action"` // accept, reject, or shadowReject
|
||||
Msg string `json:"msg"` // NIP-20 response message (only used for reject)
|
||||
}
|
||||
|
||||
// PolicyManager handles policy script execution and management
|
||||
// PolicyManager handles policy script execution and management.
|
||||
// It manages the lifecycle of policy scripts, handles communication with them,
|
||||
// and provides resilient operation with automatic restart capabilities.
|
||||
type PolicyManager struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
@@ -122,14 +132,15 @@ type PolicyManager struct {
|
||||
mutex sync.RWMutex
|
||||
isRunning bool
|
||||
enabled bool
|
||||
disabled bool // true when policy is disabled due to failure
|
||||
stdin io.WriteCloser
|
||||
stdout io.ReadCloser
|
||||
stderr io.ReadCloser
|
||||
responseChan chan PolicyResponse
|
||||
}
|
||||
|
||||
// P is a policy for a relay's ACL.
|
||||
// P represents a complete policy configuration for a Nostr relay.
|
||||
// It defines access control rules, kind filtering, and default behavior.
|
||||
// Policies are evaluated in order: global rules, kind filtering, specific rules, then default policy.
|
||||
type P struct {
|
||||
// Kind is policies for accepting or rejecting events by kind number.
|
||||
Kind Kinds `json:"kind"`
|
||||
@@ -137,22 +148,47 @@ type P struct {
|
||||
Rules map[int]Rule `json:"rules"`
|
||||
// Global is a rule set that applies to all events.
|
||||
Global Rule `json:"global"`
|
||||
// DefaultPolicy determines the default behavior when no rules deny an event ("allow" or "deny", defaults to "allow")
|
||||
DefaultPolicy string `json:"default_policy"`
|
||||
// Manager handles policy script execution
|
||||
Manager *PolicyManager `json:"-"`
|
||||
}
|
||||
|
||||
// New creates a new policy from JSON configuration
|
||||
// New creates a new policy from JSON configuration.
|
||||
// If policyJSON is empty, returns a policy with default settings.
|
||||
// The default_policy field defaults to "allow" if not specified.
|
||||
func New(policyJSON []byte) (p *P, err error) {
|
||||
p = &P{}
|
||||
p = &P{
|
||||
DefaultPolicy: "allow", // Set default value
|
||||
}
|
||||
if len(policyJSON) > 0 {
|
||||
if err = json.Unmarshal(policyJSON, p); chk.E(err) {
|
||||
return nil, fmt.Errorf("failed to unmarshal policy JSON: %v", err)
|
||||
}
|
||||
}
|
||||
// Ensure default policy is valid
|
||||
if p.DefaultPolicy == "" {
|
||||
p.DefaultPolicy = "allow"
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// NewWithManager creates a new policy with a policy manager for script execution
|
||||
// getDefaultPolicyAction returns false only if the default policy is "deny"; any other value (including empty or invalid) is treated as "allow" and returns true
|
||||
func (p *P) getDefaultPolicyAction() (allowed bool) {
|
||||
switch p.DefaultPolicy {
|
||||
case "deny":
|
||||
return false
|
||||
case "allow", "":
|
||||
return true
|
||||
default:
|
||||
// Invalid value, default to allow
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// NewWithManager creates a new policy with a policy manager for script execution.
|
||||
// It initializes the policy manager, loads configuration from files, and starts
|
||||
// background processes for script management and periodic health checks.
|
||||
func NewWithManager(ctx context.Context, appName string, enabled bool) *P {
|
||||
configDir := filepath.Join(xdg.ConfigHome, appName)
|
||||
scriptPath := filepath.Join(configDir, "policy.sh")
|
||||
@@ -166,13 +202,13 @@ func NewWithManager(ctx context.Context, appName string, enabled bool) *P {
|
||||
configDir: configDir,
|
||||
scriptPath: scriptPath,
|
||||
enabled: enabled,
|
||||
disabled: false,
|
||||
responseChan: make(chan PolicyResponse, 100), // Buffered channel for responses
|
||||
}
|
||||
|
||||
// Load policy configuration from JSON file
|
||||
policy := &P{
|
||||
Manager: manager,
|
||||
DefaultPolicy: "allow", // Set default value
|
||||
Manager: manager,
|
||||
}
|
||||
|
||||
if enabled {
|
||||
@@ -192,7 +228,8 @@ func NewWithManager(ctx context.Context, appName string, enabled bool) *P {
|
||||
return policy
|
||||
}
|
||||
|
||||
// LoadFromFile loads policy configuration from a JSON file
|
||||
// LoadFromFile loads policy configuration from a JSON file.
|
||||
// Returns an error if the file doesn't exist, can't be read, or contains invalid JSON.
|
||||
func (p *P) LoadFromFile(configPath string) error {
|
||||
if _, err := os.Stat(configPath); os.IsNotExist(err) {
|
||||
return fmt.Errorf("policy configuration file does not exist: %s", configPath)
|
||||
@@ -214,7 +251,10 @@ func (p *P) LoadFromFile(configPath string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// CheckPolicy checks if an event is allowed to be written to the relay based on the policy. The access parameter is either "write" or "read", write is for accepting events and read is for filtering events to send back to the client.
|
||||
// CheckPolicy checks if an event is allowed based on the policy configuration.
|
||||
// The access parameter should be "write" for accepting events or "read" for filtering events.
|
||||
// Returns true if the event is allowed, false if denied, and an error if validation fails.
|
||||
// Policy evaluation order: global rules → kind filtering → specific rules → default policy.
|
||||
func (p *P) CheckPolicy(access string, ev *event.E, loggedInPubkey []byte, ipAddress string) (allowed bool, err error) {
|
||||
// Handle nil event
|
||||
if ev == nil {
|
||||
@@ -234,8 +274,8 @@ func (p *P) CheckPolicy(access string, ev *event.E, loggedInPubkey []byte, ipAdd
|
||||
// Get rule for this kind
|
||||
rule, hasRule := p.Rules[int(ev.Kind)]
|
||||
if !hasRule {
|
||||
// No specific rule for this kind, allow if global and kinds policy passed
|
||||
return true, nil
|
||||
// No specific rule for this kind, use default policy
|
||||
return p.getDefaultPolicyAction(), nil
|
||||
}
|
||||
|
||||
// Check if script is present and enabled
|
||||
@@ -408,8 +448,9 @@ func (p *P) checkRulePolicy(access string, ev *event.E, rule Rule, loggedInPubke
|
||||
// checkScriptPolicy runs the policy script to determine if event should be allowed
|
||||
func (p *P) checkScriptPolicy(access string, ev *event.E, scriptPath string, loggedInPubkey []byte, ipAddress string) (allowed bool, err error) {
|
||||
if p.Manager == nil || !p.Manager.IsRunning() {
|
||||
// If script is not running, default to allow
|
||||
return true, nil
|
||||
// If script is not running, fall back to default policy
|
||||
log.W.F("policy rule for kind %d is inactive (script not running), falling back to default policy (%s)", ev.Kind, p.DefaultPolicy)
|
||||
return p.getDefaultPolicyAction(), nil
|
||||
}
|
||||
|
||||
// Create policy event with additional context
|
||||
@@ -422,9 +463,9 @@ func (p *P) checkScriptPolicy(access string, ev *event.E, scriptPath string, log
|
||||
// Process event through policy script
|
||||
response, scriptErr := p.Manager.ProcessEvent(policyEvent)
|
||||
if chk.E(scriptErr) {
|
||||
log.E.F("policy script processing failed: %v", scriptErr)
|
||||
// Default to allow on script failure
|
||||
return true, nil
|
||||
log.E.F("policy rule for kind %d failed (script processing error: %v), falling back to default policy (%s)", ev.Kind, scriptErr, p.DefaultPolicy)
|
||||
// Fall back to default policy on script failure
|
||||
return p.getDefaultPolicyAction(), nil
|
||||
}
|
||||
|
||||
// Handle script response
|
||||
@@ -436,54 +477,18 @@ func (p *P) checkScriptPolicy(access string, ev *event.E, scriptPath string, log
|
||||
case "shadowReject":
|
||||
return false, nil // Treat as reject for policy purposes
|
||||
default:
|
||||
log.W.F("unknown policy script action: %s", response.Action)
|
||||
// Default to allow for unknown actions
|
||||
return true, nil
|
||||
log.W.F("policy rule for kind %d returned unknown action '%s', falling back to default policy (%s)", ev.Kind, response.Action, p.DefaultPolicy)
|
||||
// Fall back to default policy for unknown actions
|
||||
return p.getDefaultPolicyAction(), nil
|
||||
}
|
||||
}
|
||||
|
||||
// PolicyManager methods (similar to SprocketManager)
|
||||
|
||||
// disablePolicy disables policy due to failure
|
||||
func (pm *PolicyManager) disablePolicy() {
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
|
||||
if !pm.disabled {
|
||||
pm.disabled = true
|
||||
log.W.F("policy disabled due to failure - all events will be rejected (script location: %s)", pm.scriptPath)
|
||||
}
|
||||
}
|
||||
|
||||
// enablePolicy re-enables policy and attempts to start it
|
||||
func (pm *PolicyManager) enablePolicy() {
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
|
||||
if pm.disabled {
|
||||
pm.disabled = false
|
||||
log.I.F("policy re-enabled, attempting to start")
|
||||
|
||||
// Attempt to start policy in background
|
||||
go func() {
|
||||
if _, err := os.Stat(pm.scriptPath); err == nil {
|
||||
if err := pm.StartPolicy(); err != nil {
|
||||
log.E.F("failed to restart policy: %v", err)
|
||||
pm.disablePolicy()
|
||||
} else {
|
||||
log.I.F("policy restarted successfully")
|
||||
}
|
||||
} else {
|
||||
log.W.F("policy script still not found, keeping disabled")
|
||||
pm.disablePolicy()
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
// periodicCheck periodically checks if policy script becomes available
|
||||
// periodicCheck periodically checks if policy script becomes available and attempts to restart failed scripts.
|
||||
// Runs every 60 seconds (1 minute) to provide resilient script management.
|
||||
func (pm *PolicyManager) periodicCheck() {
|
||||
ticker := time.NewTicker(30 * time.Second) // Check every 30 seconds
|
||||
ticker := time.NewTicker(60 * time.Second) // Check every 60 seconds (1 minute)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
@@ -492,27 +497,20 @@ func (pm *PolicyManager) periodicCheck() {
|
||||
return
|
||||
case <-ticker.C:
|
||||
pm.mutex.RLock()
|
||||
disabled := pm.disabled
|
||||
running := pm.isRunning
|
||||
pm.mutex.RUnlock()
|
||||
|
||||
// Only check if policy is disabled or not running
|
||||
if disabled || !running {
|
||||
// Check if policy script is not running and try to start it
|
||||
if !running {
|
||||
if _, err := os.Stat(pm.scriptPath); err == nil {
|
||||
// Script is available, try to enable/restart
|
||||
if disabled {
|
||||
pm.enablePolicy()
|
||||
} else if !running {
|
||||
// Script exists but policy isn't running, try to start
|
||||
go func() {
|
||||
if err := pm.StartPolicy(); err != nil {
|
||||
log.E.F("failed to restart policy: %v", err)
|
||||
pm.disablePolicy()
|
||||
} else {
|
||||
log.I.F("policy restarted successfully")
|
||||
}
|
||||
}()
|
||||
}
|
||||
// Script exists but policy isn't running, try to start
|
||||
go func() {
|
||||
if err := pm.StartPolicy(); err != nil {
|
||||
log.E.F("failed to restart policy: %v, will retry in next cycle", err)
|
||||
} else {
|
||||
log.I.F("policy restarted successfully")
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -523,16 +521,17 @@ func (pm *PolicyManager) periodicCheck() {
|
||||
func (pm *PolicyManager) startPolicyIfExists() {
|
||||
if _, err := os.Stat(pm.scriptPath); err == nil {
|
||||
if err := pm.StartPolicy(); err != nil {
|
||||
log.E.F("failed to start policy: %v", err)
|
||||
pm.disablePolicy()
|
||||
log.E.F("failed to start policy: %v, will retry periodically", err)
|
||||
// Don't disable policy manager, just log the error and let periodic check retry
|
||||
}
|
||||
} else {
|
||||
log.W.F("policy script not found at %s, disabling policy", pm.scriptPath)
|
||||
pm.disablePolicy()
|
||||
log.W.F("policy script not found at %s, will retry periodically", pm.scriptPath)
|
||||
// Don't disable policy manager, just log and let periodic check retry
|
||||
}
|
||||
}
|
||||
|
||||
// StartPolicy starts the policy script
|
||||
// StartPolicy starts the policy script process.
|
||||
// Returns an error if the script doesn't exist, can't be executed, or is already running.
|
||||
func (pm *PolicyManager) StartPolicy() error {
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
@@ -609,7 +608,8 @@ func (pm *PolicyManager) StartPolicy() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// StopPolicy stops the policy script gracefully, with SIGKILL fallback
|
||||
// StopPolicy stops the policy script gracefully with SIGTERM, falling back to SIGKILL if needed.
|
||||
// Returns an error if the policy is not currently running.
|
||||
func (pm *PolicyManager) StopPolicy() error {
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
@@ -668,7 +668,8 @@ func (pm *PolicyManager) StopPolicy() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ProcessEvent sends an event to the policy script and waits for a response
|
||||
// ProcessEvent sends an event to the policy script and waits for a response.
|
||||
// Returns the script's decision or an error if the script is not running or communication fails.
|
||||
func (pm *PolicyManager) ProcessEvent(evt *PolicyEvent) (*PolicyResponse, error) {
|
||||
pm.mutex.RLock()
|
||||
if !pm.isRunning || pm.stdin == nil {
|
||||
@@ -772,35 +773,30 @@ func (pm *PolicyManager) monitorProcess() {
|
||||
pm.currentCancel = nil
|
||||
|
||||
if err != nil {
|
||||
log.E.F("policy process exited with error: %v", err)
|
||||
// Auto-disable policy on failure
|
||||
pm.disabled = true
|
||||
log.W.F("policy disabled due to process failure - all events will be rejected (script location: %s)", pm.scriptPath)
|
||||
log.E.F("policy process exited with error: %v, will retry periodically", err)
|
||||
// Don't disable policy manager, let periodic check handle restart
|
||||
log.W.F("policy script crashed - events will fall back to default policy until restart (script location: %s)", pm.scriptPath)
|
||||
} else {
|
||||
log.I.F("policy process exited normally")
|
||||
}
|
||||
}
|
||||
|
||||
// IsEnabled returns whether policy is enabled
|
||||
// IsEnabled returns whether the policy manager is enabled.
|
||||
// This is set during initialization and doesn't change during runtime.
|
||||
func (pm *PolicyManager) IsEnabled() bool {
|
||||
return pm.enabled
|
||||
}
|
||||
|
||||
// IsRunning returns whether policy is currently running
|
||||
// IsRunning returns whether the policy script is currently running.
|
||||
// This can change during runtime as scripts start, stop, or crash.
|
||||
func (pm *PolicyManager) IsRunning() bool {
|
||||
pm.mutex.RLock()
|
||||
defer pm.mutex.RUnlock()
|
||||
return pm.isRunning
|
||||
}
|
||||
|
||||
// IsDisabled returns whether policy is disabled due to failure
|
||||
func (pm *PolicyManager) IsDisabled() bool {
|
||||
pm.mutex.RLock()
|
||||
defer pm.mutex.RUnlock()
|
||||
return pm.disabled
|
||||
}
|
||||
|
||||
// Shutdown gracefully shuts down the policy manager
|
||||
// Shutdown gracefully shuts down the policy manager.
|
||||
// It cancels the context and stops any running policy script.
|
||||
func (pm *PolicyManager) Shutdown() {
|
||||
pm.cancel()
|
||||
if pm.isRunning {
|
||||
|
||||
@@ -593,9 +593,6 @@ func TestNewWithManager(t *testing.T) {
|
||||
t.Error("Expected policy manager to not be running initially")
|
||||
}
|
||||
|
||||
if policy.Manager.IsDisabled() {
|
||||
t.Error("Expected policy manager to not be disabled initially")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPolicyManagerLifecycle(t *testing.T) {
|
||||
@@ -609,7 +606,6 @@ func TestPolicyManagerLifecycle(t *testing.T) {
|
||||
configDir: "/tmp",
|
||||
scriptPath: "/tmp/policy.sh",
|
||||
enabled: true,
|
||||
disabled: false,
|
||||
responseChan: make(chan PolicyResponse, 100),
|
||||
}
|
||||
|
||||
@@ -622,10 +618,6 @@ func TestPolicyManagerLifecycle(t *testing.T) {
|
||||
t.Error("Expected policy manager to not be running initially")
|
||||
}
|
||||
|
||||
if manager.IsDisabled() {
|
||||
t.Error("Expected policy manager to not be disabled initially")
|
||||
}
|
||||
|
||||
// Test starting with non-existent script (should fail gracefully)
|
||||
err := manager.StartPolicy()
|
||||
if err == nil {
|
||||
@@ -650,7 +642,6 @@ func TestPolicyManagerProcessEvent(t *testing.T) {
|
||||
configDir: "/tmp",
|
||||
scriptPath: "/tmp/policy.sh",
|
||||
enabled: true,
|
||||
disabled: false,
|
||||
responseChan: make(chan PolicyResponse, 100),
|
||||
}
|
||||
|
||||
@@ -778,7 +769,6 @@ func TestEdgeCasesManagerWithInvalidScript(t *testing.T) {
|
||||
configDir: tempDir,
|
||||
scriptPath: scriptPath,
|
||||
enabled: true,
|
||||
disabled: false,
|
||||
responseChan: make(chan PolicyResponse, 100),
|
||||
}
|
||||
|
||||
@@ -797,7 +787,6 @@ func TestEdgeCasesManagerDoubleStart(t *testing.T) {
|
||||
configDir: "/tmp",
|
||||
scriptPath: "/tmp/policy.sh",
|
||||
enabled: true,
|
||||
disabled: false,
|
||||
responseChan: make(chan PolicyResponse, 100),
|
||||
}
|
||||
|
||||
@@ -1012,3 +1001,337 @@ func TestMaxAgeChecks(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestScriptPolicyNotRunningFallsBackToDefault(t *testing.T) {
|
||||
// Create a policy with a script rule but no running manager, default policy is "allow"
|
||||
policy := &P{
|
||||
DefaultPolicy: "allow",
|
||||
Rules: map[int]Rule{
|
||||
1: {
|
||||
Description: "script rule",
|
||||
Script: "policy.sh",
|
||||
},
|
||||
},
|
||||
Manager: &PolicyManager{
|
||||
enabled: true,
|
||||
isRunning: false, // Script is not running
|
||||
},
|
||||
}
|
||||
|
||||
// Create test event
|
||||
testEvent := createTestEvent("test-event-id", "test-pubkey", "test content", 1)
|
||||
|
||||
// Should allow the event when script is configured but not running (falls back to default "allow")
|
||||
allowed, err := policy.CheckPolicy("write", testEvent, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if !allowed {
|
||||
t.Error("Expected event to be allowed when script is not running (should fall back to default policy 'allow')")
|
||||
}
|
||||
|
||||
// Test with default policy "deny"
|
||||
policy.DefaultPolicy = "deny"
|
||||
allowed2, err2 := policy.CheckPolicy("write", testEvent, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err2 != nil {
|
||||
t.Errorf("Unexpected error: %v", err2)
|
||||
}
|
||||
if allowed2 {
|
||||
t.Error("Expected event to be denied when script is not running and default policy is 'deny'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultPolicyAllow(t *testing.T) {
|
||||
// Test default policy "allow" behavior
|
||||
policy := &P{
|
||||
DefaultPolicy: "allow",
|
||||
Kind: Kinds{},
|
||||
Rules: map[int]Rule{}, // No specific rules
|
||||
}
|
||||
|
||||
// Create test event for kind 1 (no specific rule exists)
|
||||
testEvent := createTestEvent("test-event-id", "test-pubkey", "test content", 1)
|
||||
|
||||
// Should allow the event with default policy "allow"
|
||||
allowed, err := policy.CheckPolicy("write", testEvent, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if !allowed {
|
||||
t.Error("Expected event to be allowed with default_policy 'allow'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultPolicyDeny(t *testing.T) {
|
||||
// Test default policy "deny" behavior
|
||||
policy := &P{
|
||||
DefaultPolicy: "deny",
|
||||
Kind: Kinds{},
|
||||
Rules: map[int]Rule{}, // No specific rules
|
||||
}
|
||||
|
||||
// Create test event for kind 1 (no specific rule exists)
|
||||
testEvent := createTestEvent("test-event-id", "test-pubkey", "test content", 1)
|
||||
|
||||
// Should deny the event with default policy "deny"
|
||||
allowed, err := policy.CheckPolicy("write", testEvent, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if allowed {
|
||||
t.Error("Expected event to be denied with default_policy 'deny'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultPolicyEmpty(t *testing.T) {
|
||||
// Test empty default policy (should default to "allow")
|
||||
policy := &P{
|
||||
DefaultPolicy: "",
|
||||
Kind: Kinds{},
|
||||
Rules: map[int]Rule{}, // No specific rules
|
||||
}
|
||||
|
||||
// Create test event for kind 1 (no specific rule exists)
|
||||
testEvent := createTestEvent("test-event-id", "test-pubkey", "test content", 1)
|
||||
|
||||
// Should allow the event with empty default policy (defaults to "allow")
|
||||
allowed, err := policy.CheckPolicy("write", testEvent, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if !allowed {
|
||||
t.Error("Expected event to be allowed with empty default_policy (should default to 'allow')")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultPolicyInvalid(t *testing.T) {
|
||||
// Test invalid default policy (should default to "allow")
|
||||
policy := &P{
|
||||
DefaultPolicy: "invalid",
|
||||
Kind: Kinds{},
|
||||
Rules: map[int]Rule{}, // No specific rules
|
||||
}
|
||||
|
||||
// Create test event for kind 1 (no specific rule exists)
|
||||
testEvent := createTestEvent("test-event-id", "test-pubkey", "test content", 1)
|
||||
|
||||
// Should allow the event with invalid default policy (defaults to "allow")
|
||||
allowed, err := policy.CheckPolicy("write", testEvent, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if !allowed {
|
||||
t.Error("Expected event to be allowed with invalid default_policy (should default to 'allow')")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultPolicyWithSpecificRule(t *testing.T) {
|
||||
// Test that specific rules override default policy
|
||||
policy := &P{
|
||||
DefaultPolicy: "deny", // Default is deny
|
||||
Kind: Kinds{},
|
||||
Rules: map[int]Rule{
|
||||
1: {
|
||||
Description: "allow kind 1",
|
||||
WriteAllow: []string{}, // Allow all for kind 1
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Create test event for kind 1 (has specific rule)
|
||||
testEvent := createTestEvent("test-event-id", "test-pubkey", "test content", 1)
|
||||
|
||||
// Should allow the event because specific rule allows it, despite default policy being "deny"
|
||||
allowed, err := policy.CheckPolicy("write", testEvent, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if !allowed {
|
||||
t.Error("Expected event to be allowed by specific rule, despite default_policy 'deny'")
|
||||
}
|
||||
|
||||
// Create test event for kind 2 (no specific rule exists)
|
||||
testEvent2 := createTestEvent("test-event-id-2", "test-pubkey", "test content", 2)
|
||||
|
||||
// Should deny the event because no specific rule and default policy is "deny"
|
||||
allowed2, err2 := policy.CheckPolicy("write", testEvent2, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err2 != nil {
|
||||
t.Errorf("Unexpected error: %v", err2)
|
||||
}
|
||||
if allowed2 {
|
||||
t.Error("Expected event to be denied with default_policy 'deny' for kind without specific rule")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewPolicyDefaultsToAllow(t *testing.T) {
|
||||
// Test that New() function sets default policy to "allow"
|
||||
policy, err := New([]byte(`{}`))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create policy: %v", err)
|
||||
}
|
||||
|
||||
if policy.DefaultPolicy != "allow" {
|
||||
t.Errorf("Expected default policy to be 'allow', got '%s'", policy.DefaultPolicy)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewPolicyWithDefaultPolicyJSON(t *testing.T) {
|
||||
// Test loading default policy from JSON
|
||||
jsonConfig := `{"default_policy": "deny"}`
|
||||
policy, err := New([]byte(jsonConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create policy: %v", err)
|
||||
}
|
||||
|
||||
if policy.DefaultPolicy != "deny" {
|
||||
t.Errorf("Expected default policy to be 'deny', got '%s'", policy.DefaultPolicy)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScriptProcessingFailureFallsBackToDefault(t *testing.T) {
|
||||
// Test that script processing failures fall back to default policy
|
||||
// We'll test this by using a manager that's not running (simulating failure)
|
||||
policy := &P{
|
||||
DefaultPolicy: "allow",
|
||||
Rules: map[int]Rule{
|
||||
1: {
|
||||
Description: "script rule",
|
||||
Script: "policy.sh",
|
||||
},
|
||||
},
|
||||
Manager: &PolicyManager{
|
||||
enabled: true,
|
||||
isRunning: false, // Script is not running (simulating failure)
|
||||
},
|
||||
}
|
||||
|
||||
// Create test event
|
||||
testEvent := createTestEvent("test-event-id", "test-pubkey", "test content", 1)
|
||||
|
||||
// Should allow the event when script is not running (falls back to default "allow")
|
||||
allowed, err := policy.checkScriptPolicy("write", testEvent, "policy.sh", []byte("test-pubkey"), "127.0.0.1")
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if !allowed {
|
||||
t.Error("Expected event to be allowed when script is not running (should fall back to default policy 'allow')")
|
||||
}
|
||||
|
||||
// Test with default policy "deny"
|
||||
policy.DefaultPolicy = "deny"
|
||||
allowed2, err2 := policy.checkScriptPolicy("write", testEvent, "policy.sh", []byte("test-pubkey"), "127.0.0.1")
|
||||
if err2 != nil {
|
||||
t.Errorf("Unexpected error: %v", err2)
|
||||
}
|
||||
if allowed2 {
|
||||
t.Error("Expected event to be denied when script is not running and default policy is 'deny'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultPolicyLogicWithRules(t *testing.T) {
|
||||
// Test that default policy logic works correctly with rules
|
||||
|
||||
// Test 1: default_policy "deny" - should only allow if rule explicitly allows
|
||||
policy1 := &P{
|
||||
DefaultPolicy: "deny",
|
||||
Kind: Kinds{
|
||||
Whitelist: []int{1, 2, 3}, // Allow kinds 1, 2, 3
|
||||
},
|
||||
Rules: map[int]Rule{
|
||||
1: {
|
||||
Description: "allow all for kind 1",
|
||||
WriteAllow: []string{}, // Empty means allow all
|
||||
},
|
||||
2: {
|
||||
Description: "deny specific pubkey for kind 2",
|
||||
WriteDeny: []string{"64656e6965642d7075626b6579"}, // hex of "denied-pubkey"
|
||||
},
|
||||
// No rule for kind 3
|
||||
},
|
||||
}
|
||||
|
||||
// Kind 1: has rule that allows all - should be allowed
|
||||
event1 := createTestEvent("test-1", "test-pubkey", "content", 1)
|
||||
allowed1, err1 := policy1.CheckPolicy("write", event1, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err1 != nil {
|
||||
t.Errorf("Unexpected error for kind 1: %v", err1)
|
||||
}
|
||||
if !allowed1 {
|
||||
t.Error("Expected kind 1 to be allowed (rule allows all)")
|
||||
}
|
||||
|
||||
// Kind 2: has rule that denies specific pubkey - should be allowed for other pubkeys
|
||||
event2 := createTestEvent("test-2", "test-pubkey", "content", 2)
|
||||
allowed2, err2 := policy1.CheckPolicy("write", event2, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err2 != nil {
|
||||
t.Errorf("Unexpected error for kind 2: %v", err2)
|
||||
}
|
||||
if !allowed2 {
|
||||
t.Error("Expected kind 2 to be allowed for non-denied pubkey")
|
||||
}
|
||||
|
||||
// Kind 2: denied pubkey should be denied
|
||||
event2Denied := createTestEvent("test-2-denied", "denied-pubkey", "content", 2)
|
||||
allowed2Denied, err2Denied := policy1.CheckPolicy("write", event2Denied, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err2Denied != nil {
|
||||
t.Errorf("Unexpected error for kind 2 denied: %v", err2Denied)
|
||||
}
|
||||
if allowed2Denied {
|
||||
t.Error("Expected kind 2 to be denied for denied pubkey")
|
||||
}
|
||||
|
||||
// Kind 3: whitelisted but no rule - should follow default policy (deny)
|
||||
event3 := createTestEvent("test-3", "test-pubkey", "content", 3)
|
||||
allowed3, err3 := policy1.CheckPolicy("write", event3, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err3 != nil {
|
||||
t.Errorf("Unexpected error for kind 3: %v", err3)
|
||||
}
|
||||
if allowed3 {
|
||||
t.Error("Expected kind 3 to be denied (no rule, default policy is deny)")
|
||||
}
|
||||
|
||||
// Test 2: default_policy "allow" - should allow unless rule explicitly denies
|
||||
policy2 := &P{
|
||||
DefaultPolicy: "allow",
|
||||
Kind: Kinds{
|
||||
Whitelist: []int{1, 2, 3}, // Allow kinds 1, 2, 3
|
||||
},
|
||||
Rules: map[int]Rule{
|
||||
1: {
|
||||
Description: "deny specific pubkey for kind 1",
|
||||
WriteDeny: []string{"64656e6965642d7075626b6579"}, // hex of "denied-pubkey"
|
||||
},
|
||||
// No rules for kind 2, 3
|
||||
},
|
||||
}
|
||||
|
||||
// Kind 1: has rule that denies specific pubkey - should be allowed for other pubkeys
|
||||
event1Allow := createTestEvent("test-1-allow", "test-pubkey", "content", 1)
|
||||
allowed1Allow, err1Allow := policy2.CheckPolicy("write", event1Allow, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err1Allow != nil {
|
||||
t.Errorf("Unexpected error for kind 1 allow: %v", err1Allow)
|
||||
}
|
||||
if !allowed1Allow {
|
||||
t.Error("Expected kind 1 to be allowed for non-denied pubkey")
|
||||
}
|
||||
|
||||
// Kind 1: denied pubkey should be denied
|
||||
event1Deny := createTestEvent("test-1-deny", "denied-pubkey", "content", 1)
|
||||
allowed1Deny, err1Deny := policy2.CheckPolicy("write", event1Deny, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err1Deny != nil {
|
||||
t.Errorf("Unexpected error for kind 1 deny: %v", err1Deny)
|
||||
}
|
||||
if allowed1Deny {
|
||||
t.Error("Expected kind 1 to be denied for denied pubkey")
|
||||
}
|
||||
|
||||
// Kind 2: whitelisted but no rule - should follow default policy (allow)
|
||||
event2Allow := createTestEvent("test-2-allow", "test-pubkey", "content", 2)
|
||||
allowed2Allow, err2Allow := policy2.CheckPolicy("write", event2Allow, []byte("test-pubkey"), "127.0.0.1")
|
||||
if err2Allow != nil {
|
||||
t.Errorf("Unexpected error for kind 2 allow: %v", err2Allow)
|
||||
}
|
||||
if !allowed2Allow {
|
||||
t.Error("Expected kind 2 to be allowed (no rule, default policy is allow)")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,457 +0,0 @@
|
||||
package spider
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"lol.mleku.dev/chk"
|
||||
"lol.mleku.dev/log"
|
||||
"next.orly.dev/app/config"
|
||||
"next.orly.dev/pkg/acl"
|
||||
"next.orly.dev/pkg/database"
|
||||
"next.orly.dev/pkg/database/indexes/types"
|
||||
"next.orly.dev/pkg/encoders/filter"
|
||||
"next.orly.dev/pkg/encoders/kind"
|
||||
"next.orly.dev/pkg/encoders/tag"
|
||||
"next.orly.dev/pkg/encoders/timestamp"
|
||||
"next.orly.dev/pkg/protocol/ws"
|
||||
"next.orly.dev/pkg/utils/normalize"
|
||||
)
|
||||
|
||||
const (
	// OneTimeSpiderSyncMarker is the database marker key that records the
	// one-time spider sync as done.
	OneTimeSpiderSyncMarker = "spider_one_time_sync_completed"
	// SpiderLastScanMarker is the database marker key under which the time of
	// the last periodic scan is stored.
	SpiderLastScanMarker = "spider_last_scan_time"
	// MaxWebSocketMessageSize is the maximum size for WebSocket messages
	// (100 MiB).
	MaxWebSocketMessageSize = 100 << 20
	// PubkeyHexSize is the length of a hex-encoded pubkey: 32 bytes, two hex
	// characters per byte.
	PubkeyHexSize = 2 * 32
)
|
||||
|
||||
type Spider struct {
|
||||
db *database.D
|
||||
cfg *config.C
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
// Configured relay addresses for self-detection
|
||||
relayAddresses []string
|
||||
}
|
||||
|
||||
func New(
|
||||
db *database.D, cfg *config.C, ctx context.Context,
|
||||
cancel context.CancelFunc,
|
||||
) *Spider {
|
||||
return &Spider{
|
||||
db: db,
|
||||
cfg: cfg,
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
relayAddresses: cfg.RelayAddresses,
|
||||
}
|
||||
}
|
||||
|
||||
// Start initializes the spider functionality based on configuration
|
||||
func (s *Spider) Start() {
|
||||
if s.cfg.SpiderMode != "follows" {
|
||||
log.D.Ln("Spider mode is not set to 'follows', skipping spider functionality")
|
||||
return
|
||||
}
|
||||
|
||||
log.I.Ln("Starting spider in follow mode")
|
||||
|
||||
// Check if one-time sync has been completed
|
||||
if !s.db.HasMarker(OneTimeSpiderSyncMarker) {
|
||||
log.I.Ln("Performing one-time spider sync back one month")
|
||||
go s.performOneTimeSync()
|
||||
} else {
|
||||
log.D.Ln("One-time spider sync already completed, skipping")
|
||||
}
|
||||
|
||||
// Start periodic scanning
|
||||
go s.startPeriodicScanning()
|
||||
}
|
||||
|
||||
// performOneTimeSync performs the initial sync going back one month
|
||||
func (s *Spider) performOneTimeSync() {
|
||||
defer func() {
|
||||
// Mark the one-time sync as completed
|
||||
timestamp := strconv.FormatInt(time.Now().Unix(), 10)
|
||||
if err := s.db.SetMarker(
|
||||
OneTimeSpiderSyncMarker, []byte(timestamp),
|
||||
); err != nil {
|
||||
log.E.F("Failed to set one-time sync marker: %v", err)
|
||||
} else {
|
||||
log.I.Ln("One-time spider sync completed and marked")
|
||||
}
|
||||
}()
|
||||
|
||||
// Calculate the time one month ago
|
||||
oneMonthAgo := time.Now().AddDate(0, -1, 0)
|
||||
log.I.F("Starting one-time spider sync from %v", oneMonthAgo)
|
||||
|
||||
// Perform the sync (placeholder - would need actual implementation based on follows)
|
||||
if err := s.performSync(oneMonthAgo, time.Now()); err != nil {
|
||||
log.E.F("One-time spider sync failed: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
log.I.Ln("One-time spider sync completed successfully")
|
||||
}
|
||||
|
||||
// startPeriodicScanning starts the regular scanning process
|
||||
func (s *Spider) startPeriodicScanning() {
|
||||
ticker := time.NewTicker(s.cfg.SpiderFrequency)
|
||||
defer ticker.Stop()
|
||||
|
||||
log.I.F("Starting periodic spider scanning every %v", s.cfg.SpiderFrequency)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.ctx.Done():
|
||||
log.D.Ln("Spider periodic scanning stopped due to context cancellation")
|
||||
return
|
||||
case <-ticker.C:
|
||||
s.performPeriodicScan()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// performPeriodicScan performs the regular scan of the last two hours (double the frequency window)
|
||||
func (s *Spider) performPeriodicScan() {
|
||||
// Calculate the scanning window (double the frequency period)
|
||||
scanWindow := s.cfg.SpiderFrequency * 2
|
||||
scanStart := time.Now().Add(-scanWindow)
|
||||
scanEnd := time.Now()
|
||||
|
||||
log.D.F(
|
||||
"Performing periodic spider scan from %v to %v (window: %v)", scanStart,
|
||||
scanEnd, scanWindow,
|
||||
)
|
||||
|
||||
if err := s.performSync(scanStart, scanEnd); err != nil {
|
||||
log.E.F("Periodic spider scan failed: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Update the last scan marker
|
||||
timestamp := strconv.FormatInt(time.Now().Unix(), 10)
|
||||
if err := s.db.SetMarker(
|
||||
SpiderLastScanMarker, []byte(timestamp),
|
||||
); err != nil {
|
||||
log.E.F("Failed to update last scan marker: %v", err)
|
||||
}
|
||||
|
||||
log.D.F("Periodic spider scan completed successfully")
|
||||
}
|
||||
|
||||
// performSync performs the actual sync operation for the given time range
|
||||
func (s *Spider) performSync(startTime, endTime time.Time) error {
|
||||
log.D.F(
|
||||
"Spider sync from %v to %v - starting implementation", startTime,
|
||||
endTime,
|
||||
)
|
||||
|
||||
// 1. Check ACL mode is set to "follows"
|
||||
if s.cfg.ACLMode != "follows" {
|
||||
log.D.F(
|
||||
"Spider sync skipped - ACL mode is not 'follows' (current: %s)",
|
||||
s.cfg.ACLMode,
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
// 2. Get the list of followed users from the ACL system
|
||||
followedPubkeys, err := s.getFollowedPubkeys()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(followedPubkeys) == 0 {
|
||||
log.D.Ln("Spider sync: no followed pubkeys found")
|
||||
return nil
|
||||
}
|
||||
|
||||
log.D.F("Spider sync: found %d followed pubkeys", len(followedPubkeys))
|
||||
|
||||
// 3. Discover relay lists from followed users
|
||||
relayURLs, err := s.discoverRelays(followedPubkeys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(relayURLs) == 0 {
|
||||
log.W.Ln("Spider sync: no relays discovered from followed users")
|
||||
return nil
|
||||
}
|
||||
|
||||
log.I.F("Spider sync: discovered %d relay URLs", len(relayURLs))
|
||||
|
||||
// 4. Query each relay for events from followed pubkeys in the time range
|
||||
eventsFound := 0
|
||||
for _, relayURL := range relayURLs {
|
||||
log.I.F("Spider sync: fetching follow lists from relay %s", relayURL)
|
||||
count, err := s.queryRelayForEvents(
|
||||
relayURL, followedPubkeys, startTime, endTime,
|
||||
)
|
||||
if err != nil {
|
||||
log.E.F("Spider sync: error querying relay %s: %v", relayURL, err)
|
||||
continue
|
||||
}
|
||||
log.I.F("Spider sync: completed fetching from relay %s, found %d events", relayURL, count)
|
||||
eventsFound += count
|
||||
}
|
||||
|
||||
log.I.F(
|
||||
"Spider sync completed: found %d new events from %d relays",
|
||||
eventsFound, len(relayURLs),
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// getFollowedPubkeys retrieves the list of followed pubkeys from the ACL system
|
||||
func (s *Spider) getFollowedPubkeys() ([][]byte, error) {
|
||||
// Access the ACL registry to get the current ACL instance
|
||||
var followedPubkeys [][]byte
|
||||
|
||||
// Get all ACL instances and find the active one
|
||||
for _, aclInstance := range acl.Registry.ACL {
|
||||
if aclInstance.Type() == acl.Registry.Active.Load() {
|
||||
// Cast to *Follows to access the follows field
|
||||
if followsACL, ok := aclInstance.(*acl.Follows); ok {
|
||||
followedPubkeys = followsACL.GetFollowedPubkeys()
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return followedPubkeys, nil
|
||||
}
|
||||
|
||||
// discoverRelays discovers relay URLs from kind 10002 events of followed users
|
||||
func (s *Spider) discoverRelays(followedPubkeys [][]byte) ([]string, error) {
|
||||
seen := make(map[string]struct{})
|
||||
var urls []string
|
||||
|
||||
for _, pubkey := range followedPubkeys {
|
||||
// Query for kind 10002 (RelayListMetadata) events from this pubkey
|
||||
fl := &filter.F{
|
||||
Authors: tag.NewFromAny(pubkey),
|
||||
Kinds: kind.NewS(kind.New(kind.RelayListMetadata.K)),
|
||||
}
|
||||
|
||||
idxs, err := database.GetIndexesFromFilter(fl)
|
||||
if chk.E(err) {
|
||||
continue
|
||||
}
|
||||
|
||||
var sers types.Uint40s
|
||||
for _, idx := range idxs {
|
||||
s, err := s.db.GetSerialsByRange(idx)
|
||||
if chk.E(err) {
|
||||
continue
|
||||
}
|
||||
sers = append(sers, s...)
|
||||
}
|
||||
|
||||
for _, ser := range sers {
|
||||
ev, err := s.db.FetchEventBySerial(ser)
|
||||
if chk.E(err) || ev == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract relay URLs from 'r' tags
|
||||
for _, v := range ev.Tags.GetAll([]byte("r")) {
|
||||
u := string(v.Value())
|
||||
n := string(normalize.URL(u))
|
||||
if n == "" {
|
||||
continue
|
||||
}
|
||||
// Skip if this relay is one of the configured relay addresses
|
||||
skipRelay := false
|
||||
for _, relayAddr := range s.relayAddresses {
|
||||
if n == relayAddr {
|
||||
log.D.F("spider: skipping configured relay address: %s", n)
|
||||
skipRelay = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if skipRelay {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[n]; ok {
|
||||
continue
|
||||
}
|
||||
seen[n] = struct{}{}
|
||||
urls = append(urls, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return urls, nil
|
||||
}
|
||||
|
||||
// calculateOptimalChunkSize calculates the optimal chunk size for pubkeys to stay under message size limit
|
||||
func (s *Spider) calculateOptimalChunkSize() int {
|
||||
// Estimate the size of a filter with timestamps and other fields
|
||||
// Base filter overhead: ~200 bytes for timestamps, limits, etc.
|
||||
baseFilterSize := 200
|
||||
|
||||
// Calculate how many pubkeys we can fit in the remaining space
|
||||
availableSpace := MaxWebSocketMessageSize - baseFilterSize
|
||||
maxPubkeys := availableSpace / PubkeyHexSize
|
||||
|
||||
// Use a conservative chunk size (80% of max to be safe)
|
||||
chunkSize := int(float64(maxPubkeys) * 0.8)
|
||||
|
||||
// Ensure minimum chunk size of 10
|
||||
if chunkSize < 10 {
|
||||
chunkSize = 10
|
||||
}
|
||||
|
||||
log.D.F(
|
||||
"Spider: calculated optimal chunk size: %d pubkeys (max would be %d)",
|
||||
chunkSize, maxPubkeys,
|
||||
)
|
||||
return chunkSize
|
||||
}
|
||||
|
||||
// queryRelayForEvents connects to a relay and queries for events from followed pubkeys
|
||||
func (s *Spider) queryRelayForEvents(
|
||||
relayURL string, followedPubkeys [][]byte, startTime, endTime time.Time,
|
||||
) (int, error) {
|
||||
log.T.F(
|
||||
"Spider sync: querying relay %s with %d pubkeys", relayURL,
|
||||
len(followedPubkeys),
|
||||
)
|
||||
|
||||
// Connect to the relay with a timeout context
|
||||
ctx, cancel := context.WithTimeout(s.ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
client, err := ws.RelayConnect(ctx, relayURL)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
// Break pubkeys into chunks to avoid 32KB message limit
|
||||
chunkSize := s.calculateOptimalChunkSize()
|
||||
totalEventsSaved := 0
|
||||
|
||||
for i := 0; i < len(followedPubkeys); i += chunkSize {
|
||||
end := i + chunkSize
|
||||
if end > len(followedPubkeys) {
|
||||
end = len(followedPubkeys)
|
||||
}
|
||||
|
||||
chunk := followedPubkeys[i:end]
|
||||
log.T.F(
|
||||
"Spider sync: processing chunk %d-%d (%d pubkeys) for relay %s",
|
||||
i, end-1, len(chunk), relayURL,
|
||||
)
|
||||
|
||||
// Create filter for this chunk of pubkeys
|
||||
f := &filter.F{
|
||||
Authors: tag.NewFromBytesSlice(chunk...),
|
||||
Since: timestamp.FromUnix(startTime.Unix()),
|
||||
Until: timestamp.FromUnix(endTime.Unix()),
|
||||
Limit: func() *uint { l := uint(500); return &l }(), // Limit to avoid overwhelming
|
||||
}
|
||||
|
||||
// Subscribe to get events for this chunk
|
||||
sub, err := client.Subscribe(ctx, filter.NewS(f))
|
||||
if err != nil {
|
||||
log.E.F(
|
||||
"Spider sync: failed to subscribe to chunk %d-%d for relay %s: %v",
|
||||
i, end-1, relayURL, err,
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
chunkEventsSaved := 0
|
||||
chunkEventsCount := 0
|
||||
timeout := time.After(10 * time.Second) // Timeout for receiving events
|
||||
|
||||
chunkDone := false
|
||||
for !chunkDone {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.T.F(
|
||||
"Spider sync: context done for relay %s chunk %d-%d, saved %d/%d events",
|
||||
relayURL, i, end-1, chunkEventsSaved, chunkEventsCount,
|
||||
)
|
||||
chunkDone = true
|
||||
case <-timeout:
|
||||
log.T.F(
|
||||
"Spider sync: timeout for relay %s chunk %d-%d, saved %d/%d events",
|
||||
relayURL, i, end-1, chunkEventsSaved, chunkEventsCount,
|
||||
)
|
||||
chunkDone = true
|
||||
case <-sub.EndOfStoredEvents:
|
||||
log.T.F(
|
||||
"Spider sync: end of stored events for relay %s chunk %d-%d, saved %d/%d events",
|
||||
relayURL, i, end-1, chunkEventsSaved, chunkEventsCount,
|
||||
)
|
||||
chunkDone = true
|
||||
case ev := <-sub.Events:
|
||||
if ev == nil {
|
||||
continue
|
||||
}
|
||||
chunkEventsCount++
|
||||
|
||||
// Verify the event signature
|
||||
if ok, err := ev.Verify(); !ok || err != nil {
|
||||
log.T.F(
|
||||
"Spider sync: invalid event signature from relay %s",
|
||||
relayURL,
|
||||
)
|
||||
ev.Free()
|
||||
continue
|
||||
}
|
||||
|
||||
// Save the event to the database
|
||||
if _, err := s.db.SaveEvent(s.ctx, ev); err != nil {
|
||||
if !strings.HasPrefix(err.Error(), "blocked:") {
|
||||
log.T.F(
|
||||
"Spider sync: error saving event from relay %s: %v",
|
||||
relayURL, err,
|
||||
)
|
||||
}
|
||||
// Event might already exist, which is fine for deduplication
|
||||
} else {
|
||||
chunkEventsSaved++
|
||||
if chunkEventsSaved%10 == 0 {
|
||||
log.T.F(
|
||||
"Spider sync: saved %d events from relay %s chunk %d-%d",
|
||||
chunkEventsSaved, relayURL, i, end-1,
|
||||
)
|
||||
}
|
||||
}
|
||||
ev.Free()
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up subscription
|
||||
sub.Unsub()
|
||||
totalEventsSaved += chunkEventsSaved
|
||||
|
||||
log.T.F(
|
||||
"Spider sync: completed chunk %d-%d for relay %s, saved %d events",
|
||||
i, end-1, relayURL, chunkEventsSaved,
|
||||
)
|
||||
}
|
||||
|
||||
log.T.F(
|
||||
"Spider sync: completed all chunks for relay %s, total saved %d events",
|
||||
relayURL, totalEventsSaved,
|
||||
)
|
||||
return totalEventsSaved, nil
|
||||
}
|
||||
|
||||
// Stop stops the spider functionality
|
||||
func (s *Spider) Stop() {
|
||||
log.D.Ln("Stopping spider")
|
||||
s.cancel()
|
||||
}
|
||||
@@ -1 +1 @@
|
||||
v0.17.8
|
||||
v0.17.12
|
||||
67
readme.adoc
67
readme.adoc
@@ -612,70 +612,3 @@ The relay will automatically:
|
||||
- Provide read-only access to everyone else
|
||||
- Update follow lists dynamically as admins modify their follows
|
||||
|
||||
== relay sync spider
|
||||
|
||||
The relay sync spider is an intelligent synchronization system that discovers and syncs events from other Nostr relays based on social relationships. It works in conjunction with the follows ACL to create a distributed network of synchronized content.
|
||||
|
||||
=== how it works
|
||||
|
||||
The spider operates in two phases:
|
||||
|
||||
1. **Relay Discovery**:
|
||||
- Finds relay lists (kind 10002 events) from followed users
|
||||
- Builds a list of relays used by people in your social network
|
||||
- Prioritizes relays mentioned by admin users
|
||||
|
||||
2. **Event Synchronization**:
|
||||
- Queries discovered relays for events from followed users
|
||||
- Performs one-time historical sync (default: 1 month back)
|
||||
- Runs periodic syncs to stay current with new events
|
||||
- Validates and stores events locally
|
||||
|
||||
=== configuration
|
||||
|
||||
Enable the spider by setting the spider mode to "follows":
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
export ORLY_SPIDER_MODE=follows
|
||||
export ORLY_SPIDER_FREQUENCY=1h
|
||||
----
|
||||
|
||||
Configuration options:
|
||||
|
||||
* `ORLY_SPIDER_MODE` - Spider mode: "none" (disabled) or "follows" (enabled)
|
||||
* `ORLY_SPIDER_FREQUENCY` - How often to sync (default: 1h)
|
||||
|
||||
=== usage example
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
# Enable both follows ACL and spider sync
|
||||
export ORLY_ACL_MODE=follows
|
||||
export ORLY_SPIDER_MODE=follows
|
||||
export ORLY_SPIDER_FREQUENCY=30m
|
||||
export ORLY_ADMINS=npub1fjqqy4a93z5zsjwsfxqhc2764kvykfdyttvldkkkdera8dr78vhsmmleku
|
||||
|
||||
# Start the relay
|
||||
./orly
|
||||
----
|
||||
|
||||
The spider will:
|
||||
- Perform a one-time sync of the last month's events
|
||||
- Discover relays from followed users' relay lists
|
||||
- Sync events from those relays every 30 minutes
|
||||
- Only sync events from users in the follow network
|
||||
|
||||
=== benefits
|
||||
|
||||
* **Decentralized Content**: Automatically aggregates content from your social network
|
||||
* **Reduced Relay Dependency**: Less reliance on single large relays
|
||||
* **Improved User Experience**: Users see content from their social circle even when they are not connected to other relays
|
||||
* **Network Resilience**: Content remains accessible even if origin relays go offline
|
||||
|
||||
=== technical notes
|
||||
|
||||
* The spider only runs when `ORLY_ACL_MODE=follows` to ensure proper authorization
|
||||
* One-time sync is marked to prevent repeated historical syncs on restart
|
||||
* Event validation ensures only properly signed events are stored
|
||||
* Sync windows are configurable to balance freshness with resource usage
|
||||
|
||||
@@ -51,7 +51,6 @@ echo "[run-relay-pprof] Starting relay with CPU profiling ..."
|
||||
ORLY_PORT=3334 \
|
||||
ORLY_ADMINS=npub1fjqqy4a93z5zsjwsfxqhc2764kvykfdyttvldkkkdera8dr78vhsmmleku \
|
||||
ORLY_ACL_MODE=follows \
|
||||
ORLY_SPIDER_MODE=none \
|
||||
ORLY_RELAY_ADDRESSES=test.orly.dev \
|
||||
ORLY_IP_BLACKLIST=192.71.213.188 \
|
||||
ORLY_HEALTH_PORT="$HEALTH_PORT" \
|
||||
|
||||
Reference in New Issue
Block a user