1
// Copyright 2018 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
14
"google.golang.org/protobuf/internal/errors"
17
// call specifies which Decoder method was invoked.
25
const unexpectedFmt = "unexpected token %s"
27
// ErrUnexpectedEOF means that EOF was encountered in the middle of the input.
28
var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)
30
// Decoder is a token-based JSON decoder.
32
// lastCall is the last method called, either readCall or peekCall.
33
// Initial value is readCall.
36
// lastToken contains the last read token.
39
// lastErr contains the last read error.
42
// openStack is a stack containing ObjectOpen and ArrayOpen values. The
43
// top of stack represents the object or the array the current value is
44
// directly located in.
47
// orig is used in reporting line and column.
49
// in contains the unconsumed input.
53
// NewDecoder returns a Decoder to read the given []byte.
54
func NewDecoder(b []byte) *Decoder {
55
return &Decoder{orig: b, in: b}
58
// Peek looks ahead and returns the next token without advancing a read.
59
func (d *Decoder) Peek() (Token, error) {
60
defer func() { d.lastCall = peekCall }()
61
if d.lastCall == readCall {
62
d.lastToken, d.lastErr = d.Read()
64
return d.lastToken, d.lastErr
67
// Read returns the next JSON token.
68
// It will return an error if there is no valid token.
69
func (d *Decoder) Read() (Token, error) {
70
const scalar = Null | Bool | Number | String
72
defer func() { d.lastCall = readCall }()
73
if d.lastCall == peekCall {
74
return d.lastToken, d.lastErr
77
tok, err := d.parseNext()
84
if len(d.openStack) != 0 ||
85
d.lastToken.kind&scalar|ObjectClose|ArrayClose == 0 {
86
return Token{}, ErrUnexpectedEOF
91
return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
96
return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
103
// This string token should only be for a field name.
104
if d.lastToken.kind&(ObjectOpen|comma) == 0 {
105
return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
108
return Token{}, ErrUnexpectedEOF
110
if c := d.in[0]; c != ':' {
111
return Token{}, d.newSyntaxError(d.currPos(), `unexpected character %s, missing ":" after field name`, string(c))
116
case ObjectOpen, ArrayOpen:
117
if !d.isValueNext() {
118
return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
120
d.openStack = append(d.openStack, tok.kind)
123
if len(d.openStack) == 0 ||
124
d.lastToken.kind&(Name|comma) != 0 ||
125
d.openStack[len(d.openStack)-1] != ObjectOpen {
126
return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
128
d.openStack = d.openStack[:len(d.openStack)-1]
131
if len(d.openStack) == 0 ||
132
d.lastToken.kind == comma ||
133
d.openStack[len(d.openStack)-1] != ArrayOpen {
134
return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
136
d.openStack = d.openStack[:len(d.openStack)-1]
139
if len(d.openStack) == 0 ||
140
d.lastToken.kind&(scalar|ObjectClose|ArrayClose) == 0 {
141
return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
145
// Update d.lastToken only after validating token to be in the right sequence.
148
if d.lastToken.kind == comma {
154
// Any sequence that looks like a non-delimiter (for error reporting).
155
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
157
// parseNext parses for the next JSON token. It returns a Token object for
158
// different types, except for Name. It does not handle whether the next token
159
// is in a valid sequence or not.
160
func (d *Decoder) parseNext() (Token, error) {
161
// Trim leading spaces.
166
return d.consumeToken(EOF, 0), nil
171
if n := matchWithDelim("null", in); n != 0 {
172
return d.consumeToken(Null, n), nil
176
if n := matchWithDelim("true", in); n != 0 {
177
return d.consumeBoolToken(true, n), nil
181
if n := matchWithDelim("false", in); n != 0 {
182
return d.consumeBoolToken(false, n), nil
185
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
186
if n, ok := parseNumber(in); ok {
187
return d.consumeToken(Number, n), nil
191
s, n, err := d.parseString(in)
195
return d.consumeStringToken(s, n), nil
198
return d.consumeToken(ObjectOpen, 1), nil
201
return d.consumeToken(ObjectClose, 1), nil
204
return d.consumeToken(ArrayOpen, 1), nil
207
return d.consumeToken(ArrayClose, 1), nil
210
return d.consumeToken(comma, 1), nil
212
return Token{}, d.newSyntaxError(d.currPos(), "invalid value %s", errRegexp.Find(in))
215
// newSyntaxError returns a syntax error annotated with line and column information.
217
func (d *Decoder) newSyntaxError(pos int, f string, x ...interface{}) error {
218
e := errors.New(f, x...)
219
line, column := d.Position(pos)
220
return errors.New("syntax error (line %d:%d): %v", line, column, e)
223
// Position returns line and column number of given index of the original input.
224
// It will panic if index is out of range.
225
func (d *Decoder) Position(idx int) (line int, column int) {
227
line = bytes.Count(b, []byte("\n")) + 1
228
if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
231
column = utf8.RuneCount(b) + 1 // ignore multi-rune characters
235
// currPos returns the current index position of d.in from d.orig.
236
func (d *Decoder) currPos() int {
237
return len(d.orig) - len(d.in)
240
// matchWithDelim matches s with the input b and verifies that the match
241
// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
242
// As a special case, EOF is considered a delimiter. It returns the length of s
243
// if there is a match, else 0.
244
func matchWithDelim(s string, b []byte) int {
245
if !bytes.HasPrefix(b, []byte(s)) {
250
if n < len(b) && isNotDelim(b[n]) {
256
// isNotDelim returns true if the given byte is not a delimiter character.
257
func isNotDelim(c byte) bool {
258
return (c == '-' || c == '+' || c == '.' || c == '_' ||
259
('a' <= c && c <= 'z') ||
260
('A' <= c && c <= 'Z') ||
261
('0' <= c && c <= '9'))
264
// consume consumes n bytes of input and any subsequent whitespace.
265
func (d *Decoder) consume(n int) {
269
case ' ', '\n', '\r', '\t':
277
// isValueNext returns true if next type should be a JSON value: Null,
278
// Number, String, Bool, or the start of an object or array.
279
func (d *Decoder) isValueNext() bool {
280
if len(d.openStack) == 0 {
281
return d.lastToken.kind == 0
284
start := d.openStack[len(d.openStack)-1]
287
return d.lastToken.kind&Name != 0
289
return d.lastToken.kind&(ArrayOpen|comma) != 0
292
"unreachable logic in Decoder.isValueNext, lastToken.kind: %v, openStack: %v",
293
d.lastToken.kind, start))
296
// consumeToken constructs a Token for given Kind with raw value derived from
297
// current d.in and given size, and consumes the given size-length of it.
298
func (d *Decoder) consumeToken(kind Kind, size int) Token {
302
pos: len(d.orig) - len(d.in),
308
// consumeBoolToken constructs a Token for a Bool kind with raw value derived from
309
// current d.in and given size.
310
func (d *Decoder) consumeBoolToken(b bool, size int) Token {
314
pos: len(d.orig) - len(d.in),
321
// consumeStringToken constructs a Token for a String kind with raw value derived
322
// from current d.in and given size.
323
func (d *Decoder) consumeStringToken(s string, size int) Token {
327
pos: len(d.orig) - len(d.in),
334
// Clone returns a copy of the Decoder for use in reading ahead the next JSON
335
// object, array or other values without affecting current Decoder.
336
func (d *Decoder) Clone() *Decoder {
338
ret.openStack = append([]Kind(nil), ret.openStack...)