1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8	"bytes"
9	"fmt"
10	"io"
11	"regexp"
12	"unicode/utf8"
13
14	"google.golang.org/protobuf/internal/errors"
15)
16
17// call specifies which Decoder method was invoked.
18type call uint8
19
20const (
21	readCall call = iota
22	peekCall
23)
24
25const unexpectedFmt = "unexpected token %s"
26
27// ErrUnexpectedEOF means that EOF was encountered in the middle of the input.
28var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)
29
30// Decoder is a token-based JSON decoder.
31type Decoder struct {
32	// lastCall is last method called, either readCall or peekCall.
33	// Initial value is readCall.
34	lastCall call
35
36	// lastToken contains the last read token.
37	lastToken Token
38
39	// lastErr contains the last read error.
40	lastErr error
41
42	// openStack is a stack containing ObjectOpen and ArrayOpen values. The
43	// top of stack represents the object or the array the current value is
44	// directly located in.
45	openStack []Kind
46
47	// orig is used in reporting line and column.
48	orig []byte
49	// in contains the unconsumed input.
50	in []byte
51}
52
53// NewDecoder returns a Decoder to read the given []byte.
54func NewDecoder(b []byte) *Decoder {
55	return &Decoder{orig: b, in: b}
56}
57
58// Peek looks ahead and returns the next token kind without advancing a read.
59func (d *Decoder) Peek() (Token, error) {
60	defer func() { d.lastCall = peekCall }()
61	if d.lastCall == readCall {
62		d.lastToken, d.lastErr = d.Read()
63	}
64	return d.lastToken, d.lastErr
65}
66
67// Read returns the next JSON token.
68// It will return an error if there is no valid token.
69func (d *Decoder) Read() (Token, error) {
70	const scalar = Null | Bool | Number | String
71
72	defer func() { d.lastCall = readCall }()
73	if d.lastCall == peekCall {
74		return d.lastToken, d.lastErr
75	}
76
77	tok, err := d.parseNext()
78	if err != nil {
79		return Token{}, err
80	}
81
82	switch tok.kind {
83	case EOF:
84		if len(d.openStack) != 0 ||
85			d.lastToken.kind&scalar|ObjectClose|ArrayClose == 0 {
86			return Token{}, ErrUnexpectedEOF
87		}
88
89	case Null:
90		if !d.isValueNext() {
91			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
92		}
93
94	case Bool, Number:
95		if !d.isValueNext() {
96			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
97		}
98
99	case String:
100		if d.isValueNext() {
101			break
102		}
103		// This string token should only be for a field name.
104		if d.lastToken.kind&(ObjectOpen|comma) == 0 {
105			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
106		}
107		if len(d.in) == 0 {
108			return Token{}, ErrUnexpectedEOF
109		}
110		if c := d.in[0]; c != ':' {
111			return Token{}, d.newSyntaxError(d.currPos(), `unexpected character %s, missing ":" after field name`, string(c))
112		}
113		tok.kind = Name
114		d.consume(1)
115
116	case ObjectOpen, ArrayOpen:
117		if !d.isValueNext() {
118			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
119		}
120		d.openStack = append(d.openStack, tok.kind)
121
122	case ObjectClose:
123		if len(d.openStack) == 0 ||
124			d.lastToken.kind == comma ||
125			d.openStack[len(d.openStack)-1] != ObjectOpen {
126			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
127		}
128		d.openStack = d.openStack[:len(d.openStack)-1]
129
130	case ArrayClose:
131		if len(d.openStack) == 0 ||
132			d.lastToken.kind == comma ||
133			d.openStack[len(d.openStack)-1] != ArrayOpen {
134			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
135		}
136		d.openStack = d.openStack[:len(d.openStack)-1]
137
138	case comma:
139		if len(d.openStack) == 0 ||
140			d.lastToken.kind&(scalar|ObjectClose|ArrayClose) == 0 {
141			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
142		}
143	}
144
145	// Update d.lastToken only after validating token to be in the right sequence.
146	d.lastToken = tok
147
148	if d.lastToken.kind == comma {
149		return d.Read()
150	}
151	return tok, nil
152}
153
154// Any sequence that looks like a non-delimiter (for error reporting).
155var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
156
157// parseNext parses for the next JSON token. It returns a Token object for
158// different types, except for Name. It does not handle whether the next token
159// is in a valid sequence or not.
160func (d *Decoder) parseNext() (Token, error) {
161	// Trim leading spaces.
162	d.consume(0)
163
164	in := d.in
165	if len(in) == 0 {
166		return d.consumeToken(EOF, 0), nil
167	}
168
169	switch in[0] {
170	case 'n':
171		if n := matchWithDelim("null", in); n != 0 {
172			return d.consumeToken(Null, n), nil
173		}
174
175	case 't':
176		if n := matchWithDelim("true", in); n != 0 {
177			return d.consumeBoolToken(true, n), nil
178		}
179
180	case 'f':
181		if n := matchWithDelim("false", in); n != 0 {
182			return d.consumeBoolToken(false, n), nil
183		}
184
185	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
186		if n, ok := parseNumber(in); ok {
187			return d.consumeToken(Number, n), nil
188		}
189
190	case '"':
191		s, n, err := d.parseString(in)
192		if err != nil {
193			return Token{}, err
194		}
195		return d.consumeStringToken(s, n), nil
196
197	case '{':
198		return d.consumeToken(ObjectOpen, 1), nil
199
200	case '}':
201		return d.consumeToken(ObjectClose, 1), nil
202
203	case '[':
204		return d.consumeToken(ArrayOpen, 1), nil
205
206	case ']':
207		return d.consumeToken(ArrayClose, 1), nil
208
209	case ',':
210		return d.consumeToken(comma, 1), nil
211	}
212	return Token{}, d.newSyntaxError(d.currPos(), "invalid value %s", errRegexp.Find(in))
213}
214
215// newSyntaxError returns an error with line and column information useful for
216// syntax errors.
217func (d *Decoder) newSyntaxError(pos int, f string, x ...interface{}) error {
218	e := errors.New(f, x...)
219	line, column := d.Position(pos)
220	return errors.New("syntax error (line %d:%d): %v", line, column, e)
221}
222
223// Position returns line and column number of given index of the original input.
224// It will panic if index is out of range.
225func (d *Decoder) Position(idx int) (line int, column int) {
226	b := d.orig[:idx]
227	line = bytes.Count(b, []byte("\n")) + 1
228	if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
229		b = b[i+1:]
230	}
231	column = utf8.RuneCount(b) + 1 // ignore multi-rune characters
232	return line, column
233}
234
235// currPos returns the current index position of d.in from d.orig.
236func (d *Decoder) currPos() int {
237	return len(d.orig) - len(d.in)
238}
239
240// matchWithDelim matches s with the input b and verifies that the match
241// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
242// As a special case, EOF is considered a delimiter. It returns the length of s
243// if there is a match, else 0.
244func matchWithDelim(s string, b []byte) int {
245	if !bytes.HasPrefix(b, []byte(s)) {
246		return 0
247	}
248
249	n := len(s)
250	if n < len(b) && isNotDelim(b[n]) {
251		return 0
252	}
253	return n
254}
255
256// isNotDelim returns true if given byte is a not delimiter character.
257func isNotDelim(c byte) bool {
258	return (c == '-' || c == '+' || c == '.' || c == '_' ||
259		('a' <= c && c <= 'z') ||
260		('A' <= c && c <= 'Z') ||
261		('0' <= c && c <= '9'))
262}
263
264// consume consumes n bytes of input and any subsequent whitespace.
265func (d *Decoder) consume(n int) {
266	d.in = d.in[n:]
267	for len(d.in) > 0 {
268		switch d.in[0] {
269		case ' ', '\n', '\r', '\t':
270			d.in = d.in[1:]
271		default:
272			return
273		}
274	}
275}
276
277// isValueNext returns true if next type should be a JSON value: Null,
278// Number, String or Bool.
279func (d *Decoder) isValueNext() bool {
280	if len(d.openStack) == 0 {
281		return d.lastToken.kind == 0
282	}
283
284	start := d.openStack[len(d.openStack)-1]
285	switch start {
286	case ObjectOpen:
287		return d.lastToken.kind&Name != 0
288	case ArrayOpen:
289		return d.lastToken.kind&(ArrayOpen|comma) != 0
290	}
291	panic(fmt.Sprintf(
292		"unreachable logic in Decoder.isValueNext, lastToken.kind: %v, openStack: %v",
293		d.lastToken.kind, start))
294}
295
296// consumeToken constructs a Token for given Kind with raw value derived from
297// current d.in and given size, and consumes the given size-lenght of it.
298func (d *Decoder) consumeToken(kind Kind, size int) Token {
299	tok := Token{
300		kind: kind,
301		raw:  d.in[:size],
302		pos:  len(d.orig) - len(d.in),
303	}
304	d.consume(size)
305	return tok
306}
307
308// consumeBoolToken constructs a Token for a Bool kind with raw value derived from
309// current d.in and given size.
310func (d *Decoder) consumeBoolToken(b bool, size int) Token {
311	tok := Token{
312		kind: Bool,
313		raw:  d.in[:size],
314		pos:  len(d.orig) - len(d.in),
315		boo:  b,
316	}
317	d.consume(size)
318	return tok
319}
320
321// consumeStringToken constructs a Token for a String kind with raw value derived
322// from current d.in and given size.
323func (d *Decoder) consumeStringToken(s string, size int) Token {
324	tok := Token{
325		kind: String,
326		raw:  d.in[:size],
327		pos:  len(d.orig) - len(d.in),
328		str:  s,
329	}
330	d.consume(size)
331	return tok
332}
333
334// Clone returns a copy of the Decoder for use in reading ahead the next JSON
335// object, array or other values without affecting current Decoder.
336func (d *Decoder) Clone() *Decoder {
337	ret := *d
338	ret.openStack = append([]Kind(nil), ret.openStack...)
339	return &ret
340}
341