1// Copyright (c) 2009 The Go Authors. All rights reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7//    * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9//    * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13//    * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29// Package json provides a JSON value parser state machine.
30// This package is almost entirely copied from the Go stdlib.
31// Changes made to it permit users of the package to tell
32// if some slice of bytes is a valid beginning of a json string.
33package json
34
35import "fmt"
36
37type (
38	context    int
39	scanStatus int
40)
41
42const (
43	contextKey context = iota
44	contextObj
45	contextArr
46
47	scanContinue     scanStatus = iota // uninteresting byte
48	scanBeginLiteral                   // end implied by next result != scanContinue
49	scanBeginObject                    // begin object
50	scanObjectKey                      // just finished object key (string)
51	scanObjectValue                    // just finished non-last object value
52	scanEndObject                      // end object (implies scanObjectValue if possible)
53	scanBeginArray                     // begin array
54	scanArrayValue                     // just finished array value
55	scanEndArray                       // end array (implies scanArrayValue if possible)
56	scanSkipSpace                      // space byte; can skip; known to be last "continue" result
57	scanEnd                            // top-level value ended *before* this byte; known to be first "stop" result
58	scanError                          // hit an error, scanner.err.
59)
60
61type (
62	scanner struct {
63		step     func(*scanner, byte) scanStatus
64		contexts []context
65		endTop   bool
66		err      error
67		index    int
68	}
69)
70
71// Scan returns the number of bytes scanned and if there was any error
72// in trying to reach the end of data
73func Scan(data []byte) (int, error) {
74	s := &scanner{}
75	_ = checkValid(data, s)
76	return s.index, s.err
77}
78
79// checkValid verifies that data is valid JSON-encoded data.
80// scan is passed in for use by checkValid to avoid an allocation.
81func checkValid(data []byte, scan *scanner) error {
82	scan.reset()
83	for _, c := range data {
84		scan.index++
85		if scan.step(scan, c) == scanError {
86			return scan.err
87		}
88	}
89	if scan.eof() == scanError {
90		return scan.err
91	}
92	return nil
93}
94
95func isSpace(c byte) bool {
96	return c == ' ' || c == '\t' || c == '\r' || c == '\n'
97}
98
99func (s *scanner) reset() {
100	s.step = stateBeginValue
101	s.contexts = s.contexts[0:0]
102	s.err = nil
103}
104
105// eof tells the scanner that the end of input has been reached.
106// It returns a scan status just as s.step does.
107func (s *scanner) eof() scanStatus {
108	if s.err != nil {
109		return scanError
110	}
111	if s.endTop {
112		return scanEnd
113	}
114	s.step(s, ' ')
115	if s.endTop {
116		return scanEnd
117	}
118	if s.err == nil {
119		s.err = fmt.Errorf("unexpected end of JSON input")
120	}
121	return scanError
122}
123
124// pushContext pushes a new parse state p onto the parse stack.
125func (s *scanner) pushParseState(p context) {
126	s.contexts = append(s.contexts, p)
127}
128
129// popParseState pops a parse state (already obtained) off the stack
130// and updates s.step accordingly.
131func (s *scanner) popParseState() {
132	n := len(s.contexts) - 1
133	s.contexts = s.contexts[0:n]
134	if n == 0 {
135		s.step = stateEndTop
136		s.endTop = true
137	} else {
138		s.step = stateEndValue
139	}
140}
141
142// stateBeginValueOrEmpty is the state after reading `[`.
143func stateBeginValueOrEmpty(s *scanner, c byte) scanStatus {
144	if c <= ' ' && isSpace(c) {
145		return scanSkipSpace
146	}
147	if c == ']' {
148		return stateEndValue(s, c)
149	}
150	return stateBeginValue(s, c)
151}
152
153// stateBeginValue is the state at the beginning of the input.
154func stateBeginValue(s *scanner, c byte) scanStatus {
155	if c <= ' ' && isSpace(c) {
156		return scanSkipSpace
157	}
158	switch c {
159	case '{':
160		s.step = stateBeginStringOrEmpty
161		s.pushParseState(contextKey)
162		return scanBeginObject
163	case '[':
164		s.step = stateBeginValueOrEmpty
165		s.pushParseState(contextArr)
166		return scanBeginArray
167	case '"':
168		s.step = stateInString
169		return scanBeginLiteral
170	case '-':
171		s.step = stateNeg
172		return scanBeginLiteral
173	case '0': // beginning of 0.123
174		s.step = state0
175		return scanBeginLiteral
176	case 't': // beginning of true
177		s.step = stateT
178		return scanBeginLiteral
179	case 'f': // beginning of false
180		s.step = stateF
181		return scanBeginLiteral
182	case 'n': // beginning of null
183		s.step = stateN
184		return scanBeginLiteral
185	}
186	if '1' <= c && c <= '9' { // beginning of 1234.5
187		s.step = state1
188		return scanBeginLiteral
189	}
190	return s.error(c, "looking for beginning of value")
191}
192
193// stateBeginStringOrEmpty is the state after reading `{`.
194func stateBeginStringOrEmpty(s *scanner, c byte) scanStatus {
195	if c <= ' ' && isSpace(c) {
196		return scanSkipSpace
197	}
198	if c == '}' {
199		n := len(s.contexts)
200		s.contexts[n-1] = contextObj
201		return stateEndValue(s, c)
202	}
203	return stateBeginString(s, c)
204}
205
206// stateBeginString is the state after reading `{"key": value,`.
207func stateBeginString(s *scanner, c byte) scanStatus {
208	if c <= ' ' && isSpace(c) {
209		return scanSkipSpace
210	}
211	if c == '"' {
212		s.step = stateInString
213		return scanBeginLiteral
214	}
215	return s.error(c, "looking for beginning of object key string")
216}
217
218// stateEndValue is the state after completing a value,
219// such as after reading `{}` or `true` or `["x"`.
220func stateEndValue(s *scanner, c byte) scanStatus {
221	n := len(s.contexts)
222	if n == 0 {
223		// Completed top-level before the current byte.
224		s.step = stateEndTop
225		s.endTop = true
226		return stateEndTop(s, c)
227	}
228	if c <= ' ' && isSpace(c) {
229		s.step = stateEndValue
230		return scanSkipSpace
231	}
232	ps := s.contexts[n-1]
233	switch ps {
234	case contextKey:
235		if c == ':' {
236			s.contexts[n-1] = contextObj
237			s.step = stateBeginValue
238			return scanObjectKey
239		}
240		return s.error(c, "after object key")
241	case contextObj:
242		if c == ',' {
243			s.contexts[n-1] = contextKey
244			s.step = stateBeginString
245			return scanObjectValue
246		}
247		if c == '}' {
248			s.popParseState()
249			return scanEndObject
250		}
251		return s.error(c, "after object key:value pair")
252	case contextArr:
253		if c == ',' {
254			s.step = stateBeginValue
255			return scanArrayValue
256		}
257		if c == ']' {
258			s.popParseState()
259			return scanEndArray
260		}
261		return s.error(c, "after array element")
262	}
263	return s.error(c, "")
264}
265
266// stateEndTop is the state after finishing the top-level value,
267// such as after reading `{}` or `[1,2,3]`.
268// Only space characters should be seen now.
269func stateEndTop(s *scanner, c byte) scanStatus {
270	if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
271		// Complain about non-space byte on next call.
272		s.error(c, "after top-level value")
273	}
274	return scanEnd
275}
276
277// stateInString is the state after reading `"`.
278func stateInString(s *scanner, c byte) scanStatus {
279	if c == '"' {
280		s.step = stateEndValue
281		return scanContinue
282	}
283	if c == '\\' {
284		s.step = stateInStringEsc
285		return scanContinue
286	}
287	if c < 0x20 {
288		return s.error(c, "in string literal")
289	}
290	return scanContinue
291}
292
293// stateInStringEsc is the state after reading `"\` during a quoted string.
294func stateInStringEsc(s *scanner, c byte) scanStatus {
295	switch c {
296	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
297		s.step = stateInString
298		return scanContinue
299	case 'u':
300		s.step = stateInStringEscU
301		return scanContinue
302	}
303	return s.error(c, "in string escape code")
304}
305
306// stateInStringEscU is the state after reading `"\u` during a quoted string.
307func stateInStringEscU(s *scanner, c byte) scanStatus {
308	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
309		s.step = stateInStringEscU1
310		return scanContinue
311	}
312	// numbers
313	return s.error(c, "in \\u hexadecimal character escape")
314}
315
316// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
317func stateInStringEscU1(s *scanner, c byte) scanStatus {
318	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
319		s.step = stateInStringEscU12
320		return scanContinue
321	}
322	// numbers
323	return s.error(c, "in \\u hexadecimal character escape")
324}
325
326// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
327func stateInStringEscU12(s *scanner, c byte) scanStatus {
328	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
329		s.step = stateInStringEscU123
330		return scanContinue
331	}
332	// numbers
333	return s.error(c, "in \\u hexadecimal character escape")
334}
335
336// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
337func stateInStringEscU123(s *scanner, c byte) scanStatus {
338	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
339		s.step = stateInString
340		return scanContinue
341	}
342	// numbers
343	return s.error(c, "in \\u hexadecimal character escape")
344}
345
346// stateNeg is the state after reading `-` during a number.
347func stateNeg(s *scanner, c byte) scanStatus {
348	if c == '0' {
349		s.step = state0
350		return scanContinue
351	}
352	if '1' <= c && c <= '9' {
353		s.step = state1
354		return scanContinue
355	}
356	return s.error(c, "in numeric literal")
357}
358
359// state1 is the state after reading a non-zero integer during a number,
360// such as after reading `1` or `100` but not `0`.
361func state1(s *scanner, c byte) scanStatus {
362	if '0' <= c && c <= '9' {
363		s.step = state1
364		return scanContinue
365	}
366	return state0(s, c)
367}
368
369// state0 is the state after reading `0` during a number.
370func state0(s *scanner, c byte) scanStatus {
371	if c == '.' {
372		s.step = stateDot
373		return scanContinue
374	}
375	if c == 'e' || c == 'E' {
376		s.step = stateE
377		return scanContinue
378	}
379	return stateEndValue(s, c)
380}
381
382// stateDot is the state after reading the integer and decimal point in a number,
383// such as after reading `1.`.
384func stateDot(s *scanner, c byte) scanStatus {
385	if '0' <= c && c <= '9' {
386		s.step = stateDot0
387		return scanContinue
388	}
389	return s.error(c, "after decimal point in numeric literal")
390}
391
392// stateDot0 is the state after reading the integer, decimal point, and subsequent
393// digits of a number, such as after reading `3.14`.
394func stateDot0(s *scanner, c byte) scanStatus {
395	if '0' <= c && c <= '9' {
396		return scanContinue
397	}
398	if c == 'e' || c == 'E' {
399		s.step = stateE
400		return scanContinue
401	}
402	return stateEndValue(s, c)
403}
404
405// stateE is the state after reading the mantissa and e in a number,
406// such as after reading `314e` or `0.314e`.
407func stateE(s *scanner, c byte) scanStatus {
408	if c == '+' || c == '-' {
409		s.step = stateESign
410		return scanContinue
411	}
412	return stateESign(s, c)
413}
414
415// stateESign is the state after reading the mantissa, e, and sign in a number,
416// such as after reading `314e-` or `0.314e+`.
417func stateESign(s *scanner, c byte) scanStatus {
418	if '0' <= c && c <= '9' {
419		s.step = stateE0
420		return scanContinue
421	}
422	return s.error(c, "in exponent of numeric literal")
423}
424
425// stateE0 is the state after reading the mantissa, e, optional sign,
426// and at least one digit of the exponent in a number,
427// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
428func stateE0(s *scanner, c byte) scanStatus {
429	if '0' <= c && c <= '9' {
430		return scanContinue
431	}
432	return stateEndValue(s, c)
433}
434
435// stateT is the state after reading `t`.
436func stateT(s *scanner, c byte) scanStatus {
437	if c == 'r' {
438		s.step = stateTr
439		return scanContinue
440	}
441	return s.error(c, "in literal true (expecting 'r')")
442}
443
444// stateTr is the state after reading `tr`.
445func stateTr(s *scanner, c byte) scanStatus {
446	if c == 'u' {
447		s.step = stateTru
448		return scanContinue
449	}
450	return s.error(c, "in literal true (expecting 'u')")
451}
452
453// stateTru is the state after reading `tru`.
454func stateTru(s *scanner, c byte) scanStatus {
455	if c == 'e' {
456		s.step = stateEndValue
457		return scanContinue
458	}
459	return s.error(c, "in literal true (expecting 'e')")
460}
461
462// stateF is the state after reading `f`.
463func stateF(s *scanner, c byte) scanStatus {
464	if c == 'a' {
465		s.step = stateFa
466		return scanContinue
467	}
468	return s.error(c, "in literal false (expecting 'a')")
469}
470
471// stateFa is the state after reading `fa`.
472func stateFa(s *scanner, c byte) scanStatus {
473	if c == 'l' {
474		s.step = stateFal
475		return scanContinue
476	}
477	return s.error(c, "in literal false (expecting 'l')")
478}
479
480// stateFal is the state after reading `fal`.
481func stateFal(s *scanner, c byte) scanStatus {
482	if c == 's' {
483		s.step = stateFals
484		return scanContinue
485	}
486	return s.error(c, "in literal false (expecting 's')")
487}
488
489// stateFals is the state after reading `fals`.
490func stateFals(s *scanner, c byte) scanStatus {
491	if c == 'e' {
492		s.step = stateEndValue
493		return scanContinue
494	}
495	return s.error(c, "in literal false (expecting 'e')")
496}
497
498// stateN is the state after reading `n`.
499func stateN(s *scanner, c byte) scanStatus {
500	if c == 'u' {
501		s.step = stateNu
502		return scanContinue
503	}
504	return s.error(c, "in literal null (expecting 'u')")
505}
506
507// stateNu is the state after reading `nu`.
508func stateNu(s *scanner, c byte) scanStatus {
509	if c == 'l' {
510		s.step = stateNul
511		return scanContinue
512	}
513	return s.error(c, "in literal null (expecting 'l')")
514}
515
516// stateNul is the state after reading `nul`.
517func stateNul(s *scanner, c byte) scanStatus {
518	if c == 'l' {
519		s.step = stateEndValue
520		return scanContinue
521	}
522	return s.error(c, "in literal null (expecting 'l')")
523}
524
525// stateError is the state after reaching a syntax error,
526// such as after reading `[1}` or `5.1.2`.
527func stateError(s *scanner, c byte) scanStatus {
528	return scanError
529}
530
531// error records an error and switches to the error state.
532func (s *scanner) error(c byte, context string) scanStatus {
533	s.step = stateError
534	s.err = fmt.Errorf("invalid character <<%c>> %s", c, context)
535	return scanError
536}
537