1// Copyright 2010 The Go Authors.  All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7// JSON value parser state machine.
8// Just about at the limit of what is reasonable to write by hand.
9// Some parts are a bit tedious, but overall it nicely factors out the
10// otherwise common code from the multiple scanning functions
11// in this package (Compact, Indent, checkValid, nextValue, etc).
12//
13// This file starts with two simple examples using the scanner
14// before diving into the scanner itself.
15
16import "strconv"
17
18// checkValid verifies that data is valid JSON-encoded data.
19// scan is passed in for use by checkValid to avoid an allocation.
20func checkValid(data []byte, scan *scanner) error {
21	scan.reset()
22	for _, c := range data {
23		scan.bytes++
24		if scan.step(scan, c) == scanError {
25			return scan.err
26		}
27	}
28	if scan.eof() == scanError {
29		return scan.err
30	}
31	return nil
32}
33
34// nextValue splits data after the next whole JSON value,
35// returning that value and the bytes that follow it as separate slices.
36// scan is passed in for use by nextValue to avoid an allocation.
37func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) {
38	scan.reset()
39	for i, c := range data {
40		v := scan.step(scan, c)
41		if v >= scanEndObject {
42			switch v {
43			// probe the scanner with a space to determine whether we will
44			// get scanEnd on the next character. Otherwise, if the next character
45			// is not a space, scanEndTop allocates a needless error.
46			case scanEndObject, scanEndArray:
47				if scan.step(scan, ' ') == scanEnd {
48					return data[:i+1], data[i+1:], nil
49				}
50			case scanError:
51				return nil, nil, scan.err
52			case scanEnd:
53				return data[:i], data[i:], nil
54			}
55		}
56	}
57	if scan.eof() == scanError {
58		return nil, nil, scan.err
59	}
60	return data, nil, nil
61}
62
63// A SyntaxError is a description of a JSON syntax error.
64type SyntaxError struct {
65	msg    string // description of error
66	Offset int64  // error occurred after reading Offset bytes
67}
68
69func (e *SyntaxError) Error() string { return e.msg }
70
71// A scanner is a JSON scanning state machine.
72// Callers call scan.reset() and then pass bytes in one at a time
73// by calling scan.step(&scan, c) for each byte.
74// The return value, referred to as an opcode, tells the
75// caller about significant parsing events like beginning
76// and ending literals, objects, and arrays, so that the
77// caller can follow along if it wishes.
78// The return value scanEnd indicates that a single top-level
79// JSON value has been completed, *before* the byte that
80// just got passed in.  (The indication must be delayed in order
81// to recognize the end of numbers: is 123 a whole value or
82// the beginning of 12345e+6?).
83type scanner struct {
84	// The step is a func to be called to execute the next transition.
85	// Also tried using an integer constant and a single func
86	// with a switch, but using the func directly was 10% faster
87	// on a 64-bit Mac Mini, and it's nicer to read.
88	step func(*scanner, byte) int
89
90	// Reached end of top-level value.
91	endTop bool
92
93	// Stack of what we're in the middle of - array values, object keys, object values.
94	parseState []int
95
96	// Error that happened, if any.
97	err error
98
99	// 1-byte redo (see undo method)
100	redo      bool
101	redoCode  int
102	redoState func(*scanner, byte) int
103
104	// total bytes consumed, updated by decoder.Decode
105	bytes int64
106}
107
108// These values are returned by the state transition functions
109// assigned to scanner.state and the method scanner.eof.
110// They give details about the current state of the scan that
111// callers might be interested to know about.
112// It is okay to ignore the return value of any particular
113// call to scanner.state: if one call returns scanError,
114// every subsequent call will return scanError too.
115const (
116	// Continue.
117	scanContinue     = iota // uninteresting byte
118	scanBeginLiteral        // end implied by next result != scanContinue
119	scanBeginObject         // begin object
120	scanObjectKey           // just finished object key (string)
121	scanObjectValue         // just finished non-last object value
122	scanEndObject           // end object (implies scanObjectValue if possible)
123	scanBeginArray          // begin array
124	scanArrayValue          // just finished array value
125	scanEndArray            // end array (implies scanArrayValue if possible)
126	scanSkipSpace           // space byte; can skip; known to be last "continue" result
127
128	// Stop.
129	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
130	scanError // hit an error, scanner.err.
131)
132
133// These values are stored in the parseState stack.
134// They give the current state of a composite value
135// being scanned.  If the parser is inside a nested value
136// the parseState describes the nested state, outermost at entry 0.
137const (
138	parseObjectKey   = iota // parsing object key (before colon)
139	parseObjectValue        // parsing object value (after colon)
140	parseArrayValue         // parsing array value
141)
142
143// reset prepares the scanner for use.
144// It must be called before calling s.step.
145func (s *scanner) reset() {
146	s.step = stateBeginValue
147	s.parseState = s.parseState[0:0]
148	s.err = nil
149	s.redo = false
150	s.endTop = false
151}
152
153// eof tells the scanner that the end of input has been reached.
154// It returns a scan status just as s.step does.
155func (s *scanner) eof() int {
156	if s.err != nil {
157		return scanError
158	}
159	if s.endTop {
160		return scanEnd
161	}
162	s.step(s, ' ')
163	if s.endTop {
164		return scanEnd
165	}
166	if s.err == nil {
167		s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
168	}
169	return scanError
170}
171
172// pushParseState pushes a new parse state p onto the parse stack.
173func (s *scanner) pushParseState(p int) {
174	s.parseState = append(s.parseState, p)
175}
176
177// popParseState pops a parse state (already obtained) off the stack
178// and updates s.step accordingly.
179func (s *scanner) popParseState() {
180	n := len(s.parseState) - 1
181	s.parseState = s.parseState[0:n]
182	s.redo = false
183	if n == 0 {
184		s.step = stateEndTop
185		s.endTop = true
186	} else {
187		s.step = stateEndValue
188	}
189}
190
191func isSpace(c byte) bool {
192	return c == ' ' || c == '\t' || c == '\r' || c == '\n'
193}
194
195// stateBeginValueOrEmpty is the state after reading `[`.
196func stateBeginValueOrEmpty(s *scanner, c byte) int {
197	if c <= ' ' && isSpace(c) {
198		return scanSkipSpace
199	}
200	if c == ']' {
201		return stateEndValue(s, c)
202	}
203	return stateBeginValue(s, c)
204}
205
206// stateBeginValue is the state at the beginning of the input.
207func stateBeginValue(s *scanner, c byte) int {
208	if c <= ' ' && isSpace(c) {
209		return scanSkipSpace
210	}
211	switch c {
212	case '{':
213		s.step = stateBeginStringOrEmpty
214		s.pushParseState(parseObjectKey)
215		return scanBeginObject
216	case '[':
217		s.step = stateBeginValueOrEmpty
218		s.pushParseState(parseArrayValue)
219		return scanBeginArray
220	case '"':
221		s.step = stateInString
222		return scanBeginLiteral
223	case '-':
224		s.step = stateNeg
225		return scanBeginLiteral
226	case '0': // beginning of 0.123
227		s.step = state0
228		return scanBeginLiteral
229	case 't': // beginning of true
230		s.step = stateT
231		return scanBeginLiteral
232	case 'f': // beginning of false
233		s.step = stateF
234		return scanBeginLiteral
235	case 'n': // beginning of null
236		s.step = stateN
237		return scanBeginLiteral
238	}
239	if '1' <= c && c <= '9' { // beginning of 1234.5
240		s.step = state1
241		return scanBeginLiteral
242	}
243	return s.error(c, "looking for beginning of value")
244}
245
246// stateBeginStringOrEmpty is the state after reading `{`.
247func stateBeginStringOrEmpty(s *scanner, c byte) int {
248	if c <= ' ' && isSpace(c) {
249		return scanSkipSpace
250	}
251	if c == '}' {
252		n := len(s.parseState)
253		s.parseState[n-1] = parseObjectValue
254		return stateEndValue(s, c)
255	}
256	return stateBeginString(s, c)
257}
258
259// stateBeginString is the state after reading `{"key": value,`.
260func stateBeginString(s *scanner, c byte) int {
261	if c <= ' ' && isSpace(c) {
262		return scanSkipSpace
263	}
264	if c == '"' {
265		s.step = stateInString
266		return scanBeginLiteral
267	}
268	return s.error(c, "looking for beginning of object key string")
269}
270
271// stateEndValue is the state after completing a value,
272// such as after reading `{}` or `true` or `["x"`.
273func stateEndValue(s *scanner, c byte) int {
274	n := len(s.parseState)
275	if n == 0 {
276		// Completed top-level before the current byte.
277		s.step = stateEndTop
278		s.endTop = true
279		return stateEndTop(s, c)
280	}
281	if c <= ' ' && isSpace(c) {
282		s.step = stateEndValue
283		return scanSkipSpace
284	}
285	ps := s.parseState[n-1]
286	switch ps {
287	case parseObjectKey:
288		if c == ':' {
289			s.parseState[n-1] = parseObjectValue
290			s.step = stateBeginValue
291			return scanObjectKey
292		}
293		return s.error(c, "after object key")
294	case parseObjectValue:
295		if c == ',' {
296			s.parseState[n-1] = parseObjectKey
297			s.step = stateBeginString
298			return scanObjectValue
299		}
300		if c == '}' {
301			s.popParseState()
302			return scanEndObject
303		}
304		return s.error(c, "after object key:value pair")
305	case parseArrayValue:
306		if c == ',' {
307			s.step = stateBeginValue
308			return scanArrayValue
309		}
310		if c == ']' {
311			s.popParseState()
312			return scanEndArray
313		}
314		return s.error(c, "after array element")
315	}
316	return s.error(c, "")
317}
318
319// stateEndTop is the state after finishing the top-level value,
320// such as after reading `{}` or `[1,2,3]`.
321// Only space characters should be seen now.
322func stateEndTop(s *scanner, c byte) int {
323	if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
324		// Complain about non-space byte on next call.
325		s.error(c, "after top-level value")
326	}
327	return scanEnd
328}
329
330// stateInString is the state after reading `"`.
331func stateInString(s *scanner, c byte) int {
332	if c == '"' {
333		s.step = stateEndValue
334		return scanContinue
335	}
336	if c == '\\' {
337		s.step = stateInStringEsc
338		return scanContinue
339	}
340	if c < 0x20 {
341		return s.error(c, "in string literal")
342	}
343	return scanContinue
344}
345
346// stateInStringEsc is the state after reading `"\` during a quoted string.
347func stateInStringEsc(s *scanner, c byte) int {
348	switch c {
349	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
350		s.step = stateInString
351		return scanContinue
352	case 'u':
353		s.step = stateInStringEscU
354		return scanContinue
355	}
356	return s.error(c, "in string escape code")
357}
358
359// stateInStringEscU is the state after reading `"\u` during a quoted string.
360func stateInStringEscU(s *scanner, c byte) int {
361	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
362		s.step = stateInStringEscU1
363		return scanContinue
364	}
365	// numbers
366	return s.error(c, "in \\u hexadecimal character escape")
367}
368
369// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
370func stateInStringEscU1(s *scanner, c byte) int {
371	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
372		s.step = stateInStringEscU12
373		return scanContinue
374	}
375	// numbers
376	return s.error(c, "in \\u hexadecimal character escape")
377}
378
379// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
380func stateInStringEscU12(s *scanner, c byte) int {
381	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
382		s.step = stateInStringEscU123
383		return scanContinue
384	}
385	// numbers
386	return s.error(c, "in \\u hexadecimal character escape")
387}
388
389// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
390func stateInStringEscU123(s *scanner, c byte) int {
391	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
392		s.step = stateInString
393		return scanContinue
394	}
395	// numbers
396	return s.error(c, "in \\u hexadecimal character escape")
397}
398
399// stateNeg is the state after reading `-` during a number.
400func stateNeg(s *scanner, c byte) int {
401	if c == '0' {
402		s.step = state0
403		return scanContinue
404	}
405	if '1' <= c && c <= '9' {
406		s.step = state1
407		return scanContinue
408	}
409	return s.error(c, "in numeric literal")
410}
411
412// state1 is the state after reading a non-zero integer during a number,
413// such as after reading `1` or `100` but not `0`.
414func state1(s *scanner, c byte) int {
415	if '0' <= c && c <= '9' {
416		s.step = state1
417		return scanContinue
418	}
419	return state0(s, c)
420}
421
422// state0 is the state after reading `0` during a number.
423func state0(s *scanner, c byte) int {
424	if c == '.' {
425		s.step = stateDot
426		return scanContinue
427	}
428	if c == 'e' || c == 'E' {
429		s.step = stateE
430		return scanContinue
431	}
432	return stateEndValue(s, c)
433}
434
435// stateDot is the state after reading the integer and decimal point in a number,
436// such as after reading `1.`.
437func stateDot(s *scanner, c byte) int {
438	if '0' <= c && c <= '9' {
439		s.step = stateDot0
440		return scanContinue
441	}
442	return s.error(c, "after decimal point in numeric literal")
443}
444
445// stateDot0 is the state after reading the integer, decimal point, and subsequent
446// digits of a number, such as after reading `3.14`.
447func stateDot0(s *scanner, c byte) int {
448	if '0' <= c && c <= '9' {
449		return scanContinue
450	}
451	if c == 'e' || c == 'E' {
452		s.step = stateE
453		return scanContinue
454	}
455	return stateEndValue(s, c)
456}
457
458// stateE is the state after reading the mantissa and e in a number,
459// such as after reading `314e` or `0.314e`.
460func stateE(s *scanner, c byte) int {
461	if c == '+' || c == '-' {
462		s.step = stateESign
463		return scanContinue
464	}
465	return stateESign(s, c)
466}
467
468// stateESign is the state after reading the mantissa, e, and sign in a number,
469// such as after reading `314e-` or `0.314e+`.
470func stateESign(s *scanner, c byte) int {
471	if '0' <= c && c <= '9' {
472		s.step = stateE0
473		return scanContinue
474	}
475	return s.error(c, "in exponent of numeric literal")
476}
477
478// stateE0 is the state after reading the mantissa, e, optional sign,
479// and at least one digit of the exponent in a number,
480// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
481func stateE0(s *scanner, c byte) int {
482	if '0' <= c && c <= '9' {
483		return scanContinue
484	}
485	return stateEndValue(s, c)
486}
487
488// stateT is the state after reading `t`.
489func stateT(s *scanner, c byte) int {
490	if c == 'r' {
491		s.step = stateTr
492		return scanContinue
493	}
494	return s.error(c, "in literal true (expecting 'r')")
495}
496
497// stateTr is the state after reading `tr`.
498func stateTr(s *scanner, c byte) int {
499	if c == 'u' {
500		s.step = stateTru
501		return scanContinue
502	}
503	return s.error(c, "in literal true (expecting 'u')")
504}
505
506// stateTru is the state after reading `tru`.
507func stateTru(s *scanner, c byte) int {
508	if c == 'e' {
509		s.step = stateEndValue
510		return scanContinue
511	}
512	return s.error(c, "in literal true (expecting 'e')")
513}
514
515// stateF is the state after reading `f`.
516func stateF(s *scanner, c byte) int {
517	if c == 'a' {
518		s.step = stateFa
519		return scanContinue
520	}
521	return s.error(c, "in literal false (expecting 'a')")
522}
523
524// stateFa is the state after reading `fa`.
525func stateFa(s *scanner, c byte) int {
526	if c == 'l' {
527		s.step = stateFal
528		return scanContinue
529	}
530	return s.error(c, "in literal false (expecting 'l')")
531}
532
533// stateFal is the state after reading `fal`.
534func stateFal(s *scanner, c byte) int {
535	if c == 's' {
536		s.step = stateFals
537		return scanContinue
538	}
539	return s.error(c, "in literal false (expecting 's')")
540}
541
542// stateFals is the state after reading `fals`.
543func stateFals(s *scanner, c byte) int {
544	if c == 'e' {
545		s.step = stateEndValue
546		return scanContinue
547	}
548	return s.error(c, "in literal false (expecting 'e')")
549}
550
551// stateN is the state after reading `n`.
552func stateN(s *scanner, c byte) int {
553	if c == 'u' {
554		s.step = stateNu
555		return scanContinue
556	}
557	return s.error(c, "in literal null (expecting 'u')")
558}
559
560// stateNu is the state after reading `nu`.
561func stateNu(s *scanner, c byte) int {
562	if c == 'l' {
563		s.step = stateNul
564		return scanContinue
565	}
566	return s.error(c, "in literal null (expecting 'l')")
567}
568
569// stateNul is the state after reading `nul`.
570func stateNul(s *scanner, c byte) int {
571	if c == 'l' {
572		s.step = stateEndValue
573		return scanContinue
574	}
575	return s.error(c, "in literal null (expecting 'l')")
576}
577
578// stateError is the state after reaching a syntax error,
579// such as after reading `[1}` or `5.1.2`.
580func stateError(s *scanner, c byte) int {
581	return scanError
582}
583
584// error records an error and switches to the error state.
585func (s *scanner) error(c byte, context string) int {
586	s.step = stateError
587	s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
588	return scanError
589}
590
591// quoteChar formats c as a quoted character literal
592func quoteChar(c byte) string {
593	// special cases - different from quoted strings
594	if c == '\'' {
595		return `'\''`
596	}
597	if c == '"' {
598		return `'"'`
599	}
600
601	// use quoted string with different quotation marks
602	s := strconv.Quote(string(c))
603	return "'" + s[1:len(s)-1] + "'"
604}
605
606// undo causes the scanner to return scanCode from the next state transition.
607// This gives callers a simple 1-byte undo mechanism.
608func (s *scanner) undo(scanCode int) {
609	if s.redo {
610		panic("json: invalid use of scanner")
611	}
612	s.redoCode = scanCode
613	s.redoState = s.step
614	s.step = stateRedo
615	s.redo = true
616}
617
618// stateRedo helps implement the scanner's 1-byte undo.
619func stateRedo(s *scanner, c byte) int {
620	s.redo = false
621	s.step = s.redoState
622	return s.redoCode
623}
624