1// Copyright (c) 2020, Peter Ohler, All rights reserved.
2
3package gen
4
5import (
6	"bytes"
7	"fmt"
8	"io"
9	"math"
10	"unicode/utf8"
11)
12
13const (
14	stackInitSize = 32 // for container stack { or [
15	tmpInitSize   = 32 // for tokens and numbers
16	mapInitSize   = 8
17	readBufSize   = 4096
18)
19
20// Parser is a reusable JSON parser. It can be reused for multiple parsings
21// which allows buffer reuse for a performance advantage.
22type Parser struct {
23	tmp        []byte // used for numbers and strings
24	runeBytes  []byte
25	stack      []Node
26	starts     []int
27	maps       []Object
28	cb         func(Node) bool
29	resultChan chan Node
30	line       int
31	noff       int // Offset of last newline from start of buf. Can be negative when using a reader.
32	ri         int // read index for null, false, and true
33	mi         int
34	num        Number
35	rn         rune
36	result     Node
37	mode       string
38	nextMode   string
39
40	// OnlyOne returns an error if more than one JSON is in the string or stream.
41	OnlyOne bool
42
43	// Reuse maps. Previously returned maps will no longer be valid or rather
44	// could be modified during parsing.
45	Reuse bool
46}
47
48// Parse a JSON string in to simple types. An error is returned if not valid JSON.
49func (p *Parser) Parse(buf []byte, args ...interface{}) (Node, error) {
50	for _, a := range args {
51		switch ta := a.(type) {
52		case func(Node) bool:
53			p.cb = ta
54			p.OnlyOne = false
55		case chan Node:
56			p.resultChan = ta
57			p.OnlyOne = false
58			p.Reuse = false
59		default:
60			return nil, fmt.Errorf("a %T is not a valid option type", a)
61		}
62	}
63	if p.cb == nil && p.resultChan == nil {
64		p.OnlyOne = true
65	}
66	if p.stack == nil {
67		p.stack = make([]Node, 0, stackInitSize)
68		p.tmp = make([]byte, 0, tmpInitSize)
69		p.starts = make([]int, 0, 16)
70		p.maps = make([]Object, 0, 16)
71	} else {
72		p.stack = p.stack[:0]
73		p.tmp = p.tmp[:0]
74		p.starts = p.starts[:0]
75	}
76	p.result = nil
77	p.noff = -1
78	p.line = 1
79	p.mode = valueMap
80	p.mi = 0
81	var err error
82	// Skip BOM if present.
83	if 3 < len(buf) && buf[0] == 0xEF {
84		if buf[1] == 0xBB && buf[2] == 0xBF {
85			err = p.parseBuffer(buf[3:], true)
86		} else {
87			return nil, fmt.Errorf("expected BOM at 1:3")
88		}
89	} else {
90		err = p.parseBuffer(buf, true)
91	}
92	p.stack = p.stack[:cap(p.stack)]
93	for i := len(p.stack) - 1; 0 <= i; i-- {
94		p.stack[i] = nil
95	}
96	p.stack = p.stack[:0]
97
98	return p.result, err
99}
100
101// ParseReader a JSON io.Reader. An error is returned if not valid JSON.
102func (p *Parser) ParseReader(r io.Reader, args ...interface{}) (data Node, err error) {
103	for _, a := range args {
104		switch ta := a.(type) {
105		case func(Node) bool:
106			p.cb = ta
107			p.OnlyOne = false
108		case chan Node:
109			p.resultChan = ta
110			p.OnlyOne = false
111			p.Reuse = false
112		default:
113			return nil, fmt.Errorf("a %T is not a valid option type", a)
114		}
115	}
116	if p.cb == nil && p.resultChan == nil {
117		p.OnlyOne = true
118	}
119	if p.stack == nil {
120		p.stack = make([]Node, 0, stackInitSize)
121		p.tmp = make([]byte, 0, tmpInitSize)
122		p.starts = make([]int, 0, 16)
123		p.maps = make([]Object, 0, 16)
124	} else {
125		p.stack = p.stack[:0]
126		p.tmp = p.tmp[:0]
127		p.starts = p.starts[:0]
128	}
129	p.result = nil
130	p.noff = -1
131	p.line = 1
132	p.mi = 0
133	buf := make([]byte, readBufSize)
134	eof := false
135	var cnt int
136	cnt, err = r.Read(buf)
137	buf = buf[:cnt]
138	p.mode = valueMap
139	if err != nil {
140		if err != io.EOF {
141			return
142		}
143		eof = true
144	}
145	var skip int
146	// Skip BOM if present.
147	if 3 < len(buf) && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF {
148		skip = 3
149	}
150	for {
151		if 0 < skip {
152			err = p.parseBuffer(buf[skip:], eof)
153		} else {
154			err = p.parseBuffer(buf, eof)
155		}
156		if err != nil {
157			p.stack = p.stack[:cap(p.stack)]
158			for i := len(p.stack) - 1; 0 <= i; i-- {
159				p.stack[i] = nil
160			}
161			p.stack = p.stack[:0]
162
163			return
164		}
165		skip = 0
166		if eof {
167			break
168		}
169		buf = buf[:cap(buf)]
170		cnt, err = r.Read(buf)
171		buf = buf[:cnt]
172		if err != nil {
173			if err != io.EOF {
174				return
175			}
176			eof = true
177		}
178	}
179	data = p.result
180
181	return
182}
183
184func (p *Parser) parseBuffer(buf []byte, last bool) error {
185	var b byte
186	var i int
187	var off int
188	depth := len(p.starts)
189	for off = 0; off < len(buf); off++ {
190		b = buf[off]
191		switch p.mode[b] {
192		case skipNewline:
193			p.line++
194			p.noff = off
195			for i, b = range buf[off+1:] {
196				if spaceMap[b] != skipChar {
197					break
198				}
199			}
200			off += i
201			continue
202		case colonColon:
203			p.mode = valueMap
204			continue
205		case skipChar: // skip and continue
206			continue
207		case strOk:
208			p.tmp = append(p.tmp, b)
209		case keyQuote:
210			start := off + 1
211			if len(buf) <= start {
212				p.tmp = p.tmp[:0]
213				p.mode = stringMap
214				p.nextMode = colonMap
215				continue
216			}
217			for i, b = range buf[off+1:] {
218				if stringMap[b] != strOk {
219					break
220				}
221			}
222			off += i
223			if b == '"' {
224				off++
225				p.stack = append(p.stack, Key(buf[start:off]))
226				p.mode = colonMap
227			} else {
228				p.tmp = p.tmp[:0]
229				p.tmp = append(p.tmp, buf[start:off+1]...)
230				p.mode = stringMap
231				p.nextMode = colonMap
232			}
233			continue
234		case afterComma:
235			if 0 < len(p.starts) && p.starts[len(p.starts)-1] == -1 {
236				p.mode = keyMap
237			} else {
238				p.mode = commaMap
239			}
240			continue
241		case valQuote:
242			start := off + 1
243			if len(buf) <= start {
244				p.tmp = p.tmp[:0]
245				p.mode = stringMap
246				p.nextMode = afterMap
247				continue
248			}
249			for i, b = range buf[off+1:] {
250				if stringMap[b] != strOk {
251					break
252				}
253			}
254			off += i
255			if b == '"' {
256				off++
257				p.add(String(buf[start:off]))
258				p.mode = afterMap
259			} else {
260				p.tmp = p.tmp[:0]
261				p.tmp = append(p.tmp, buf[start:off+1]...)
262				p.mode = stringMap
263				p.nextMode = afterMap
264				continue
265			}
266		case numComma:
267			p.add(p.num.AsNode())
268			if 0 < len(p.starts) && p.starts[len(p.starts)-1] == -1 {
269				p.mode = keyMap
270			} else {
271				p.mode = commaMap
272			}
273		case strSlash:
274			p.mode = escMap
275			continue
276		case escOk:
277			p.tmp = append(p.tmp, escByteMap[b])
278			p.mode = stringMap
279			continue
280		case openObject:
281			p.starts = append(p.starts, -1)
282			p.mode = key1Map
283			var m Object
284			if p.Reuse {
285				if p.mi < len(p.maps) {
286					m = p.maps[p.mi]
287					for k := range m {
288						delete(m, k)
289					}
290				} else {
291					m = make(Object, mapInitSize)
292					p.maps = append(p.maps, m)
293				}
294				p.mi++
295			} else {
296				m = make(Object, mapInitSize)
297			}
298			p.stack = append(p.stack, m)
299			depth++
300			continue
301		case closeObject:
302			depth--
303			if depth < 0 || 0 <= p.starts[depth] {
304				return p.newError(off, "unexpected object close")
305			}
306			if 256 < len(p.mode) && p.mode[256] == 'n' {
307				p.add(p.num.AsNode())
308			}
309			p.starts = p.starts[0:depth]
310			n := p.stack[len(p.stack)-1]
311			p.stack = p.stack[:len(p.stack)-1]
312			p.add(n)
313			p.mode = afterMap
314		case val0:
315			p.mode = zeroMap
316			p.num.Reset()
317		case valDigit:
318			p.num.Reset()
319			p.mode = digitMap
320			p.num.I = uint64(b - '0')
321			for i, b = range buf[off+1:] {
322				if digitMap[b] != numDigit {
323					break
324				}
325				p.num.I = p.num.I*10 + uint64(b-'0')
326				if math.MaxInt64 < p.num.I {
327					p.num.FillBig()
328					break
329				}
330			}
331			if digitMap[b] == numDigit {
332				off++
333			}
334			off += i
335		case valNeg:
336			p.mode = negMap
337			p.num.Reset()
338			p.num.Neg = true
339			continue
340		case escU:
341			p.mode = uMap
342			p.rn = 0
343			p.ri = 0
344			continue
345		case openArray:
346			p.starts = append(p.starts, len(p.stack))
347			p.stack = append(p.stack, EmptyArray)
348			p.mode = valueMap
349			depth++
350			continue
351		case closeArray:
352			depth--
353			if depth < 0 || p.starts[depth] < 0 {
354				return p.newError(off, "unexpected array close")
355			}
356			// Only modes with a close array are value, after, and numbers
357			// which are all over 256 long.
358			if p.mode[256] == 'n' {
359				p.add(p.num.AsNode())
360			}
361			start := p.starts[len(p.starts)-1] + 1
362			p.starts = p.starts[:len(p.starts)-1]
363			size := len(p.stack) - start
364			n := make(Array, size)
365			copy(n, p.stack[start:len(p.stack)])
366			p.stack = p.stack[0 : start-1]
367			p.add(n)
368			p.mode = afterMap
369		case valNull:
370			if off+4 <= len(buf) && string(buf[off:off+4]) == "null" {
371				off += 3
372				p.mode = afterMap
373				p.add(nil)
374			} else {
375				p.mode = nullMap
376				p.ri = 0
377			}
378		case valTrue:
379			if off+4 <= len(buf) && string(buf[off:off+4]) == "true" {
380				off += 3
381				p.mode = afterMap
382				p.add(True)
383			} else {
384				p.mode = trueMap
385				p.ri = 0
386			}
387		case valFalse:
388			if off+5 <= len(buf) && string(buf[off:off+5]) == "false" {
389				off += 4
390				p.mode = afterMap
391				p.add(False)
392			} else {
393				p.mode = falseMap
394				p.ri = 0
395			}
396		case numDot:
397			if 0 < len(p.num.BigBuf) {
398				p.num.BigBuf = append(p.num.BigBuf, b)
399				p.mode = dotMap
400				continue
401			}
402			for i, b = range buf[off+1:] {
403				if digitMap[b] != numDigit {
404					break
405				}
406				p.num.Frac = p.num.Frac*10 + uint64(b-'0')
407				p.num.Div *= 10.0
408				if math.MaxInt64 < p.num.Frac {
409					p.num.FillBig()
410					break
411				}
412			}
413			off += i
414			if digitMap[b] == numDigit {
415				off++
416			}
417			p.mode = fracMap
418		case numFrac:
419			p.num.AddFrac(b)
420			p.mode = fracMap
421		case fracE:
422			if 0 < len(p.num.BigBuf) {
423				p.num.BigBuf = append(p.num.BigBuf, b)
424			}
425			p.mode = expSignMap
426			continue
427		case strQuote:
428			p.mode = p.nextMode
429			if p.mode[':'] == colonColon {
430				p.stack = append(p.stack, Key(p.tmp))
431			} else {
432				p.add(String(p.tmp))
433			}
434		case numZero:
435			p.mode = zeroMap
436		case numDigit:
437			p.num.AddDigit(b)
438		case negDigit:
439			p.num.AddDigit(b)
440			p.mode = digitMap
441		case numSpc:
442			p.add(p.num.AsNode())
443			p.mode = afterMap
444		case numNewline:
445			p.add(p.num.AsNode())
446			p.line++
447			p.noff = off
448			p.mode = afterMap
449			for i, b = range buf[off+1:] {
450				if spaceMap[b] != skipChar {
451					break
452				}
453			}
454			off += i
455		case expSign:
456			p.mode = expZeroMap
457			if b == '-' {
458				p.num.NegExp = true
459			}
460			continue
461		case expDigit:
462			p.num.AddExp(b)
463			p.mode = expMap
464		case uOk:
465			p.ri++
466			switch b {
467			case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
468				p.rn = p.rn<<4 | rune(b-'0')
469			case 'a', 'b', 'c', 'd', 'e', 'f':
470				p.rn = p.rn<<4 | rune(b-'a'+10)
471			case 'A', 'B', 'C', 'D', 'E', 'F':
472				p.rn = p.rn<<4 | rune(b-'A'+10)
473			}
474			if p.ri == 4 {
475				if len(p.runeBytes) < 6 {
476					p.runeBytes = make([]byte, 6)
477				}
478				n := utf8.EncodeRune(p.runeBytes, p.rn)
479				p.tmp = append(p.tmp, p.runeBytes[:n]...)
480				p.mode = stringMap
481			}
482			continue
483		case tokenOk:
484			switch {
485			case p.mode['r'] == tokenOk:
486				p.ri++
487				if "true"[p.ri] != b {
488					return p.newError(off, "expected true")
489				}
490				if 3 <= p.ri {
491					p.add(True)
492					p.mode = afterMap
493				}
494			case p.mode['a'] == tokenOk:
495				p.ri++
496				if "false"[p.ri] != b {
497					return p.newError(off, "expected false")
498				}
499				if 4 <= p.ri {
500					p.add(False)
501					p.mode = afterMap
502				}
503			case p.mode['u'] == tokenOk && p.mode['l'] == tokenOk:
504				p.ri++
505				if "null"[p.ri] != b {
506					return p.newError(off, "expected null")
507				}
508				if 3 <= p.ri {
509					p.add(nil)
510					p.mode = afterMap
511				}
512			}
513		case charErr:
514			return p.byteError(off, p.mode, b, bytes.Runes(buf[off:])[0])
515		}
516		if depth == 0 && 256 < len(p.mode) && p.mode[256] == 'a' {
517			if p.cb == nil && p.resultChan == nil {
518				p.result = p.stack[0]
519			} else {
520				if p.cb != nil {
521					p.cb(p.stack[0])
522				}
523				if p.resultChan != nil {
524					p.resultChan <- p.stack[0]
525				}
526			}
527			p.stack = p.stack[:0]
528			p.mi = 0
529			if p.OnlyOne {
530				p.mode = spaceMap
531			} else {
532				p.mode = valueMap
533			}
534		}
535	}
536	if last {
537		if len(p.mode) == 256 { // valid finishing maps are one byte longer
538			return p.newError(off, "incomplete JSON")
539		}
540		if p.mode[256] == 'n' {
541			p.add(p.num.AsNode())
542			if p.cb == nil && p.resultChan == nil {
543				p.result = p.stack[0]
544			} else {
545				if p.cb != nil {
546					p.cb(p.stack[0])
547				}
548				if p.resultChan != nil {
549					p.resultChan <- p.stack[0]
550				}
551			}
552		}
553	}
554	return nil
555}
556
557func (p *Parser) add(n Node) {
558	if 2 <= len(p.stack) {
559		if k, ok := p.stack[len(p.stack)-1].(Key); ok {
560			obj, _ := p.stack[len(p.stack)-2].(Object)
561			obj[string(k)] = n
562			p.stack = p.stack[0 : len(p.stack)-1]
563
564			return
565		}
566	}
567	p.stack = append(p.stack, n)
568}
569
570func (p *Parser) newError(off int, format string, args ...interface{}) error {
571	return &ParseError{
572		Message: fmt.Sprintf(format, args...),
573		Line:    p.line,
574		Column:  off - p.noff,
575	}
576}
577
578func (p *Parser) byteError(off int, mode string, b byte, r rune) error {
579	err := &ParseError{
580		Line:   p.line,
581		Column: off - p.noff,
582	}
583	switch mode {
584	case nullMap:
585		err.Message = "expected null"
586	case trueMap:
587		err.Message = "expected true"
588	case falseMap:
589		err.Message = "expected false"
590	case afterMap:
591		err.Message = fmt.Sprintf("expected a comma or close, not '%c'", r)
592	case key1Map:
593		err.Message = fmt.Sprintf("expected a string start or object close, not '%c'", r)
594	case keyMap:
595		err.Message = fmt.Sprintf("expected a string start, not '%c'", r)
596	case colonMap:
597		err.Message = fmt.Sprintf("expected a colon, not '%c'", r)
598	case negMap, zeroMap, digitMap, dotMap, fracMap, expSignMap, expZeroMap, expMap:
599		err.Message = "invalid number"
600	case stringMap:
601		err.Message = fmt.Sprintf("invalid JSON character 0x%02x", b)
602	case escMap:
603		err.Message = fmt.Sprintf("invalid JSON escape character '\\%c'", r)
604	case uMap:
605		err.Message = fmt.Sprintf("invalid JSON unicode character '%c'", r)
606	case spaceMap:
607		err.Message = fmt.Sprintf("extra characters after close, '%c'", r)
608	default:
609		err.Message = fmt.Sprintf("unexpected character '%c'", r)
610	}
611	return err
612}
613