1/*
2Copyright 2015 The Kubernetes Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package jsonpath
18
19import (
20	"errors"
21	"fmt"
22	"regexp"
23	"strconv"
24	"strings"
25	"unicode"
26	"unicode/utf8"
27)
28
29const eof = -1
30
31const (
32	leftDelim  = "{"
33	rightDelim = "}"
34)
35
36type Parser struct {
37	Name  string
38	Root  *ListNode
39	input string
40	cur   *ListNode
41	pos   int
42	start int
43	width int
44}
45
46var (
47	ErrSyntax        = errors.New("invalid syntax")
48	dictKeyRex       = regexp.MustCompile(`^'([^']*)'$`)
49	sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:[\d]*)?$`)
50)
51
52// Parse parsed the given text and return a node Parser.
53// If an error is encountered, parsing stops and an empty
54// Parser is returned with the error
55func Parse(name, text string) (*Parser, error) {
56	p := NewParser(name)
57	err := p.Parse(text)
58	if err != nil {
59		p = nil
60	}
61	return p, err
62}
63
64func NewParser(name string) *Parser {
65	return &Parser{
66		Name: name,
67	}
68}
69
70// parseAction parsed the expression inside delimiter
71func parseAction(name, text string) (*Parser, error) {
72	p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim))
73	// when error happens, p will be nil, so we need to return here
74	if err != nil {
75		return p, err
76	}
77	p.Root = p.Root.Nodes[0].(*ListNode)
78	return p, nil
79}
80
81func (p *Parser) Parse(text string) error {
82	p.input = text
83	p.Root = newList()
84	p.pos = 0
85	return p.parseText(p.Root)
86}
87
88// consumeText return the parsed text since last cosumeText
89func (p *Parser) consumeText() string {
90	value := p.input[p.start:p.pos]
91	p.start = p.pos
92	return value
93}
94
95// next returns the next rune in the input.
96func (p *Parser) next() rune {
97	if int(p.pos) >= len(p.input) {
98		p.width = 0
99		return eof
100	}
101	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
102	p.width = w
103	p.pos += p.width
104	return r
105}
106
107// peek returns but does not consume the next rune in the input.
108func (p *Parser) peek() rune {
109	r := p.next()
110	p.backup()
111	return r
112}
113
114// backup steps back one rune. Can only be called once per call of next.
115func (p *Parser) backup() {
116	p.pos -= p.width
117}
118
119func (p *Parser) parseText(cur *ListNode) error {
120	for {
121		if strings.HasPrefix(p.input[p.pos:], leftDelim) {
122			if p.pos > p.start {
123				cur.append(newText(p.consumeText()))
124			}
125			return p.parseLeftDelim(cur)
126		}
127		if p.next() == eof {
128			break
129		}
130	}
131	// Correctly reached EOF.
132	if p.pos > p.start {
133		cur.append(newText(p.consumeText()))
134	}
135	return nil
136}
137
138// parseLeftDelim scans the left delimiter, which is known to be present.
139func (p *Parser) parseLeftDelim(cur *ListNode) error {
140	p.pos += len(leftDelim)
141	p.consumeText()
142	newNode := newList()
143	cur.append(newNode)
144	cur = newNode
145	return p.parseInsideAction(cur)
146}
147
148func (p *Parser) parseInsideAction(cur *ListNode) error {
149	prefixMap := map[string]func(*ListNode) error{
150		rightDelim: p.parseRightDelim,
151		"[?(":      p.parseFilter,
152		"..":       p.parseRecursive,
153	}
154	for prefix, parseFunc := range prefixMap {
155		if strings.HasPrefix(p.input[p.pos:], prefix) {
156			return parseFunc(cur)
157		}
158	}
159
160	switch r := p.next(); {
161	case r == eof || isEndOfLine(r):
162		return fmt.Errorf("unclosed action")
163	case r == ' ':
164		p.consumeText()
165	case r == '@' || r == '$': //the current object, just pass it
166		p.consumeText()
167	case r == '[':
168		return p.parseArray(cur)
169	case r == '"' || r == '\'':
170		return p.parseQuote(cur, r)
171	case r == '.':
172		return p.parseField(cur)
173	case r == '+' || r == '-' || unicode.IsDigit(r):
174		p.backup()
175		return p.parseNumber(cur)
176	case isAlphaNumeric(r):
177		p.backup()
178		return p.parseIdentifier(cur)
179	default:
180		return fmt.Errorf("unrecognized character in action: %#U", r)
181	}
182	return p.parseInsideAction(cur)
183}
184
185// parseRightDelim scans the right delimiter, which is known to be present.
186func (p *Parser) parseRightDelim(cur *ListNode) error {
187	p.pos += len(rightDelim)
188	p.consumeText()
189	cur = p.Root
190	return p.parseText(cur)
191}
192
193// parseIdentifier scans build-in keywords, like "range" "end"
194func (p *Parser) parseIdentifier(cur *ListNode) error {
195	var r rune
196	for {
197		r = p.next()
198		if isTerminator(r) {
199			p.backup()
200			break
201		}
202	}
203	value := p.consumeText()
204
205	if isBool(value) {
206		v, err := strconv.ParseBool(value)
207		if err != nil {
208			return fmt.Errorf("can not parse bool '%s': %s", value, err.Error())
209		}
210
211		cur.append(newBool(v))
212	} else {
213		cur.append(newIdentifier(value))
214	}
215
216	return p.parseInsideAction(cur)
217}
218
219// parseRecursive scans the recursive desent operator ..
220func (p *Parser) parseRecursive(cur *ListNode) error {
221	p.pos += len("..")
222	p.consumeText()
223	cur.append(newRecursive())
224	if r := p.peek(); isAlphaNumeric(r) {
225		return p.parseField(cur)
226	}
227	return p.parseInsideAction(cur)
228}
229
230// parseNumber scans number
231func (p *Parser) parseNumber(cur *ListNode) error {
232	r := p.peek()
233	if r == '+' || r == '-' {
234		r = p.next()
235	}
236	for {
237		r = p.next()
238		if r != '.' && !unicode.IsDigit(r) {
239			p.backup()
240			break
241		}
242	}
243	value := p.consumeText()
244	i, err := strconv.Atoi(value)
245	if err == nil {
246		cur.append(newInt(i))
247		return p.parseInsideAction(cur)
248	}
249	d, err := strconv.ParseFloat(value, 64)
250	if err == nil {
251		cur.append(newFloat(d))
252		return p.parseInsideAction(cur)
253	}
254	return fmt.Errorf("cannot parse number %s", value)
255}
256
257// parseArray scans array index selection
258func (p *Parser) parseArray(cur *ListNode) error {
259Loop:
260	for {
261		switch p.next() {
262		case eof, '\n':
263			return fmt.Errorf("unterminated array")
264		case ']':
265			break Loop
266		}
267	}
268	text := p.consumeText()
269	text = string(text[1 : len(text)-1])
270	if text == "*" {
271		text = ":"
272	}
273
274	//union operator
275	strs := strings.Split(text, ",")
276	if len(strs) > 1 {
277		union := []*ListNode{}
278		for _, str := range strs {
279			parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " ")))
280			if err != nil {
281				return err
282			}
283			union = append(union, parser.Root)
284		}
285		cur.append(newUnion(union))
286		return p.parseInsideAction(cur)
287	}
288
289	// dict key
290	value := dictKeyRex.FindStringSubmatch(text)
291	if value != nil {
292		parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1]))
293		if err != nil {
294			return err
295		}
296		for _, node := range parser.Root.Nodes {
297			cur.append(node)
298		}
299		return p.parseInsideAction(cur)
300	}
301
302	//slice operator
303	value = sliceOperatorRex.FindStringSubmatch(text)
304	if value == nil {
305		return fmt.Errorf("invalid array index %s", text)
306	}
307	value = value[1:]
308	params := [3]ParamsEntry{}
309	for i := 0; i < 3; i++ {
310		if value[i] != "" {
311			if i > 0 {
312				value[i] = value[i][1:]
313			}
314			if i > 0 && value[i] == "" {
315				params[i].Known = false
316			} else {
317				var err error
318				params[i].Known = true
319				params[i].Value, err = strconv.Atoi(value[i])
320				if err != nil {
321					return fmt.Errorf("array index %s is not a number", value[i])
322				}
323			}
324		} else {
325			if i == 1 {
326				params[i].Known = true
327				params[i].Value = params[0].Value + 1
328			} else {
329				params[i].Known = false
330				params[i].Value = 0
331			}
332		}
333	}
334	cur.append(newArray(params))
335	return p.parseInsideAction(cur)
336}
337
338// parseFilter scans filter inside array selection
339func (p *Parser) parseFilter(cur *ListNode) error {
340	p.pos += len("[?(")
341	p.consumeText()
342	begin := false
343	end := false
344	var pair rune
345
346Loop:
347	for {
348		r := p.next()
349		switch r {
350		case eof, '\n':
351			return fmt.Errorf("unterminated filter")
352		case '"', '\'':
353			if begin == false {
354				//save the paired rune
355				begin = true
356				pair = r
357				continue
358			}
359			//only add when met paired rune
360			if p.input[p.pos-2] != '\\' && r == pair {
361				end = true
362			}
363		case ')':
364			//in rightParser below quotes only appear zero or once
365			//and must be paired at the beginning and end
366			if begin == end {
367				break Loop
368			}
369		}
370	}
371	if p.next() != ']' {
372		return fmt.Errorf("unclosed array expect ]")
373	}
374	reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`)
375	text := p.consumeText()
376	text = string(text[:len(text)-2])
377	value := reg.FindStringSubmatch(text)
378	if value == nil {
379		parser, err := parseAction("text", text)
380		if err != nil {
381			return err
382		}
383		cur.append(newFilter(parser.Root, newList(), "exists"))
384	} else {
385		leftParser, err := parseAction("left", value[1])
386		if err != nil {
387			return err
388		}
389		rightParser, err := parseAction("right", value[3])
390		if err != nil {
391			return err
392		}
393		cur.append(newFilter(leftParser.Root, rightParser.Root, value[2]))
394	}
395	return p.parseInsideAction(cur)
396}
397
398// parseQuote unquotes string inside double or single quote
399func (p *Parser) parseQuote(cur *ListNode, end rune) error {
400Loop:
401	for {
402		switch p.next() {
403		case eof, '\n':
404			return fmt.Errorf("unterminated quoted string")
405		case end:
406			//if it's not escape break the Loop
407			if p.input[p.pos-2] != '\\' {
408				break Loop
409			}
410		}
411	}
412	value := p.consumeText()
413	s, err := UnquoteExtend(value)
414	if err != nil {
415		return fmt.Errorf("unquote string %s error %v", value, err)
416	}
417	cur.append(newText(s))
418	return p.parseInsideAction(cur)
419}
420
421// parseField scans a field until a terminator
422func (p *Parser) parseField(cur *ListNode) error {
423	p.consumeText()
424	for p.advance() {
425	}
426	value := p.consumeText()
427	if value == "*" {
428		cur.append(newWildcard())
429	} else {
430		cur.append(newField(strings.Replace(value, "\\", "", -1)))
431	}
432	return p.parseInsideAction(cur)
433}
434
435// advance scans until next non-escaped terminator
436func (p *Parser) advance() bool {
437	r := p.next()
438	if r == '\\' {
439		p.next()
440	} else if isTerminator(r) {
441		p.backup()
442		return false
443	}
444	return true
445}
446
447// isTerminator reports whether the input is at valid termination character to appear after an identifier.
448func isTerminator(r rune) bool {
449	if isSpace(r) || isEndOfLine(r) {
450		return true
451	}
452	switch r {
453	case eof, '.', ',', '[', ']', '$', '@', '{', '}':
454		return true
455	}
456	return false
457}
458
459// isSpace reports whether r is a space character.
460func isSpace(r rune) bool {
461	return r == ' ' || r == '\t'
462}
463
464// isEndOfLine reports whether r is an end-of-line character.
465func isEndOfLine(r rune) bool {
466	return r == '\r' || r == '\n'
467}
468
469// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
470func isAlphaNumeric(r rune) bool {
471	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
472}
473
474// isBool reports whether s is a boolean value.
475func isBool(s string) bool {
476	return s == "true" || s == "false"
477}
478
479//UnquoteExtend is almost same as strconv.Unquote(), but it support parse single quotes as a string
480func UnquoteExtend(s string) (string, error) {
481	n := len(s)
482	if n < 2 {
483		return "", ErrSyntax
484	}
485	quote := s[0]
486	if quote != s[n-1] {
487		return "", ErrSyntax
488	}
489	s = s[1 : n-1]
490
491	if quote != '"' && quote != '\'' {
492		return "", ErrSyntax
493	}
494
495	// Is it trivial?  Avoid allocation.
496	if !contains(s, '\\') && !contains(s, quote) {
497		return s, nil
498	}
499
500	var runeTmp [utf8.UTFMax]byte
501	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
502	for len(s) > 0 {
503		c, multibyte, ss, err := strconv.UnquoteChar(s, quote)
504		if err != nil {
505			return "", err
506		}
507		s = ss
508		if c < utf8.RuneSelf || !multibyte {
509			buf = append(buf, byte(c))
510		} else {
511			n := utf8.EncodeRune(runeTmp[:], c)
512			buf = append(buf, runeTmp[:n]...)
513		}
514	}
515	return string(buf), nil
516}
517
518func contains(s string, c byte) bool {
519	for i := 0; i < len(s); i++ {
520		if s[i] == c {
521			return true
522		}
523	}
524	return false
525}
526