1/*
2Copyright 2015 The Kubernetes Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package jsonpath
18
19import (
20	"errors"
21	"fmt"
22	"regexp"
23	"strconv"
24	"strings"
25	"unicode"
26	"unicode/utf8"
27)
28
29const eof = -1
30
31const (
32	leftDelim  = "{"
33	rightDelim = "}"
34)
35
36type Parser struct {
37	Name  string
38	Root  *ListNode
39	input string
40	pos   int
41	start int
42	width int
43}
44
45var (
46	ErrSyntax        = errors.New("invalid syntax")
47	dictKeyRex       = regexp.MustCompile(`^'([^']*)'$`)
48	sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:-?[\d]*)?$`)
49)
50
51// Parse parsed the given text and return a node Parser.
52// If an error is encountered, parsing stops and an empty
53// Parser is returned with the error
54func Parse(name, text string) (*Parser, error) {
55	p := NewParser(name)
56	err := p.Parse(text)
57	if err != nil {
58		p = nil
59	}
60	return p, err
61}
62
63func NewParser(name string) *Parser {
64	return &Parser{
65		Name: name,
66	}
67}
68
69// parseAction parsed the expression inside delimiter
70func parseAction(name, text string) (*Parser, error) {
71	p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim))
72	// when error happens, p will be nil, so we need to return here
73	if err != nil {
74		return p, err
75	}
76	p.Root = p.Root.Nodes[0].(*ListNode)
77	return p, nil
78}
79
80func (p *Parser) Parse(text string) error {
81	p.input = text
82	p.Root = newList()
83	p.pos = 0
84	return p.parseText(p.Root)
85}
86
87// consumeText return the parsed text since last cosumeText
88func (p *Parser) consumeText() string {
89	value := p.input[p.start:p.pos]
90	p.start = p.pos
91	return value
92}
93
94// next returns the next rune in the input.
95func (p *Parser) next() rune {
96	if p.pos >= len(p.input) {
97		p.width = 0
98		return eof
99	}
100	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
101	p.width = w
102	p.pos += p.width
103	return r
104}
105
106// peek returns but does not consume the next rune in the input.
107func (p *Parser) peek() rune {
108	r := p.next()
109	p.backup()
110	return r
111}
112
113// backup steps back one rune. Can only be called once per call of next.
114func (p *Parser) backup() {
115	p.pos -= p.width
116}
117
118func (p *Parser) parseText(cur *ListNode) error {
119	for {
120		if strings.HasPrefix(p.input[p.pos:], leftDelim) {
121			if p.pos > p.start {
122				cur.append(newText(p.consumeText()))
123			}
124			return p.parseLeftDelim(cur)
125		}
126		if p.next() == eof {
127			break
128		}
129	}
130	// Correctly reached EOF.
131	if p.pos > p.start {
132		cur.append(newText(p.consumeText()))
133	}
134	return nil
135}
136
137// parseLeftDelim scans the left delimiter, which is known to be present.
138func (p *Parser) parseLeftDelim(cur *ListNode) error {
139	p.pos += len(leftDelim)
140	p.consumeText()
141	newNode := newList()
142	cur.append(newNode)
143	cur = newNode
144	return p.parseInsideAction(cur)
145}
146
147func (p *Parser) parseInsideAction(cur *ListNode) error {
148	prefixMap := map[string]func(*ListNode) error{
149		rightDelim: p.parseRightDelim,
150		"[?(":      p.parseFilter,
151		"..":       p.parseRecursive,
152	}
153	for prefix, parseFunc := range prefixMap {
154		if strings.HasPrefix(p.input[p.pos:], prefix) {
155			return parseFunc(cur)
156		}
157	}
158
159	switch r := p.next(); {
160	case r == eof || isEndOfLine(r):
161		return fmt.Errorf("unclosed action")
162	case r == ' ':
163		p.consumeText()
164	case r == '@' || r == '$': //the current object, just pass it
165		p.consumeText()
166	case r == '[':
167		return p.parseArray(cur)
168	case r == '"' || r == '\'':
169		return p.parseQuote(cur, r)
170	case r == '.':
171		return p.parseField(cur)
172	case r == '+' || r == '-' || unicode.IsDigit(r):
173		p.backup()
174		return p.parseNumber(cur)
175	case isAlphaNumeric(r):
176		p.backup()
177		return p.parseIdentifier(cur)
178	default:
179		return fmt.Errorf("unrecognized character in action: %#U", r)
180	}
181	return p.parseInsideAction(cur)
182}
183
184// parseRightDelim scans the right delimiter, which is known to be present.
185func (p *Parser) parseRightDelim(cur *ListNode) error {
186	p.pos += len(rightDelim)
187	p.consumeText()
188	return p.parseText(p.Root)
189}
190
191// parseIdentifier scans build-in keywords, like "range" "end"
192func (p *Parser) parseIdentifier(cur *ListNode) error {
193	var r rune
194	for {
195		r = p.next()
196		if isTerminator(r) {
197			p.backup()
198			break
199		}
200	}
201	value := p.consumeText()
202
203	if isBool(value) {
204		v, err := strconv.ParseBool(value)
205		if err != nil {
206			return fmt.Errorf("can not parse bool '%s': %s", value, err.Error())
207		}
208
209		cur.append(newBool(v))
210	} else {
211		cur.append(newIdentifier(value))
212	}
213
214	return p.parseInsideAction(cur)
215}
216
217// parseRecursive scans the recursive desent operator ..
218func (p *Parser) parseRecursive(cur *ListNode) error {
219	p.pos += len("..")
220	p.consumeText()
221	cur.append(newRecursive())
222	if r := p.peek(); isAlphaNumeric(r) {
223		return p.parseField(cur)
224	}
225	return p.parseInsideAction(cur)
226}
227
228// parseNumber scans number
229func (p *Parser) parseNumber(cur *ListNode) error {
230	r := p.peek()
231	if r == '+' || r == '-' {
232		p.next()
233	}
234	for {
235		r = p.next()
236		if r != '.' && !unicode.IsDigit(r) {
237			p.backup()
238			break
239		}
240	}
241	value := p.consumeText()
242	i, err := strconv.Atoi(value)
243	if err == nil {
244		cur.append(newInt(i))
245		return p.parseInsideAction(cur)
246	}
247	d, err := strconv.ParseFloat(value, 64)
248	if err == nil {
249		cur.append(newFloat(d))
250		return p.parseInsideAction(cur)
251	}
252	return fmt.Errorf("cannot parse number %s", value)
253}
254
255// parseArray scans array index selection
256func (p *Parser) parseArray(cur *ListNode) error {
257Loop:
258	for {
259		switch p.next() {
260		case eof, '\n':
261			return fmt.Errorf("unterminated array")
262		case ']':
263			break Loop
264		}
265	}
266	text := p.consumeText()
267	text = text[1 : len(text)-1]
268	if text == "*" {
269		text = ":"
270	}
271
272	//union operator
273	strs := strings.Split(text, ",")
274	if len(strs) > 1 {
275		union := []*ListNode{}
276		for _, str := range strs {
277			parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " ")))
278			if err != nil {
279				return err
280			}
281			union = append(union, parser.Root)
282		}
283		cur.append(newUnion(union))
284		return p.parseInsideAction(cur)
285	}
286
287	// dict key
288	value := dictKeyRex.FindStringSubmatch(text)
289	if value != nil {
290		parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1]))
291		if err != nil {
292			return err
293		}
294		for _, node := range parser.Root.Nodes {
295			cur.append(node)
296		}
297		return p.parseInsideAction(cur)
298	}
299
300	//slice operator
301	value = sliceOperatorRex.FindStringSubmatch(text)
302	if value == nil {
303		return fmt.Errorf("invalid array index %s", text)
304	}
305	value = value[1:]
306	params := [3]ParamsEntry{}
307	for i := 0; i < 3; i++ {
308		if value[i] != "" {
309			if i > 0 {
310				value[i] = value[i][1:]
311			}
312			if i > 0 && value[i] == "" {
313				params[i].Known = false
314			} else {
315				var err error
316				params[i].Known = true
317				params[i].Value, err = strconv.Atoi(value[i])
318				if err != nil {
319					return fmt.Errorf("array index %s is not a number", value[i])
320				}
321			}
322		} else {
323			if i == 1 {
324				params[i].Known = true
325				params[i].Value = params[0].Value + 1
326				params[i].Derived = true
327			} else {
328				params[i].Known = false
329				params[i].Value = 0
330			}
331		}
332	}
333	cur.append(newArray(params))
334	return p.parseInsideAction(cur)
335}
336
337// parseFilter scans filter inside array selection
338func (p *Parser) parseFilter(cur *ListNode) error {
339	p.pos += len("[?(")
340	p.consumeText()
341	begin := false
342	end := false
343	var pair rune
344
345Loop:
346	for {
347		r := p.next()
348		switch r {
349		case eof, '\n':
350			return fmt.Errorf("unterminated filter")
351		case '"', '\'':
352			if begin == false {
353				//save the paired rune
354				begin = true
355				pair = r
356				continue
357			}
358			//only add when met paired rune
359			if p.input[p.pos-2] != '\\' && r == pair {
360				end = true
361			}
362		case ')':
363			//in rightParser below quotes only appear zero or once
364			//and must be paired at the beginning and end
365			if begin == end {
366				break Loop
367			}
368		}
369	}
370	if p.next() != ']' {
371		return fmt.Errorf("unclosed array expect ]")
372	}
373	reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`)
374	text := p.consumeText()
375	text = text[:len(text)-2]
376	value := reg.FindStringSubmatch(text)
377	if value == nil {
378		parser, err := parseAction("text", text)
379		if err != nil {
380			return err
381		}
382		cur.append(newFilter(parser.Root, newList(), "exists"))
383	} else {
384		leftParser, err := parseAction("left", value[1])
385		if err != nil {
386			return err
387		}
388		rightParser, err := parseAction("right", value[3])
389		if err != nil {
390			return err
391		}
392		cur.append(newFilter(leftParser.Root, rightParser.Root, value[2]))
393	}
394	return p.parseInsideAction(cur)
395}
396
397// parseQuote unquotes string inside double or single quote
398func (p *Parser) parseQuote(cur *ListNode, end rune) error {
399Loop:
400	for {
401		switch p.next() {
402		case eof, '\n':
403			return fmt.Errorf("unterminated quoted string")
404		case end:
405			//if it's not escape break the Loop
406			if p.input[p.pos-2] != '\\' {
407				break Loop
408			}
409		}
410	}
411	value := p.consumeText()
412	s, err := UnquoteExtend(value)
413	if err != nil {
414		return fmt.Errorf("unquote string %s error %v", value, err)
415	}
416	cur.append(newText(s))
417	return p.parseInsideAction(cur)
418}
419
420// parseField scans a field until a terminator
421func (p *Parser) parseField(cur *ListNode) error {
422	p.consumeText()
423	for p.advance() {
424	}
425	value := p.consumeText()
426	if value == "*" {
427		cur.append(newWildcard())
428	} else {
429		cur.append(newField(strings.Replace(value, "\\", "", -1)))
430	}
431	return p.parseInsideAction(cur)
432}
433
434// advance scans until next non-escaped terminator
435func (p *Parser) advance() bool {
436	r := p.next()
437	if r == '\\' {
438		p.next()
439	} else if isTerminator(r) {
440		p.backup()
441		return false
442	}
443	return true
444}
445
446// isTerminator reports whether the input is at valid termination character to appear after an identifier.
447func isTerminator(r rune) bool {
448	if isSpace(r) || isEndOfLine(r) {
449		return true
450	}
451	switch r {
452	case eof, '.', ',', '[', ']', '$', '@', '{', '}':
453		return true
454	}
455	return false
456}
457
458// isSpace reports whether r is a space character.
459func isSpace(r rune) bool {
460	return r == ' ' || r == '\t'
461}
462
463// isEndOfLine reports whether r is an end-of-line character.
464func isEndOfLine(r rune) bool {
465	return r == '\r' || r == '\n'
466}
467
468// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
469func isAlphaNumeric(r rune) bool {
470	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
471}
472
473// isBool reports whether s is a boolean value.
474func isBool(s string) bool {
475	return s == "true" || s == "false"
476}
477
478//UnquoteExtend is almost same as strconv.Unquote(), but it support parse single quotes as a string
479func UnquoteExtend(s string) (string, error) {
480	n := len(s)
481	if n < 2 {
482		return "", ErrSyntax
483	}
484	quote := s[0]
485	if quote != s[n-1] {
486		return "", ErrSyntax
487	}
488	s = s[1 : n-1]
489
490	if quote != '"' && quote != '\'' {
491		return "", ErrSyntax
492	}
493
494	// Is it trivial?  Avoid allocation.
495	if !contains(s, '\\') && !contains(s, quote) {
496		return s, nil
497	}
498
499	var runeTmp [utf8.UTFMax]byte
500	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
501	for len(s) > 0 {
502		c, multibyte, ss, err := strconv.UnquoteChar(s, quote)
503		if err != nil {
504			return "", err
505		}
506		s = ss
507		if c < utf8.RuneSelf || !multibyte {
508			buf = append(buf, byte(c))
509		} else {
510			n := utf8.EncodeRune(runeTmp[:], c)
511			buf = append(buf, runeTmp[:n]...)
512		}
513	}
514	return string(buf), nil
515}
516
517func contains(s string, c byte) bool {
518	for i := 0; i < len(s); i++ {
519		if s[i] == c {
520			return true
521		}
522	}
523	return false
524}
525