1/*
2Copyright 2015 The Kubernetes Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package jsonpath
18
19import (
20	"errors"
21	"fmt"
22	"regexp"
23	"strconv"
24	"strings"
25	"unicode"
26	"unicode/utf8"
27)
28
29const eof = -1
30
31const (
32	leftDelim  = "{"
33	rightDelim = "}"
34)
35
36type Parser struct {
37	Name  string
38	Root  *ListNode
39	input string
40	pos   int
41	start int
42	width int
43}
44
45var (
46	ErrSyntax        = errors.New("invalid syntax")
47	dictKeyRex       = regexp.MustCompile(`^'([^']*)'$`)
48	sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:-?[\d]*)?$`)
49)
50
51// Parse parsed the given text and return a node Parser.
52// If an error is encountered, parsing stops and an empty
53// Parser is returned with the error
54func Parse(name, text string) (*Parser, error) {
55	p := NewParser(name)
56	err := p.Parse(text)
57	if err != nil {
58		p = nil
59	}
60	return p, err
61}
62
63func NewParser(name string) *Parser {
64	return &Parser{
65		Name: name,
66	}
67}
68
69// parseAction parsed the expression inside delimiter
70func parseAction(name, text string) (*Parser, error) {
71	p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim))
72	// when error happens, p will be nil, so we need to return here
73	if err != nil {
74		return p, err
75	}
76	p.Root = p.Root.Nodes[0].(*ListNode)
77	return p, nil
78}
79
80func (p *Parser) Parse(text string) error {
81	p.input = text
82	p.Root = newList()
83	p.pos = 0
84	return p.parseText(p.Root)
85}
86
87// consumeText return the parsed text since last cosumeText
88func (p *Parser) consumeText() string {
89	value := p.input[p.start:p.pos]
90	p.start = p.pos
91	return value
92}
93
94// next returns the next rune in the input.
95func (p *Parser) next() rune {
96	if p.pos >= len(p.input) {
97		p.width = 0
98		return eof
99	}
100	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
101	p.width = w
102	p.pos += p.width
103	return r
104}
105
106// peek returns but does not consume the next rune in the input.
107func (p *Parser) peek() rune {
108	r := p.next()
109	p.backup()
110	return r
111}
112
113// backup steps back one rune. Can only be called once per call of next.
114func (p *Parser) backup() {
115	p.pos -= p.width
116}
117
118func (p *Parser) parseText(cur *ListNode) error {
119	for {
120		if strings.HasPrefix(p.input[p.pos:], leftDelim) {
121			if p.pos > p.start {
122				cur.append(newText(p.consumeText()))
123			}
124			return p.parseLeftDelim(cur)
125		}
126		if p.next() == eof {
127			break
128		}
129	}
130	// Correctly reached EOF.
131	if p.pos > p.start {
132		cur.append(newText(p.consumeText()))
133	}
134	return nil
135}
136
137// parseLeftDelim scans the left delimiter, which is known to be present.
138func (p *Parser) parseLeftDelim(cur *ListNode) error {
139	p.pos += len(leftDelim)
140	p.consumeText()
141	newNode := newList()
142	cur.append(newNode)
143	cur = newNode
144	return p.parseInsideAction(cur)
145}
146
147func (p *Parser) parseInsideAction(cur *ListNode) error {
148	prefixMap := map[string]func(*ListNode) error{
149		rightDelim: p.parseRightDelim,
150		"[?(":      p.parseFilter,
151		"..":       p.parseRecursive,
152	}
153	for prefix, parseFunc := range prefixMap {
154		if strings.HasPrefix(p.input[p.pos:], prefix) {
155			return parseFunc(cur)
156		}
157	}
158
159	switch r := p.next(); {
160	case r == eof || isEndOfLine(r):
161		return fmt.Errorf("unclosed action")
162	case r == ' ':
163		p.consumeText()
164	case r == '@' || r == '$': //the current object, just pass it
165		p.consumeText()
166	case r == '[':
167		return p.parseArray(cur)
168	case r == '"' || r == '\'':
169		return p.parseQuote(cur, r)
170	case r == '.':
171		return p.parseField(cur)
172	case r == '+' || r == '-' || unicode.IsDigit(r):
173		p.backup()
174		return p.parseNumber(cur)
175	case isAlphaNumeric(r):
176		p.backup()
177		return p.parseIdentifier(cur)
178	default:
179		return fmt.Errorf("unrecognized character in action: %#U", r)
180	}
181	return p.parseInsideAction(cur)
182}
183
184// parseRightDelim scans the right delimiter, which is known to be present.
185func (p *Parser) parseRightDelim(cur *ListNode) error {
186	p.pos += len(rightDelim)
187	p.consumeText()
188	return p.parseText(p.Root)
189}
190
191// parseIdentifier scans build-in keywords, like "range" "end"
192func (p *Parser) parseIdentifier(cur *ListNode) error {
193	var r rune
194	for {
195		r = p.next()
196		if isTerminator(r) {
197			p.backup()
198			break
199		}
200	}
201	value := p.consumeText()
202
203	if isBool(value) {
204		v, err := strconv.ParseBool(value)
205		if err != nil {
206			return fmt.Errorf("can not parse bool '%s': %s", value, err.Error())
207		}
208
209		cur.append(newBool(v))
210	} else {
211		cur.append(newIdentifier(value))
212	}
213
214	return p.parseInsideAction(cur)
215}
216
217// parseRecursive scans the recursive descent operator ..
218func (p *Parser) parseRecursive(cur *ListNode) error {
219	if lastIndex := len(cur.Nodes) - 1; lastIndex >= 0 && cur.Nodes[lastIndex].Type() == NodeRecursive {
220		return fmt.Errorf("invalid multiple recursive descent")
221	}
222	p.pos += len("..")
223	p.consumeText()
224	cur.append(newRecursive())
225	if r := p.peek(); isAlphaNumeric(r) {
226		return p.parseField(cur)
227	}
228	return p.parseInsideAction(cur)
229}
230
231// parseNumber scans number
232func (p *Parser) parseNumber(cur *ListNode) error {
233	r := p.peek()
234	if r == '+' || r == '-' {
235		p.next()
236	}
237	for {
238		r = p.next()
239		if r != '.' && !unicode.IsDigit(r) {
240			p.backup()
241			break
242		}
243	}
244	value := p.consumeText()
245	i, err := strconv.Atoi(value)
246	if err == nil {
247		cur.append(newInt(i))
248		return p.parseInsideAction(cur)
249	}
250	d, err := strconv.ParseFloat(value, 64)
251	if err == nil {
252		cur.append(newFloat(d))
253		return p.parseInsideAction(cur)
254	}
255	return fmt.Errorf("cannot parse number %s", value)
256}
257
258// parseArray scans array index selection
259func (p *Parser) parseArray(cur *ListNode) error {
260Loop:
261	for {
262		switch p.next() {
263		case eof, '\n':
264			return fmt.Errorf("unterminated array")
265		case ']':
266			break Loop
267		}
268	}
269	text := p.consumeText()
270	text = text[1 : len(text)-1]
271	if text == "*" {
272		text = ":"
273	}
274
275	//union operator
276	strs := strings.Split(text, ",")
277	if len(strs) > 1 {
278		union := []*ListNode{}
279		for _, str := range strs {
280			parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " ")))
281			if err != nil {
282				return err
283			}
284			union = append(union, parser.Root)
285		}
286		cur.append(newUnion(union))
287		return p.parseInsideAction(cur)
288	}
289
290	// dict key
291	value := dictKeyRex.FindStringSubmatch(text)
292	if value != nil {
293		parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1]))
294		if err != nil {
295			return err
296		}
297		for _, node := range parser.Root.Nodes {
298			cur.append(node)
299		}
300		return p.parseInsideAction(cur)
301	}
302
303	//slice operator
304	value = sliceOperatorRex.FindStringSubmatch(text)
305	if value == nil {
306		return fmt.Errorf("invalid array index %s", text)
307	}
308	value = value[1:]
309	params := [3]ParamsEntry{}
310	for i := 0; i < 3; i++ {
311		if value[i] != "" {
312			if i > 0 {
313				value[i] = value[i][1:]
314			}
315			if i > 0 && value[i] == "" {
316				params[i].Known = false
317			} else {
318				var err error
319				params[i].Known = true
320				params[i].Value, err = strconv.Atoi(value[i])
321				if err != nil {
322					return fmt.Errorf("array index %s is not a number", value[i])
323				}
324			}
325		} else {
326			if i == 1 {
327				params[i].Known = true
328				params[i].Value = params[0].Value + 1
329				params[i].Derived = true
330			} else {
331				params[i].Known = false
332				params[i].Value = 0
333			}
334		}
335	}
336	cur.append(newArray(params))
337	return p.parseInsideAction(cur)
338}
339
340// parseFilter scans filter inside array selection
341func (p *Parser) parseFilter(cur *ListNode) error {
342	p.pos += len("[?(")
343	p.consumeText()
344	begin := false
345	end := false
346	var pair rune
347
348Loop:
349	for {
350		r := p.next()
351		switch r {
352		case eof, '\n':
353			return fmt.Errorf("unterminated filter")
354		case '"', '\'':
355			if begin == false {
356				//save the paired rune
357				begin = true
358				pair = r
359				continue
360			}
361			//only add when met paired rune
362			if p.input[p.pos-2] != '\\' && r == pair {
363				end = true
364			}
365		case ')':
366			//in rightParser below quotes only appear zero or once
367			//and must be paired at the beginning and end
368			if begin == end {
369				break Loop
370			}
371		}
372	}
373	if p.next() != ']' {
374		return fmt.Errorf("unclosed array expect ]")
375	}
376	reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`)
377	text := p.consumeText()
378	text = text[:len(text)-2]
379	value := reg.FindStringSubmatch(text)
380	if value == nil {
381		parser, err := parseAction("text", text)
382		if err != nil {
383			return err
384		}
385		cur.append(newFilter(parser.Root, newList(), "exists"))
386	} else {
387		leftParser, err := parseAction("left", value[1])
388		if err != nil {
389			return err
390		}
391		rightParser, err := parseAction("right", value[3])
392		if err != nil {
393			return err
394		}
395		cur.append(newFilter(leftParser.Root, rightParser.Root, value[2]))
396	}
397	return p.parseInsideAction(cur)
398}
399
400// parseQuote unquotes string inside double or single quote
401func (p *Parser) parseQuote(cur *ListNode, end rune) error {
402Loop:
403	for {
404		switch p.next() {
405		case eof, '\n':
406			return fmt.Errorf("unterminated quoted string")
407		case end:
408			//if it's not escape break the Loop
409			if p.input[p.pos-2] != '\\' {
410				break Loop
411			}
412		}
413	}
414	value := p.consumeText()
415	s, err := UnquoteExtend(value)
416	if err != nil {
417		return fmt.Errorf("unquote string %s error %v", value, err)
418	}
419	cur.append(newText(s))
420	return p.parseInsideAction(cur)
421}
422
423// parseField scans a field until a terminator
424func (p *Parser) parseField(cur *ListNode) error {
425	p.consumeText()
426	for p.advance() {
427	}
428	value := p.consumeText()
429	if value == "*" {
430		cur.append(newWildcard())
431	} else {
432		cur.append(newField(strings.Replace(value, "\\", "", -1)))
433	}
434	return p.parseInsideAction(cur)
435}
436
437// advance scans until next non-escaped terminator
438func (p *Parser) advance() bool {
439	r := p.next()
440	if r == '\\' {
441		p.next()
442	} else if isTerminator(r) {
443		p.backup()
444		return false
445	}
446	return true
447}
448
449// isTerminator reports whether the input is at valid termination character to appear after an identifier.
450func isTerminator(r rune) bool {
451	if isSpace(r) || isEndOfLine(r) {
452		return true
453	}
454	switch r {
455	case eof, '.', ',', '[', ']', '$', '@', '{', '}':
456		return true
457	}
458	return false
459}
460
461// isSpace reports whether r is a space character.
462func isSpace(r rune) bool {
463	return r == ' ' || r == '\t'
464}
465
466// isEndOfLine reports whether r is an end-of-line character.
467func isEndOfLine(r rune) bool {
468	return r == '\r' || r == '\n'
469}
470
471// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
472func isAlphaNumeric(r rune) bool {
473	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
474}
475
476// isBool reports whether s is a boolean value.
477func isBool(s string) bool {
478	return s == "true" || s == "false"
479}
480
481//UnquoteExtend is almost same as strconv.Unquote(), but it support parse single quotes as a string
482func UnquoteExtend(s string) (string, error) {
483	n := len(s)
484	if n < 2 {
485		return "", ErrSyntax
486	}
487	quote := s[0]
488	if quote != s[n-1] {
489		return "", ErrSyntax
490	}
491	s = s[1 : n-1]
492
493	if quote != '"' && quote != '\'' {
494		return "", ErrSyntax
495	}
496
497	// Is it trivial?  Avoid allocation.
498	if !contains(s, '\\') && !contains(s, quote) {
499		return s, nil
500	}
501
502	var runeTmp [utf8.UTFMax]byte
503	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
504	for len(s) > 0 {
505		c, multibyte, ss, err := strconv.UnquoteChar(s, quote)
506		if err != nil {
507			return "", err
508		}
509		s = ss
510		if c < utf8.RuneSelf || !multibyte {
511			buf = append(buf, byte(c))
512		} else {
513			n := utf8.EncodeRune(runeTmp[:], c)
514			buf = append(buf, runeTmp[:n]...)
515		}
516	}
517	return string(buf), nil
518}
519
520func contains(s string, c byte) bool {
521	for i := 0; i < len(s); i++ {
522		if s[i] == c {
523			return true
524		}
525	}
526	return false
527}
528