1package jmespath
2
3import (
4	"encoding/json"
5	"fmt"
6	"strconv"
7	"strings"
8)
9
10type astNodeType int
11
12//go:generate stringer -type astNodeType
13const (
14	ASTEmpty astNodeType = iota
15	ASTComparator
16	ASTCurrentNode
17	ASTExpRef
18	ASTFunctionExpression
19	ASTField
20	ASTFilterProjection
21	ASTFlatten
22	ASTIdentity
23	ASTIndex
24	ASTIndexExpression
25	ASTKeyValPair
26	ASTLiteral
27	ASTMultiSelectHash
28	ASTMultiSelectList
29	ASTOrExpression
30	ASTAndExpression
31	ASTNotExpression
32	ASTPipe
33	ASTProjection
34	ASTSubexpression
35	ASTSlice
36	ASTValueProjection
37)
38
39// ASTNode represents the abstract syntax tree of a JMESPath expression.
40type ASTNode struct {
41	nodeType astNodeType
42	value    interface{}
43	children []ASTNode
44}
45
46func (node ASTNode) String() string {
47	return node.PrettyPrint(0)
48}
49
50// PrettyPrint will pretty print the parsed AST.
51// The AST is an implementation detail and this pretty print
52// function is provided as a convenience method to help with
53// debugging.  You should not rely on its output as the internal
54// structure of the AST may change at any time.
55func (node ASTNode) PrettyPrint(indent int) string {
56	spaces := strings.Repeat(" ", indent)
57	output := fmt.Sprintf("%s%s {\n", spaces, node.nodeType)
58	nextIndent := indent + 2
59	if node.value != nil {
60		if converted, ok := node.value.(fmt.Stringer); ok {
61			// Account for things like comparator nodes
62			// that are enums with a String() method.
63			output += fmt.Sprintf("%svalue: %s\n", strings.Repeat(" ", nextIndent), converted.String())
64		} else {
65			output += fmt.Sprintf("%svalue: %#v\n", strings.Repeat(" ", nextIndent), node.value)
66		}
67	}
68	lastIndex := len(node.children)
69	if lastIndex > 0 {
70		output += fmt.Sprintf("%schildren: {\n", strings.Repeat(" ", nextIndent))
71		childIndent := nextIndent + 2
72		for _, elem := range node.children {
73			output += elem.PrettyPrint(childIndent)
74		}
75	}
76	output += fmt.Sprintf("%s}\n", spaces)
77	return output
78}
79
80var bindingPowers = map[tokType]int{
81	tEOF:                0,
82	tUnquotedIdentifier: 0,
83	tQuotedIdentifier:   0,
84	tRbracket:           0,
85	tRparen:             0,
86	tComma:              0,
87	tRbrace:             0,
88	tNumber:             0,
89	tCurrent:            0,
90	tExpref:             0,
91	tColon:              0,
92	tPipe:               1,
93	tOr:                 2,
94	tAnd:                3,
95	tEQ:                 5,
96	tLT:                 5,
97	tLTE:                5,
98	tGT:                 5,
99	tGTE:                5,
100	tNE:                 5,
101	tFlatten:            9,
102	tStar:               20,
103	tFilter:             21,
104	tDot:                40,
105	tNot:                45,
106	tLbrace:             50,
107	tLbracket:           55,
108	tLparen:             60,
109}
110
111// Parser holds state about the current expression being parsed.
112type Parser struct {
113	expression string
114	tokens     []token
115	index      int
116}
117
118// NewParser creates a new JMESPath parser.
119func NewParser() *Parser {
120	p := Parser{}
121	return &p
122}
123
124// Parse will compile a JMESPath expression.
125func (p *Parser) Parse(expression string) (ASTNode, error) {
126	lexer := NewLexer()
127	p.expression = expression
128	p.index = 0
129	tokens, err := lexer.tokenize(expression)
130	if err != nil {
131		return ASTNode{}, err
132	}
133	p.tokens = tokens
134	parsed, err := p.parseExpression(0)
135	if err != nil {
136		return ASTNode{}, err
137	}
138	if p.current() != tEOF {
139		return ASTNode{}, p.syntaxError(fmt.Sprintf(
140			"Unexpected token at the end of the expression: %s", p.current()))
141	}
142	return parsed, nil
143}
144
145func (p *Parser) parseExpression(bindingPower int) (ASTNode, error) {
146	var err error
147	leftToken := p.lookaheadToken(0)
148	p.advance()
149	leftNode, err := p.nud(leftToken)
150	if err != nil {
151		return ASTNode{}, err
152	}
153	currentToken := p.current()
154	for bindingPower < bindingPowers[currentToken] {
155		p.advance()
156		leftNode, err = p.led(currentToken, leftNode)
157		if err != nil {
158			return ASTNode{}, err
159		}
160		currentToken = p.current()
161	}
162	return leftNode, nil
163}
164
165func (p *Parser) parseIndexExpression() (ASTNode, error) {
166	if p.lookahead(0) == tColon || p.lookahead(1) == tColon {
167		return p.parseSliceExpression()
168	}
169	indexStr := p.lookaheadToken(0).value
170	parsedInt, err := strconv.Atoi(indexStr)
171	if err != nil {
172		return ASTNode{}, err
173	}
174	indexNode := ASTNode{nodeType: ASTIndex, value: parsedInt}
175	p.advance()
176	if err := p.match(tRbracket); err != nil {
177		return ASTNode{}, err
178	}
179	return indexNode, nil
180}
181
182func (p *Parser) parseSliceExpression() (ASTNode, error) {
183	parts := []*int{nil, nil, nil}
184	index := 0
185	current := p.current()
186	for current != tRbracket && index < 3 {
187		if current == tColon {
188			index++
189			p.advance()
190		} else if current == tNumber {
191			parsedInt, err := strconv.Atoi(p.lookaheadToken(0).value)
192			if err != nil {
193				return ASTNode{}, err
194			}
195			parts[index] = &parsedInt
196			p.advance()
197		} else {
198			return ASTNode{}, p.syntaxError(
199				"Expected tColon or tNumber" + ", received: " + p.current().String())
200		}
201		current = p.current()
202	}
203	if err := p.match(tRbracket); err != nil {
204		return ASTNode{}, err
205	}
206	return ASTNode{
207		nodeType: ASTSlice,
208		value:    parts,
209	}, nil
210}
211
212func (p *Parser) match(tokenType tokType) error {
213	if p.current() == tokenType {
214		p.advance()
215		return nil
216	}
217	return p.syntaxError("Expected " + tokenType.String() + ", received: " + p.current().String())
218}
219
220func (p *Parser) led(tokenType tokType, node ASTNode) (ASTNode, error) {
221	switch tokenType {
222	case tDot:
223		if p.current() != tStar {
224			right, err := p.parseDotRHS(bindingPowers[tDot])
225			return ASTNode{
226				nodeType: ASTSubexpression,
227				children: []ASTNode{node, right},
228			}, err
229		}
230		p.advance()
231		right, err := p.parseProjectionRHS(bindingPowers[tDot])
232		return ASTNode{
233			nodeType: ASTValueProjection,
234			children: []ASTNode{node, right},
235		}, err
236	case tPipe:
237		right, err := p.parseExpression(bindingPowers[tPipe])
238		return ASTNode{nodeType: ASTPipe, children: []ASTNode{node, right}}, err
239	case tOr:
240		right, err := p.parseExpression(bindingPowers[tOr])
241		return ASTNode{nodeType: ASTOrExpression, children: []ASTNode{node, right}}, err
242	case tAnd:
243		right, err := p.parseExpression(bindingPowers[tAnd])
244		return ASTNode{nodeType: ASTAndExpression, children: []ASTNode{node, right}}, err
245	case tLparen:
246		name := node.value
247		var args []ASTNode
248		for p.current() != tRparen {
249			expression, err := p.parseExpression(0)
250			if err != nil {
251				return ASTNode{}, err
252			}
253			if p.current() == tComma {
254				if err := p.match(tComma); err != nil {
255					return ASTNode{}, err
256				}
257			}
258			args = append(args, expression)
259		}
260		if err := p.match(tRparen); err != nil {
261			return ASTNode{}, err
262		}
263		return ASTNode{
264			nodeType: ASTFunctionExpression,
265			value:    name,
266			children: args,
267		}, nil
268	case tFilter:
269		return p.parseFilter(node)
270	case tFlatten:
271		left := ASTNode{nodeType: ASTFlatten, children: []ASTNode{node}}
272		right, err := p.parseProjectionRHS(bindingPowers[tFlatten])
273		return ASTNode{
274			nodeType: ASTProjection,
275			children: []ASTNode{left, right},
276		}, err
277	case tEQ, tNE, tGT, tGTE, tLT, tLTE:
278		right, err := p.parseExpression(bindingPowers[tokenType])
279		if err != nil {
280			return ASTNode{}, err
281		}
282		return ASTNode{
283			nodeType: ASTComparator,
284			value:    tokenType,
285			children: []ASTNode{node, right},
286		}, nil
287	case tLbracket:
288		tokenType := p.current()
289		var right ASTNode
290		var err error
291		if tokenType == tNumber || tokenType == tColon {
292			right, err = p.parseIndexExpression()
293			if err != nil {
294				return ASTNode{}, err
295			}
296			return p.projectIfSlice(node, right)
297		}
298		// Otherwise this is a projection.
299		if err := p.match(tStar); err != nil {
300			return ASTNode{}, err
301		}
302		if err := p.match(tRbracket); err != nil {
303			return ASTNode{}, err
304		}
305		right, err = p.parseProjectionRHS(bindingPowers[tStar])
306		if err != nil {
307			return ASTNode{}, err
308		}
309		return ASTNode{
310			nodeType: ASTProjection,
311			children: []ASTNode{node, right},
312		}, nil
313	}
314	return ASTNode{}, p.syntaxError("Unexpected token: " + tokenType.String())
315}
316
317func (p *Parser) nud(token token) (ASTNode, error) {
318	switch token.tokenType {
319	case tJSONLiteral:
320		var parsed interface{}
321		err := json.Unmarshal([]byte(token.value), &parsed)
322		if err != nil {
323			return ASTNode{}, err
324		}
325		return ASTNode{nodeType: ASTLiteral, value: parsed}, nil
326	case tStringLiteral:
327		return ASTNode{nodeType: ASTLiteral, value: token.value}, nil
328	case tUnquotedIdentifier:
329		return ASTNode{
330			nodeType: ASTField,
331			value:    token.value,
332		}, nil
333	case tQuotedIdentifier:
334		node := ASTNode{nodeType: ASTField, value: token.value}
335		if p.current() == tLparen {
336			return ASTNode{}, p.syntaxErrorToken("Can't have quoted identifier as function name.", token)
337		}
338		return node, nil
339	case tStar:
340		left := ASTNode{nodeType: ASTIdentity}
341		var right ASTNode
342		var err error
343		if p.current() == tRbracket {
344			right = ASTNode{nodeType: ASTIdentity}
345		} else {
346			right, err = p.parseProjectionRHS(bindingPowers[tStar])
347		}
348		return ASTNode{nodeType: ASTValueProjection, children: []ASTNode{left, right}}, err
349	case tFilter:
350		return p.parseFilter(ASTNode{nodeType: ASTIdentity})
351	case tLbrace:
352		return p.parseMultiSelectHash()
353	case tFlatten:
354		left := ASTNode{
355			nodeType: ASTFlatten,
356			children: []ASTNode{{nodeType: ASTIdentity}},
357		}
358		right, err := p.parseProjectionRHS(bindingPowers[tFlatten])
359		if err != nil {
360			return ASTNode{}, err
361		}
362		return ASTNode{nodeType: ASTProjection, children: []ASTNode{left, right}}, nil
363	case tLbracket:
364		tokenType := p.current()
365		//var right ASTNode
366		if tokenType == tNumber || tokenType == tColon {
367			right, err := p.parseIndexExpression()
368			if err != nil {
369				return ASTNode{}, nil
370			}
371			return p.projectIfSlice(ASTNode{nodeType: ASTIdentity}, right)
372		} else if tokenType == tStar && p.lookahead(1) == tRbracket {
373			p.advance()
374			p.advance()
375			right, err := p.parseProjectionRHS(bindingPowers[tStar])
376			if err != nil {
377				return ASTNode{}, err
378			}
379			return ASTNode{
380				nodeType: ASTProjection,
381				children: []ASTNode{{nodeType: ASTIdentity}, right},
382			}, nil
383		} else {
384			return p.parseMultiSelectList()
385		}
386	case tCurrent:
387		return ASTNode{nodeType: ASTCurrentNode}, nil
388	case tExpref:
389		expression, err := p.parseExpression(bindingPowers[tExpref])
390		if err != nil {
391			return ASTNode{}, err
392		}
393		return ASTNode{nodeType: ASTExpRef, children: []ASTNode{expression}}, nil
394	case tNot:
395		expression, err := p.parseExpression(bindingPowers[tNot])
396		if err != nil {
397			return ASTNode{}, err
398		}
399		return ASTNode{nodeType: ASTNotExpression, children: []ASTNode{expression}}, nil
400	case tLparen:
401		expression, err := p.parseExpression(0)
402		if err != nil {
403			return ASTNode{}, err
404		}
405		if err := p.match(tRparen); err != nil {
406			return ASTNode{}, err
407		}
408		return expression, nil
409	case tEOF:
410		return ASTNode{}, p.syntaxErrorToken("Incomplete expression", token)
411	}
412
413	return ASTNode{}, p.syntaxErrorToken("Invalid token: "+token.tokenType.String(), token)
414}
415
416func (p *Parser) parseMultiSelectList() (ASTNode, error) {
417	var expressions []ASTNode
418	for {
419		expression, err := p.parseExpression(0)
420		if err != nil {
421			return ASTNode{}, err
422		}
423		expressions = append(expressions, expression)
424		if p.current() == tRbracket {
425			break
426		}
427		err = p.match(tComma)
428		if err != nil {
429			return ASTNode{}, err
430		}
431	}
432	err := p.match(tRbracket)
433	if err != nil {
434		return ASTNode{}, err
435	}
436	return ASTNode{
437		nodeType: ASTMultiSelectList,
438		children: expressions,
439	}, nil
440}
441
442func (p *Parser) parseMultiSelectHash() (ASTNode, error) {
443	var children []ASTNode
444	for {
445		keyToken := p.lookaheadToken(0)
446		if err := p.match(tUnquotedIdentifier); err != nil {
447			if err := p.match(tQuotedIdentifier); err != nil {
448				return ASTNode{}, p.syntaxError("Expected tQuotedIdentifier or tUnquotedIdentifier")
449			}
450		}
451		keyName := keyToken.value
452		err := p.match(tColon)
453		if err != nil {
454			return ASTNode{}, err
455		}
456		value, err := p.parseExpression(0)
457		if err != nil {
458			return ASTNode{}, err
459		}
460		node := ASTNode{
461			nodeType: ASTKeyValPair,
462			value:    keyName,
463			children: []ASTNode{value},
464		}
465		children = append(children, node)
466		if p.current() == tComma {
467			err := p.match(tComma)
468			if err != nil {
469				return ASTNode{}, nil
470			}
471		} else if p.current() == tRbrace {
472			err := p.match(tRbrace)
473			if err != nil {
474				return ASTNode{}, nil
475			}
476			break
477		}
478	}
479	return ASTNode{
480		nodeType: ASTMultiSelectHash,
481		children: children,
482	}, nil
483}
484
485func (p *Parser) projectIfSlice(left ASTNode, right ASTNode) (ASTNode, error) {
486	indexExpr := ASTNode{
487		nodeType: ASTIndexExpression,
488		children: []ASTNode{left, right},
489	}
490	if right.nodeType == ASTSlice {
491		right, err := p.parseProjectionRHS(bindingPowers[tStar])
492		return ASTNode{
493			nodeType: ASTProjection,
494			children: []ASTNode{indexExpr, right},
495		}, err
496	}
497	return indexExpr, nil
498}
499func (p *Parser) parseFilter(node ASTNode) (ASTNode, error) {
500	var right, condition ASTNode
501	var err error
502	condition, err = p.parseExpression(0)
503	if err != nil {
504		return ASTNode{}, err
505	}
506	if err := p.match(tRbracket); err != nil {
507		return ASTNode{}, err
508	}
509	if p.current() == tFlatten {
510		right = ASTNode{nodeType: ASTIdentity}
511	} else {
512		right, err = p.parseProjectionRHS(bindingPowers[tFilter])
513		if err != nil {
514			return ASTNode{}, err
515		}
516	}
517
518	return ASTNode{
519		nodeType: ASTFilterProjection,
520		children: []ASTNode{node, right, condition},
521	}, nil
522}
523
524func (p *Parser) parseDotRHS(bindingPower int) (ASTNode, error) {
525	lookahead := p.current()
526	if tokensOneOf([]tokType{tQuotedIdentifier, tUnquotedIdentifier, tStar}, lookahead) {
527		return p.parseExpression(bindingPower)
528	} else if lookahead == tLbracket {
529		if err := p.match(tLbracket); err != nil {
530			return ASTNode{}, err
531		}
532		return p.parseMultiSelectList()
533	} else if lookahead == tLbrace {
534		if err := p.match(tLbrace); err != nil {
535			return ASTNode{}, err
536		}
537		return p.parseMultiSelectHash()
538	}
539	return ASTNode{}, p.syntaxError("Expected identifier, lbracket, or lbrace")
540}
541
542func (p *Parser) parseProjectionRHS(bindingPower int) (ASTNode, error) {
543	current := p.current()
544	if bindingPowers[current] < 10 {
545		return ASTNode{nodeType: ASTIdentity}, nil
546	} else if current == tLbracket {
547		return p.parseExpression(bindingPower)
548	} else if current == tFilter {
549		return p.parseExpression(bindingPower)
550	} else if current == tDot {
551		err := p.match(tDot)
552		if err != nil {
553			return ASTNode{}, err
554		}
555		return p.parseDotRHS(bindingPower)
556	} else {
557		return ASTNode{}, p.syntaxError("Error")
558	}
559}
560
561func (p *Parser) lookahead(number int) tokType {
562	return p.lookaheadToken(number).tokenType
563}
564
565func (p *Parser) current() tokType {
566	return p.lookahead(0)
567}
568
569func (p *Parser) lookaheadToken(number int) token {
570	return p.tokens[p.index+number]
571}
572
573func (p *Parser) advance() {
574	p.index++
575}
576
577func tokensOneOf(elements []tokType, token tokType) bool {
578	for _, elem := range elements {
579		if elem == token {
580			return true
581		}
582	}
583	return false
584}
585
586func (p *Parser) syntaxError(msg string) SyntaxError {
587	return SyntaxError{
588		msg:        msg,
589		Expression: p.expression,
590		Offset:     p.lookaheadToken(0).position,
591	}
592}
593
594// Create a SyntaxError based on the provided token.
595// This differs from syntaxError() which creates a SyntaxError
596// based on the current lookahead token.
597func (p *Parser) syntaxErrorToken(msg string, t token) SyntaxError {
598	return SyntaxError{
599		msg:        msg,
600		Expression: p.expression,
601		Offset:     t.position,
602	}
603}
604