1from __future__ import ( 2 print_function, 3 absolute_import, 4 division, 5 generators, 6 nested_scopes, 7) 8import sys 9import os.path 10 11import ply.yacc 12 13from jsonpath_ng.exceptions import JsonPathParserError 14from jsonpath_ng.jsonpath import * 15from jsonpath_ng.lexer import JsonPathLexer 16 17logger = logging.getLogger(__name__) 18 19 20def parse(string): 21 return JsonPathParser().parse(string) 22 23 24class JsonPathParser(object): 25 ''' 26 An LALR-parser for JsonPath 27 ''' 28 29 tokens = JsonPathLexer.tokens 30 31 def __init__(self, debug=False, lexer_class=None): 32 if self.__doc__ is None: 33 raise JsonPathParserError( 34 'Docstrings have been removed! By design of PLY, ' 35 'jsonpath-rw requires docstrings. You must not use ' 36 'PYTHONOPTIMIZE=2 or python -OO.' 37 ) 38 39 self.debug = debug 40 self.lexer_class = lexer_class or JsonPathLexer # Crufty but works around statefulness in PLY 41 42 def parse(self, string, lexer = None): 43 lexer = lexer or self.lexer_class() 44 return self.parse_token_stream(lexer.tokenize(string)) 45 46 def parse_token_stream(self, token_iterator, start_symbol='jsonpath'): 47 48 # Since PLY has some crufty aspects and dumps files, we try to keep them local 49 # However, we need to derive the name of the output Python file :-/ 50 output_directory = os.path.dirname(__file__) 51 try: 52 module_name = os.path.splitext(os.path.split(__file__)[1])[0] 53 except: 54 module_name = __name__ 55 56 parsing_table_module = '_'.join([module_name, start_symbol, 'parsetab']) 57 58 # And we regenerate the parse table every time; it doesn't actually take that long! 59 new_parser = ply.yacc.yacc(module=self, 60 debug=self.debug, 61 tabmodule = parsing_table_module, 62 outputdir = output_directory, 63 write_tables=0, 64 start = start_symbol, 65 errorlog = logger) 66 67 return new_parser.parse(lexer = IteratorToTokenStream(token_iterator)) 68 69 # ===================== PLY Parser specification ===================== 70 71 precedence = [ 72 ('left', ','), 73 ('left', 'DOUBLEDOT'), 74 ('left', '.'), 75 ('left', '|'), 76 ('left', '&'), 77 ('left', 'WHERE'), 78 ] 79 80 def p_error(self, t): 81 raise JsonPathParserError('Parse error at %s:%s near token %s (%s)' 82 % (t.lineno, t.col, t.value, t.type)) 83 84 def p_jsonpath_binop(self, p): 85 """jsonpath : jsonpath '.' jsonpath 86 | jsonpath DOUBLEDOT jsonpath 87 | jsonpath WHERE jsonpath 88 | jsonpath '|' jsonpath 89 | jsonpath '&' jsonpath""" 90 op = p[2] 91 92 if op == '.': 93 p[0] = Child(p[1], p[3]) 94 elif op == '..': 95 p[0] = Descendants(p[1], p[3]) 96 elif op == 'where': 97 p[0] = Where(p[1], p[3]) 98 elif op == '|': 99 p[0] = Union(p[1], p[3]) 100 elif op == '&': 101 p[0] = Intersect(p[1], p[3]) 102 103 def p_jsonpath_fields(self, p): 104 "jsonpath : fields_or_any" 105 p[0] = Fields(*p[1]) 106 107 def p_jsonpath_named_operator(self, p): 108 "jsonpath : NAMED_OPERATOR" 109 if p[1] == 'this': 110 p[0] = This() 111 elif p[1] == 'parent': 112 p[0] = Parent() 113 else: 114 raise JsonPathParserError('Unknown named operator `%s` at %s:%s' 115 % (p[1], p.lineno(1), p.lexpos(1))) 116 117 def p_jsonpath_root(self, p): 118 "jsonpath : '$'" 119 p[0] = Root() 120 121 def p_jsonpath_idx(self, p): 122 "jsonpath : '[' idx ']'" 123 p[0] = p[2] 124 125 def p_jsonpath_slice(self, p): 126 "jsonpath : '[' slice ']'" 127 p[0] = p[2] 128 129 def p_jsonpath_fieldbrackets(self, p): 130 "jsonpath : '[' fields ']'" 131 p[0] = Fields(*p[2]) 132 133 def p_jsonpath_child_fieldbrackets(self, p): 134 "jsonpath : jsonpath '[' fields ']'" 135 p[0] = Child(p[1], Fields(*p[3])) 136 137 def p_jsonpath_child_idxbrackets(self, p): 138 "jsonpath : jsonpath '[' idx ']'" 139 p[0] = Child(p[1], p[3]) 140 141 def p_jsonpath_child_slicebrackets(self, p): 142 "jsonpath : jsonpath '[' slice ']'" 143 p[0] = Child(p[1], p[3]) 144 145 def p_jsonpath_parens(self, p): 146 "jsonpath : '(' jsonpath ')'" 147 p[0] = p[2] 148 149 # Because fields in brackets cannot be '*' - that is reserved for array indices 150 def p_fields_or_any(self, p): 151 """fields_or_any : fields 152 | '*' """ 153 if p[1] == '*': 154 p[0] = ['*'] 155 else: 156 p[0] = p[1] 157 158 def p_fields_id(self, p): 159 "fields : ID" 160 p[0] = [p[1]] 161 162 def p_fields_comma(self, p): 163 "fields : fields ',' fields" 164 p[0] = p[1] + p[3] 165 166 def p_idx(self, p): 167 "idx : NUMBER" 168 p[0] = Index(p[1]) 169 170 def p_slice_any(self, p): 171 "slice : '*'" 172 p[0] = Slice() 173 174 def p_slice(self, p): # Currently does not support `step` 175 "slice : maybe_int ':' maybe_int" 176 p[0] = Slice(start=p[1], end=p[3]) 177 178 def p_maybe_int(self, p): 179 """maybe_int : NUMBER 180 | empty""" 181 p[0] = p[1] 182 183 def p_empty(self, p): 184 'empty :' 185 p[0] = None 186 187class IteratorToTokenStream(object): 188 def __init__(self, iterator): 189 self.iterator = iterator 190 191 def token(self): 192 try: 193 return next(self.iterator) 194 except StopIteration: 195 return None 196 197 198if __name__ == '__main__': 199 logging.basicConfig() 200 parser = JsonPathParser(debug=True) 201 print(parser.parse(sys.stdin.read())) 202