from __future__ import print_function, absolute_import, division, generators, nested_scopes
import sys
import os.path
import logging

import ply.yacc

from jsonpath_rw.jsonpath import *
from jsonpath_rw.lexer import JsonPathLexer

logger = logging.getLogger(__name__)


def parse(string):
    """Parse ``string`` with a freshly constructed ``JsonPathParser`` and return the result."""
    return JsonPathParser().parse(string)


class JsonPathParser(object):
    '''
    An LALR-parser for JsonPath
    '''

    tokens = JsonPathLexer.tokens

    def __init__(self, debug=False, lexer_class=None):
        """
        Create a parser.

        ``debug`` is forwarded to ``ply.yacc.yacc``.  ``lexer_class`` is
        called with no arguments to build a lexer whenever ``parse`` is not
        given one; it defaults to ``JsonPathLexer``.

        Raises ``Exception`` when docstrings have been stripped, because the
        grammar below lives in the docstrings of the ``p_*`` methods.
        """
        # Identity comparison: ``__doc__`` is exactly None when docstrings are stripped.
        if self.__doc__ is None:
            raise Exception('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')

        self.debug = debug
        self.lexer_class = lexer_class or JsonPathLexer  # Crufty but works around statefulness in PLY

    def parse(self, string, lexer=None):
        """
        Tokenize ``string`` and parse the resulting token stream.

        ``lexer`` must provide ``tokenize(string)``; when omitted (or falsy)
        a new ``self.lexer_class()`` instance is used.
        """
        lexer = lexer or self.lexer_class()
        return self.parse_token_stream(lexer.tokenize(string))

    def parse_token_stream(self, token_iterator, start_symbol='jsonpath'):
        """
        Build a PLY parser for ``start_symbol`` and run it over ``token_iterator``.

        Returns whatever the PLY parser produces for the start symbol.
        """
        # Since PLY has some crufty aspects and dumps files, we try to keep them local
        # However, we need to derive the name of the output Python file :-/
        output_directory = os.path.dirname(__file__)
        try:
            module_name = os.path.splitext(os.path.split(__file__)[1])[0]
        except Exception:
            # Best-effort fallback, but without swallowing KeyboardInterrupt/SystemExit.
            module_name = __name__

        parsing_table_module = '_'.join([module_name, start_symbol, 'parsetab'])

        # And we regenerate the parse table every time; it doesn't actually take that long!
        new_parser = ply.yacc.yacc(module=self,
                                   debug=self.debug,
                                   tabmodule=parsing_table_module,
                                   outputdir=output_directory,
                                   write_tables=0,
                                   start=start_symbol,
                                   errorlog=logger)

        return new_parser.parse(lexer=IteratorToTokenStream(token_iterator))

    # ===================== PLY Parser specification =====================
    #
    # NOTE: the docstring of every ``p_*`` method below IS the grammar rule
    # that PLY reads.  Do not edit them as if they were documentation; use
    # ``#`` comments for explanations instead.

    precedence = [
        ('left', ','),
        ('left', 'DOUBLEDOT'),
        ('left', '.'),
        ('left', '|'),
        ('left', '&'),
        ('left', 'WHERE'),
    ]

    # Syntax-error hook invoked by PLY.  PLY passes ``None`` instead of a
    # token when the error is hitting end of input, so that case must be
    # handled before any token attribute is read.
    def p_error(self, t):
        if t is None:
            raise Exception('Parse error: unexpected end of input')
        raise Exception('Parse error at %s:%s near token %s (%s)' % (t.lineno, t.col, t.value, t.type))

    # Binary operators: build the matching node from the two operands.
    def p_jsonpath_binop(self, p):
        """jsonpath : jsonpath '.' jsonpath
                    | jsonpath DOUBLEDOT jsonpath
                    | jsonpath WHERE jsonpath
                    | jsonpath '|' jsonpath
                    | jsonpath '&' jsonpath"""
        op = p[2]

        if op == '.':
            p[0] = Child(p[1], p[3])
        elif op == '..':
            p[0] = Descendants(p[1], p[3])
        elif op == 'where':
            p[0] = Where(p[1], p[3])
        elif op == '|':
            p[0] = Union(p[1], p[3])
        elif op == '&':
            p[0] = Intersect(p[1], p[3])

    # A bare field list (or '*') becomes a Fields node.
    def p_jsonpath_fields(self, p):
        "jsonpath : fields_or_any"
        p[0] = Fields(*p[1])

    # Named operators: only `this` and `parent` are recognised here.
    def p_jsonpath_named_operator(self, p):
        "jsonpath : NAMED_OPERATOR"
        if p[1] == 'this':
            p[0] = This()
        elif p[1] == 'parent':
            p[0] = Parent()
        else:
            raise Exception('Unknown named operator `%s` at %s:%s' % (p[1], p.lineno(1), p.lexpos(1)))

    def p_jsonpath_root(self, p):
        "jsonpath : '$'"
        p[0] = Root()

    # Bracketed forms with no left-hand side yield the inner node directly.
    def p_jsonpath_idx(self, p):
        "jsonpath : '[' idx ']'"
        p[0] = p[2]

    def p_jsonpath_slice(self, p):
        "jsonpath : '[' slice ']'"
        p[0] = p[2]

    def p_jsonpath_fieldbrackets(self, p):
        "jsonpath : '[' fields ']'"
        p[0] = Fields(*p[2])

    # Bracketed forms following a jsonpath are wrapped in Child(left, inner).
    def p_jsonpath_child_fieldbrackets(self, p):
        "jsonpath : jsonpath '[' fields ']'"
        p[0] = Child(p[1], Fields(*p[3]))

    def p_jsonpath_child_idxbrackets(self, p):
        "jsonpath : jsonpath '[' idx ']'"
        p[0] = Child(p[1], p[3])

    def p_jsonpath_child_slicebrackets(self, p):
        "jsonpath : jsonpath '[' slice ']'"
        p[0] = Child(p[1], p[3])

    def p_jsonpath_parens(self, p):
        "jsonpath : '(' jsonpath ')'"
        p[0] = p[2]

    # Because fields in brackets cannot be '*' - that is reserved for array indices
    def p_fields_or_any(self, p):
        """fields_or_any : fields
                         | '*'    """
        if p[1] == '*':
            p[0] = ['*']
        else:
            p[0] = p[1]

    # `fields` values are plain Python lists of identifiers.
    def p_fields_id(self, p):
        "fields : ID"
        p[0] = [p[1]]

    def p_fields_comma(self, p):
        "fields : fields ',' fields"
        p[0] = p[1] + p[3]

    def p_idx(self, p):
        "idx : NUMBER"
        p[0] = Index(p[1])

    def p_slice_any(self, p):
        "slice : '*'"
        p[0] = Slice()

    def p_slice(self, p):  # Currently does not support `step`
        "slice : maybe_int ':' maybe_int"
        p[0] = Slice(start=p[1], end=p[3])

    # Either bound of a slice may be omitted, in which case it is None.
    def p_maybe_int(self, p):
        """maybe_int : NUMBER
                     | empty"""
        p[0] = p[1]

    def p_empty(self, p):
        'empty :'
        p[0] = None


class IteratorToTokenStream(object):
    """Adapt an iterable of tokens to the ``token()`` interface handed to the PLY parser."""

    def __init__(self, iterator):
        # ``iter()`` returns an iterator unchanged and lets plain iterables
        # (e.g. a list of tokens) work too.
        self.iterator = iter(iterator)

    def token(self):
        """Return the next token, or ``None`` once the underlying iterator is exhausted."""
        return next(self.iterator, None)


if __name__ == '__main__':
    logging.basicConfig()
    parser = JsonPathParser(debug=True)
    print(parser.parse(sys.stdin.read()))