# results.py
from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator
import pprint
from weakref import ref as wkref
from typing import Tuple, Any

str_type: Tuple[type, ...] = (str, bytes)
_generator_type = type((_ for _ in ()))


class _ParseResultsWithOffset:
    """Pair a named-result value with its offset in the owning token list.

    Instances are indexable (and therefore unpackable) like the 2-tuple
    ``(value, offset)`` they wrap.
    """

    __slots__ = ["tup"]

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __getstate__(self):
        return self.tup

    def __setstate__(self, *args):
        self.tup = args[0]


class ParseResults:
    """Structured parse results, to provide multiple means of access to
    the parsed data:

    - as a list (``len(results)``)
    - by list index (``results[0], results[1]``, etc.)
    - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))
        # equivalent form:
        # date_str = (integer("year") + '/'
        #             + integer("month") + '/'
        #             + integer("day"))

        # parse_string returns a ParseResults object
        result = date_str.parse_string("1999/12/31")

        def test(s, fn=repr):
            print("{} -> {}".format(s, fn(eval(s))))
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)

    prints::

        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: '31'
        - month: '12'
        - year: '1999'
    """

    # token-list values for which __init__ records the name but stores no
    # named entry
    _null_values: Tuple[Any, ...] = (None, [], "", ())

    __slots__ = [
        "_name",
        "_parent",
        "_all_names",
        "_modal",
        "_toklist",
        "_tokdict",
        "__weakref__",
    ]

    class List(list):
        """
        Simple wrapper class to distinguish parsed list results that should be preserved
        as actual Python lists, instead of being converted to :class:`ParseResults`:

            LBRACK, RBRACK = map(pp.Suppress, "[]")
            element = pp.Forward()
            item = ppc.integer
            element_list = LBRACK + pp.delimited_list(element) + RBRACK

            # add parse actions to convert from ParseResults to actual Python collection types
            def as_python_list(t):
                return pp.ParseResults.List(t.as_list())
            element_list.add_parse_action(as_python_list)

            element <<= item | element_list

            element.run_tests('''
                100
                [2,3,4]
                [[2, 1],3,4]
                [(2, 1),3,4]
                (2,3,4)
                ''', post_parse=lambda s, r: (r[0], type(r[0])))

        prints:

            100
            (100, <class 'int'>)

            [2,3,4]
            ([2, 3, 4], <class 'list'>)

            [[2, 1],3,4]
            ([[2, 1], 3, 4], <class 'list'>)

        (Used internally by :class:`Group` when `aslist=True`.)
        """

        def __new__(cls, contained=None):
            if contained is None:
                contained = []

            if not isinstance(contained, list):
                raise TypeError(
                    "{} may only be constructed with a list,"
                    " not {}".format(cls.__name__, type(contained).__name__)
                )

            return list.__new__(cls)

    def __new__(cls, toklist=None, name=None, **kwargs):
        """Allocate the instance and populate its token list.

        An existing :class:`ParseResults` passed as ``toklist`` is returned
        as-is (no copy).  A :class:`ParseResults.List` is kept whole as a
        single token; any other list or generator is copied element-wise;
        every other value becomes the sole token.
        """
        if isinstance(toklist, ParseResults):
            return toklist
        self = object.__new__(cls)
        self._name = None
        self._parent = None
        self._all_names = set()

        if toklist is None:
            self._toklist = []
        elif isinstance(toklist, (list, _generator_type)):
            self._toklist = (
                [toklist[:]]
                if isinstance(toklist, ParseResults.List)
                else list(toklist)
            )
        else:
            self._toklist = [toklist]
        self._tokdict = {}
        return self

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
    ):
        """Attach the optional results ``name`` to the tokens set up by ``__new__``.

        ``isinstance`` is bound as a default argument so that it is resolved
        as a local name inside this method.
        """
        self._modal = modal
        if name is not None and name != "":
            if isinstance(name, int):
                name = str(name)
            if not modal:
                self._all_names = {name}
            self._name = name
            if toklist not in self._null_values:
                if isinstance(toklist, (str_type, type)):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(
                            ParseResults(toklist._toklist), 0
                        )
                    else:
                        self[name] = _ParseResultsWithOffset(
                            ParseResults(toklist[0]), 0
                        )
                    self[name]._name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        if toklist is not self:
                            self[name] = toklist
                        else:
                            self._name = name

    def __getitem__(self, i):
        """Return a token (int/slice ``i``) or a named result (any other key).

        For a name recorded in ``_all_names`` every stored value is returned,
        wrapped in a new :class:`ParseResults`; otherwise only the most
        recently stored value is returned.  Raises ``KeyError`` for an
        unknown name.
        """
        if isinstance(i, (int, slice)):
            return self._toklist[i]
        else:
            if i not in self._all_names:
                return self._tokdict[i][-1][0]
            else:
                return ParseResults([v[0] for v in self._tokdict[i]])

    def __setitem__(self, k, v, isinstance=isinstance):
        """Store a token (int/slice ``k``) or add a value under results name ``k``.

        A ``_ParseResultsWithOffset`` value is always stored as a named
        result, whatever the type of ``k``.  Any stored :class:`ParseResults`
        gets a weak back-reference to ``self`` as its parent.
        """
        if isinstance(v, _ParseResultsWithOffset):
            self._tokdict[k] = self._tokdict.get(k, []) + [v]
            sub = v[0]
        elif isinstance(k, (int, slice)):
            self._toklist[k] = v
            sub = v
        else:
            self._tokdict[k] = self._tokdict.get(k, []) + [
                _ParseResultsWithOffset(v, 0)
            ]
            sub = v
        if isinstance(sub, ParseResults):
            sub._parent = wkref(self)

    def __delitem__(self, i):
        """Delete token(s) (int/slice ``i``) or a results name (any other key).

        After a token deletion, the offsets stored with the named results are
        shifted down to account for the removed positions.
        """
        if isinstance(i, (int, slice)):
            mylen = len(self._toklist)
            del self._toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i + 1)
            # get removed indices, highest first: each pass below compares
            # against offsets that earlier passes have not yet moved past it.
            # (Sorting also covers negative-step slices, whose raw index
            # range is already descending.)
            removed = sorted(range(*i.indices(mylen)), reverse=True)
            # fixup indices in token dictionary
            for occurrences in self._tokdict.values():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(
                            value, position - (position > j)
                        )
        else:
            del self._tokdict[i]

    def __contains__(self, k) -> bool:
        """Return True if ``k`` is a defined results name (tokens are not searched)."""
        return k in self._tokdict

    def __len__(self) -> int:
        """Return the number of tokens in the list view."""
        return len(self._toklist)

    def __bool__(self) -> bool:
        """Return True if there are any tokens or any named results."""
        return bool(self._toklist or self._tokdict)

    def __iter__(self) -> Iterator:
        """Iterate over the tokens of the list view."""
        return iter(self._toklist)

    def __reversed__(self) -> Iterator:
        """Iterate over a reversed snapshot of the tokens."""
        return iter(self._toklist[::-1])

    def keys(self):
        """Return an iterator over the defined results names."""
        return iter(self._tokdict)

    def values(self):
        """Return a generator over the value of each defined results name."""
        return (self[k] for k in self.keys())

    def items(self):
        """Return a generator of ``(name, value)`` pairs for the results names."""
        return ((k, self[k]) for k in self.keys())

    def haskeys(self) -> bool:
        """
        Since ``keys()`` returns an iterator, this method is helpful in bypassing
        code that looks for the existence of any defined results names."""
        return bool(self._tokdict)

    def pop(self, *args, **kwargs):
        """
        Removes and returns item at specified index (default= ``last``).
        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
        passed no argument or an integer argument, it will use ``list``
        semantics and pop tokens from the list of parsed tokens. If passed
        a non-integer argument (most likely a string), it will use ``dict``
        semantics and pop the corresponding value from any defined results
        names. A second default return value argument is supported, just as in
        ``dict.pop()``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            def remove_first(tokens):
                tokens.pop(0)
            numlist.add_parse_action(remove_first)
            print(numlist.parse_string("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + OneOrMore(Word(nums))
            print(patt.parse_string("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.add_parse_action(remove_LABEL)
            print(patt.parse_string("AAB 123 321").dump())

        prints::

            ['AAB', '123', '321']
            - LABEL: 'AAB'

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k, v in kwargs.items():
            if k == "default":
                args = (args[0], v)
            else:
                raise TypeError(
                    "pop() got an unexpected keyword argument {!r}".format(k)
                )
        if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, default_value=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given ``default_value`` or ``None`` if no
        ``default_value`` is specified.

        Similar to ``dict.get()``.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        else:
            return default_value

    def insert(self, index, ins_string):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to ``list.insert()``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            numlist.add_parse_action(insert_locn)
            print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
        """
        size = len(self._toklist)
        self._toklist.insert(index, ins_string)
        # list.insert() counts a negative index from the end and clamps it at
        # the front; resolve it the same way before comparing with offsets
        if index < 0:
            index = max(index + size, 0)
        # fixup indices in token dictionary: everything that was at or after
        # the insertion point has moved one slot to the right
        for occurrences in self._tokdict.values():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(
                    value, position + (position >= index)
                )

    def append(self, item):
        """
        Add single element to end of ``ParseResults`` list of elements.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            numlist.add_parse_action(append_sum)
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self._toklist.append(item)

    def extend(self, itemseq):
        """
        Add sequence of elements to end of ``ParseResults`` list of elements.

        Example::

            patt = OneOrMore(Word(alphas))

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            patt.add_parse_action(make_palindrome)
            print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            self.__iadd__(itemseq)
        else:
            self._toklist.extend(itemseq)

    def clear(self):
        """
        Clear all elements and results names.
        """
        del self._toklist[:]
        self._tokdict.clear()

    def __getattr__(self, name):
        """Attribute-style access to named results.

        An undefined results name yields ``""`` instead of raising, except
        for double-underscore names, which raise ``AttributeError``.
        """
        try:
            return self[name]
        except KeyError:
            if name.startswith("__"):
                raise AttributeError(name)
            return ""

    def __add__(self, other) -> "ParseResults":
        """Return a copy of ``self`` with ``other`` merged in (see ``__iadd__``)."""
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other) -> "ParseResults":
        """Merge ``other`` into ``self`` in place.

        Tokens are appended; named results are re-registered on ``self`` with
        their offsets moved past the existing tokens (a negative offset maps
        to the current end of the token list).
        """
        if other._tokdict:
            offset = len(self._toklist)
            addoffset = lambda a: offset if a < 0 else a + offset
            otheritems = other._tokdict.items()
            otherdictitems = [
                (k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                for k, vlist in otheritems
                for v in vlist
            ]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0]._parent = wkref(self)

        self._toklist += other._toklist
        self._all_names |= other._all_names
        return self

    def __radd__(self, other) -> "ParseResults":
        """Support ``0 + results`` so that ``sum()`` can merge many results."""
        if isinstance(other, int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__(self) -> str:
        return "{}({!r}, {})".format(type(self).__name__, self._toklist, self.as_dict())

    def __str__(self) -> str:
        """Render like a list; nested results use ``str()``, other tokens ``repr()``."""
        return (
            "["
            + ", ".join(
                str(i) if isinstance(i, ParseResults) else repr(i)
                for i in self._toklist
            )
            + "]"
        )

    def _asStringList(self, sep=""):
        """Return the tokens flattened into a list of strings.

        ``sep`` is placed between top-level tokens only; it is not passed
        down into nested results.
        """
        out = []
        for item in self._toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(str(item))
        return out

    def as_list(self) -> list:
        """
        Returns the parse results as a nested list of matching tokens.

        Example::

            patt = OneOrMore(Word(alphas))
            result = patt.parse_string("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use as_list() to create an actual list
            result_list = result.as_list()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [
            res.as_list() if isinstance(res, ParseResults) else res
            for res in self._toklist
        ]

    def as_dict(self) -> dict:
        """
        Returns the named parse results as a nested dictionary.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> ParseResults(['12', '/', '31', '/', '1999'], {'year': '12', 'month': '31', 'day': '1999'})

            result_dict = result.as_dict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'year': '12', 'month': '31', 'day': '1999'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.as_dict())) # -> {"year": "12", "month": "31", "day": "1999"}
        """

        def to_item(obj):
            # nested results become dicts when they carry names, else lists
            if isinstance(obj, ParseResults):
                return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
            else:
                return obj

        return {k: to_item(v) for k, v in self.items()}

    def copy(self) -> "ParseResults":
        """
        Returns a new copy of a :class:`ParseResults` object.

        The copy is shallow: the token list and the name mapping are new
        containers, but the tokens and stored values themselves are shared.
        """
        ret = ParseResults(self._toklist)
        ret._tokdict = self._tokdict.copy()
        ret._parent = self._parent
        ret._all_names |= self._all_names
        ret._name = self._name
        return ret

    def get_name(self):
        r"""
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Example::

            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)

            result = user_info.parse_string("22 111-22-3333 #221B")
            for item in result:
                print(item.get_name(), ':', item[0])

        prints::

            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self._name:
            return self._name
        elif self._parent:
            par = self._parent()

            def find_in_parent(sub):
                # first name in the parent under which this very object is stored
                return next(
                    (
                        k
                        for k, vlist in par._tokdict.items()
                        for v, loc in vlist
                        if sub is v
                    ),
                    None,
                )

            return find_in_parent(self) if par else None
        elif (
            len(self) == 1
            and len(self._tokdict) == 1
            and next(iter(self._tokdict.values()))[0][1] in (0, -1)
        ):
            return next(iter(self._tokdict.keys()))
        else:
            return None

    def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
        """
        Diagnostic method for listing out the contents of
        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
        that this string can be embedded in a nested display of other data.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(result.dump())

        prints::

            ['12', '/', '31', '/', '1999']
            - day: '1999'
            - month: '31'
            - year: '12'
        """
        # NOTE(review): the per-level padding below is the one-space literal
        # as it appears in the reviewed text - confirm against the pristine file.
        out = []
        NL = "\n"
        out.append(indent + str(self.as_list()) if include_list else "")

        if full:
            if self.haskeys():
                items = sorted((str(k), v) for k, v in self.items())
                for k, v in items:
                    if out:
                        out.append(NL)
                    out.append("{}{}- {}: ".format(indent, (" " * _depth), k))
                    if isinstance(v, ParseResults):
                        if v:
                            out.append(
                                v.dump(
                                    indent=indent,
                                    full=full,
                                    include_list=include_list,
                                    _depth=_depth + 1,
                                )
                            )
                        else:
                            out.append(str(v))
                    else:
                        out.append(repr(v))
            if any(isinstance(vv, ParseResults) for vv in self):
                v = self
                for i, vv in enumerate(v):
                    if isinstance(vv, ParseResults):
                        out.append(
                            "\n{}{}[{}]:\n{}{}{}".format(
                                indent,
                                (" " * (_depth)),
                                i,
                                indent,
                                (" " * (_depth + 1)),
                                vv.dump(
                                    indent=indent,
                                    full=full,
                                    include_list=include_list,
                                    _depth=_depth + 1,
                                ),
                            )
                        )
                    else:
                        out.append(
                            "\n%s%s[%d]:\n%s%s%s"
                            % (
                                indent,
                                (" " * (_depth)),
                                i,
                                indent,
                                (" " * (_depth + 1)),
                                str(vv),
                            )
                        )

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the
        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
        Accepts additional positional or keyword args as defined for
        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

        Example::

            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(delimited_list(term)))
            result = func.parse_string("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)

        prints::

            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.as_list(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        """Return ``(tokens, (names, parent, all_names, name))`` for pickling."""
        # Resolve the weak reference explicitly: a live parent must be kept
        # even when it is falsy (a ParseResults with no tokens and no names).
        parent = self._parent() if self._parent is not None else None
        return (
            self._toklist,
            (
                self._tokdict.copy(),
                parent,
                self._all_names,
                self._name,
            ),
        )

    def __setstate__(self, state):
        """Restore the state produced by ``__getstate__``."""
        self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
        self._all_names = set(inAccumNames)
        if par is not None:
            self._parent = wkref(par)
        else:
            self._parent = None

    def __getnewargs__(self):
        return self._toklist, self._name

    def __dir__(self):
        """List the class attributes plus the defined results names."""
        return dir(type(self)) + list(self.keys())

    @classmethod
    def from_dict(cls, other, name=None) -> "ParseResults":
        """
        Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
        name-value relations as results names. If an optional ``name`` argument is
        given, a nested ``ParseResults`` will be returned.
        """

        def is_iterable(obj):
            # strings and bytes are iterable but are treated as scalar values
            try:
                iter(obj)
            except Exception:
                return False
            else:
                return not isinstance(obj, str_type)

        ret = cls([])
        for k, v in other.items():
            if isinstance(v, Mapping):
                ret += cls.from_dict(v, name=k)
            else:
                ret += cls([v], name=k, asList=is_iterable(v))
        if name is not None:
            ret = cls([ret], name=name)
        return ret

    asList = as_list
    asDict = as_dict
    getName = get_name


MutableMapping.register(ParseResults)
MutableSequence.register(ParseResults)