# actions.py

from .exceptions import ParseException
from .util import col


class OnlyOnce:
    """
    Wrapper for parse actions, to ensure they are only called once.

    The wrapped callable is normalized with ``_trim_arity`` and is always
    invoked as ``callable(s, l, t)``. After one successful call, any further
    call raises :class:`ParseException` until :meth:`reset` is invoked.
    """

    def __init__(self, method_call):
        # Imported locally rather than at module level (presumably to avoid a
        # circular import between this module and ``core`` - confirm before
        # hoisting it to the top of the file).
        from .core import _trim_arity

        self.callable = _trim_arity(method_call)
        self.called = False

    def __call__(self, s, l, t):
        """
        Invoke the wrapped parse action, or raise if it has already run.

        The ``called`` flag is set only after the wrapped action returns, so
        an action that raises leaves the wrapper available for another call.
        """
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")

        results = self.callable(s, l, t)
        self.called = True
        return results

    def reset(self):
        """
        Allow the associated parse action to be called once more.
        """

        self.called = False


def match_only_at_col(n):
    """
    Helper method for defining parse actions that require matching at
    a specific column in the input text.

    Returns a parse action that raises :class:`ParseException` when
    ``col(locn, strg)`` differs from ``n``, and returns ``None`` otherwise.
    """

    def verify_col(strg, locn, toks):
        if col(locn, strg) != n:
            raise ParseException(strg, locn, "matched token not at column {}".format(n))

    return verify_col


def replace_with(repl_str):
    """
    Helper method for common parse actions that simply return
    a literal value.  Especially useful when used with
    :class:`transform_string<ParserElement.transform_string>` ().

    The returned parse action ignores its arguments and always returns
    ``[repl_str]``.

    Example::

        num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        term = na | num

        OneOrMore(term).parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return lambda s, l, t: [repl_str]


def remove_quotes(s, l, t):
    """
    Helper parse action for removing quotation marks from parsed
    quoted strings.

    Returns the first token with its first and last characters removed; it
    does not check that those characters are actually quotation marks.

    Example::

        # by default, quotation marks are included in parsed results
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use remove_quotes to strip quotation marks from parsed results
        quoted_string.set_parse_action(remove_quotes)
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    return t[0][1:-1]


def with_attribute(*args, **attr_dict):
    """
    Helper to create a validating parse action to be used with start
    tags created with :class:`make_xml_tags` or
    :class:`make_html_tags`. Use ``with_attribute`` to qualify
    a starting tag with a required attribute value, to avoid false
    matches on common tags such as ``<TD>`` or ``<DIV>``.

    Call ``with_attribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

    - keyword arguments, as in ``(align="right")``, or
    - as an explicit dict with ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    For attribute names with a namespace prefix, you must use the explicit
    dict form or the name-value tuple form, since such names are not valid
    Python keyword identifiers.

    If any positional name-value tuples are given, keyword arguments are
    ignored.

    NOTE: this function looks attribute names up in the parsed tokens exactly
    as given; any insensitivity to upper/lower case depends on how the tag
    expression names its results, not on this helper.

    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`.

    To verify that the attribute exists, but without specifying a value,
    pass ``with_attribute.ANY_VALUE`` as the value.

    The returned parse action raises :class:`ParseException` when a required
    attribute is missing from the tokens or has a different value.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = make_html_tags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Positional tuples win over keyword arguments.  Unpacking each entry here
    # snapshots the pairs and fails immediately on a malformed (non-pair) entry
    # instead of later, at parse time.
    required = [(name, value) for name, value in (args or attr_dict.items())]

    def pa(s, l, tokens):
        for attr_name, attr_value in required:
            if attr_name not in tokens:
                raise ParseException(s, l, "no matching attribute " + attr_name)
            # ANY_VALUE is a sentinel object, so test it by identity.
            if (
                attr_value is not with_attribute.ANY_VALUE
                and tokens[attr_name] != attr_value
            ):
                raise ParseException(
                    s,
                    l,
                    "attribute {!r} has value {!r}, must be {!r}".format(
                        attr_name, tokens[attr_name], attr_value
                    ),
                )

    return pa


# Sentinel meaning "the attribute must be present, with any value".
with_attribute.ANY_VALUE = object()


def with_class(classname, namespace=""):
    """
    Simplified version of :class:`with_attribute` when
    matching on a div class - made difficult because ``class`` is
    a reserved word in Python.

    The attribute checked is ``"class"``, or ``"<namespace>:class"`` when a
    non-empty ``namespace`` is given.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = make_html_tags("div")
        div_grid = div().set_parse_action(with_class("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        div_any_type = div().set_parse_action(with_class(with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    classattr = "{}:class".format(namespace) if namespace else "class"
    return with_attribute(**{classattr: classname})


# pre-PEP8 compatibility symbols
replaceWith = replace_with
removeQuotes = remove_quotes
withAttribute = with_attribute
withClass = with_class
matchOnlyAtCol = match_only_at_col