#
# simple_unit_tests.py
#
# While these unit tests *do* perform low-level unit testing of the classes in pyparsing,
# this testing module should also serve an instructional purpose, to clearly show simple passing
# and failing parse cases of some basic pyparsing expressions.
#
# Copyright (c) 2018 Paul T. McGuire
#
from __future__ import division

try:
    import unittest2 as unittest
except ImportError:
    import unittest
import pyparsing as pp
from collections import namedtuple
from datetime import datetime

# Test spec data class for specifying simple pyparsing test cases.
#   desc               - human-readable description, printed when the spec runs
#   expr               - the pyparsing expression under test
#   text               - the input string handed to the parse function
#   parse_fn           - name of the method on expr to call (default 'parseString')
#   expected_list      - expected list form of the results, or None to skip the check
#   expected_dict      - expected dict form of the results, or None to skip the check
#   expected_fail_locn - if not None, the parse is expected to fail at this location
PpTestSpec = namedtuple("PpTestSpec", "desc expr text parse_fn "
                                      "expected_list expected_dict expected_fail_locn")
PpTestSpec.__new__.__defaults__ = ('', pp.Empty(), '', 'parseString', None, None, None)


class PyparsingExpressionTestCase(unittest.TestCase):
    """
    Base pyparsing testing class to parse various pyparsing expressions against
    given text strings. Subclasses must define a class attribute 'tests' which
    is a list of PpTestSpec instances.
    """

    if not hasattr(unittest.TestCase, 'subTest'):
        # Python 2 compatibility
        from contextlib import contextmanager

        @contextmanager
        def subTest(self, **params):
            print('subTest:', params)
            yield

    tests = []

    def runTest(self):
        # (Deliberately a comment rather than a docstring, so that unittest's
        # shortDescription() output for this method is left unchanged.)
        #
        # Run every PpTestSpec in self.tests as its own subtest. The base
        # class itself defines no specs and is skipped.
        if self.__class__ is PyparsingExpressionTestCase:
            return

        for test_spec in self.tests:
            # for each spec in the class's tests list, create a subtest
            # that will either:
            #  - parse the string with expected success, display the
            #    results, and validate the returned ParseResults
            #  - or parse the string with expected failure, display the
            #    error message and mark the error location, and validate
            #    the location against an expected value
            with self.subTest(test_spec=test_spec):
                test_spec.expr.streamline()
                print("\n{0} - {1}({2})".format(test_spec.desc,
                                                type(test_spec.expr).__name__,
                                                test_spec.expr))

                parsefn = getattr(test_spec.expr, test_spec.parse_fn)
                if test_spec.expected_fail_locn is None:
                    self._check_expected_success(test_spec, parsefn)
                else:
                    self._check_expected_failure(test_spec, parsefn)

    def _check_expected_success(self, test_spec, parsefn):
        # Parse test_spec.text, print the result, and compare it against the
        # spec's expected list and/or dict (whichever are not None).
        result = parsefn(test_spec.text)
        if test_spec.parse_fn == 'parseString':
            print(result.dump())
            # compare results against given list and/or dict
            if test_spec.expected_list is not None:
                self.assertEqual(result.asList(), test_spec.expected_list)
            if test_spec.expected_dict is not None:
                self.assertEqual(result.asDict(), test_spec.expected_dict)
        elif test_spec.parse_fn in ('transformString', 'searchString'):
            print(result)
            # the result is wrapped in a one-element list before comparing
            # NOTE(review): no spec in this module uses 'searchString';
            # confirm that wrapping its result in a list is the intended
            # comparison before relying on it.
            if test_spec.expected_list is not None:
                self.assertEqual([result], test_spec.expected_list)

    def _check_expected_failure(self, test_spec, parsefn):
        # Parse test_spec.text expecting a pyparsing parse error, print the
        # explanation, and verify the reported failure location.
        try:
            parsefn(test_spec.text)
        except pp.ParseBaseException as exc:
            # Only pyparsing parse errors carry the 'loc' attribute checked
            # below; any other exception is a genuine error and is allowed
            # to propagate with its own traceback.
            if not hasattr(exc, '__traceback__'):
                # Python 2 compatibility
                from sys import exc_info
                exc.__traceback__ = exc_info()[2]
            print(pp.ParseException.explain(exc))
            self.assertEqual(exc.loc, test_spec.expected_fail_locn)
        else:
            self.fail("failed to raise expected exception")


# =========== TEST DEFINITIONS START HERE ==============

class TestLiteral(PyparsingExpressionTestCase):
    """Passing and failing matches of Literal expressions."""
    tests = [
        PpTestSpec(
            desc="Simple match",
            expr=pp.Literal("xyz"),
            text="xyz",
            expected_list=["xyz"],
        ),
        PpTestSpec(
            desc="Simple match after skipping whitespace",
            expr=pp.Literal("xyz"),
            text=" xyz",
            expected_list=["xyz"],
        ),
        PpTestSpec(
            desc="Simple fail - parse an empty string",
            expr=pp.Literal("xyz"),
            text="",
            expected_fail_locn=0,
        ),
        PpTestSpec(
            desc="Simple fail - parse a mismatching string",
            expr=pp.Literal("xyz"),
            text="xyu",
            expected_fail_locn=0,
        ),
        PpTestSpec(
            desc="Simple fail - parse a partially matching string",
            expr=pp.Literal("xyz"),
            text="xy",
            expected_fail_locn=0,
        ),
        PpTestSpec(
            desc="Fail - parse a partially matching string by matching individual letters",
            expr=pp.Literal("x") + pp.Literal("y") + pp.Literal("z"),
            text="xy",
            expected_fail_locn=2,
        ),
    ]


class TestCaselessLiteral(PyparsingExpressionTestCase):
    """CaselessLiteral matching, with results returned in the defining case."""
    tests = [
        PpTestSpec(
            desc="Match colors, converting to consistent case",
            expr=(pp.CaselessLiteral("RED")
                  | pp.CaselessLiteral("GREEN")
                  | pp.CaselessLiteral("BLUE"))[...],
            text="red Green BluE blue GREEN green rEd",
            expected_list=['RED', 'GREEN', 'BLUE', 'BLUE', 'GREEN', 'GREEN', 'RED'],
        ),
    ]


class TestWord(PyparsingExpressionTestCase):
    """Word expressions, alone and in sequence."""
    tests = [
        PpTestSpec(
            desc="Simple Word match",
            expr=pp.Word("xy"),
            text="xxyxxyy",
            expected_list=["xxyxxyy"],
        ),
        PpTestSpec(
            desc="Simple Word match of two separate Words",
            expr=pp.Word("x") + pp.Word("y"),
            text="xxxxxyy",
            expected_list=["xxxxx", "yy"],
        ),
        PpTestSpec(
            desc="Simple Word match of two separate Words - implicitly skips whitespace",
            expr=pp.Word("x") + pp.Word("y"),
            text="xxxxx yy",
            expected_list=["xxxxx", "yy"],
        ),
    ]


class TestCombine(PyparsingExpressionTestCase):
    """Parsing real numbers with and without Combine."""
    tests = [
        PpTestSpec(
            desc="Parsing real numbers - fail, parsed numbers are in pieces",
            expr=(pp.Word(pp.nums) + '.' + pp.Word(pp.nums))[...],
            text="1.2 2.3 3.1416 98.6",
            expected_list=['1', '.', '2', '2', '.', '3', '3', '.', '1416', '98', '.', '6'],
        ),
        PpTestSpec(
            desc="Parsing real numbers - better, use Combine to combine multiple tokens into one",
            expr=pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums))[...],
            text="1.2 2.3 3.1416 98.6",
            expected_list=['1.2', '2.3', '3.1416', '98.6'],
        ),
    ]


class TestRepetition(PyparsingExpressionTestCase):
    """Repetition via [...], OneOrMore and delimitedList."""
    tests = [
        PpTestSpec(
            desc="Match several words",
            expr=(pp.Word("x") | pp.Word("y"))[...],
            text="xxyxxyyxxyxyxxxy",
            expected_list=['xx', 'y', 'xx', 'yy', 'xx', 'y', 'x', 'y', 'xxx', 'y'],
        ),
        PpTestSpec(
            desc="Match several words, skipping whitespace",
            expr=(pp.Word("x") | pp.Word("y"))[...],
            text="x x y xxy yxx y xyx xxy",
            expected_list=['x', 'x', 'y', 'xx', 'y', 'y', 'xx', 'y', 'x', 'y', 'x', 'xx', 'y'],
        ),
        PpTestSpec(
            desc="Match several words, skipping whitespace (old style)",
            expr=pp.OneOrMore(pp.Word("x") | pp.Word("y")),
            text="x x y xxy yxx y xyx xxy",
            expected_list=['x', 'x', 'y', 'xx', 'y', 'y', 'xx', 'y', 'x', 'y', 'x', 'xx', 'y'],
        ),
        PpTestSpec(
            desc="Match words and numbers - show use of results names to collect types of tokens",
            expr=(pp.Word(pp.alphas)("alpha*")
                  | pp.pyparsing_common.integer("int*"))[...],
            text="sdlfj23084ksdfs08234kjsdlfkjd0934",
            expected_list=['sdlfj', 23084, 'ksdfs', 8234, 'kjsdlfkjd', 934],
            expected_dict={'alpha': ['sdlfj', 'ksdfs', 'kjsdlfkjd'], 'int': [23084, 8234, 934]}
        ),
        PpTestSpec(
            desc="Using delimitedList (comma is the default delimiter)",
            expr=pp.delimitedList(pp.Word(pp.alphas)),
            text="xxyx,xy,y,xxyx,yxx, xy",
            expected_list=['xxyx', 'xy', 'y', 'xxyx', 'yxx', 'xy'],
        ),
        PpTestSpec(
            desc="Using delimitedList, with ':' delimiter",
            expr=pp.delimitedList(pp.Word(pp.hexnums, exact=2), delim=':', combine=True),
            text="0A:4B:73:21:FE:76",
            expected_list=['0A:4B:73:21:FE:76'],
        ),
    ]


class TestResultsName(PyparsingExpressionTestCase):
    """Attaching results names to expressions."""
    tests = [
        PpTestSpec(
            desc="Match with results name",
            expr=pp.Literal("xyz").setResultsName("value"),
            text="xyz",
            expected_dict={'value': 'xyz'},
            expected_list=['xyz'],
        ),
        PpTestSpec(
            desc="Match with results name - using naming short-cut",
            expr=pp.Literal("xyz")("value"),
            text="xyz",
            expected_dict={'value': 'xyz'},
            expected_list=['xyz'],
        ),
        PpTestSpec(
            desc="Define multiple results names",
            expr=pp.Word(pp.alphas, pp.alphanums)("key") + '=' + pp.pyparsing_common.integer("value"),
            text="range=5280",
            expected_dict={'key': 'range', 'value': 5280},
            expected_list=['range', '=', 5280],
        ),
    ]


class TestGroups(PyparsingExpressionTestCase):
    """Group and Dict applied to repeated key=value pairs."""
    # suppressed '=' separator shared by the specs below
    EQ = pp.Suppress('=')
    tests = [
        PpTestSpec(
            desc="Define multiple results names in groups",
            expr=pp.Group(pp.Word(pp.alphas)("key")
                          + EQ
                          + pp.pyparsing_common.number("value"))[...],
            text="range=5280 long=-138.52 lat=46.91",
            expected_list=[['range', 5280], ['long', -138.52], ['lat', 46.91]],
        ),
        PpTestSpec(
            desc="Define multiple results names in groups - use Dict to define results names using parsed keys",
            expr=pp.Dict(pp.Group(pp.Word(pp.alphas)
                                  + EQ
                                  + pp.pyparsing_common.number)[...]),
            text="range=5280 long=-138.52 lat=46.91",
            expected_list=[['range', 5280], ['long', -138.52], ['lat', 46.91]],
            expected_dict={'lat': 46.91, 'long': -138.52, 'range': 5280}
        ),
        PpTestSpec(
            desc="Define multiple value types",
            expr=pp.Dict(pp.Group(pp.Word(pp.alphas)
                                  + EQ
                                  + (pp.pyparsing_common.number | pp.oneOf("True False") | pp.QuotedString("'"))
                                  )[...]
                         ),
            text="long=-122.47 lat=37.82 public=True name='Golden Gate Bridge'",
            expected_list=[['long', -122.47], ['lat', 37.82], ['public', 'True'], ['name', 'Golden Gate Bridge']],
            expected_dict={'long': -122.47, 'lat': 37.82, 'public': 'True', 'name': 'Golden Gate Bridge'}
        ),
    ]


class TestParseAction(PyparsingExpressionTestCase):
    """Parse actions that convert or reshape the matched tokens."""
    # NOTE: datetime.utcfromtimestamp is deprecated as of Python 3.12; it is
    # left in place because this module still carries Python 2 compatibility code.
    tests = [
        PpTestSpec(
            desc="Parsing real numbers - use parse action to convert to float at parse time",
            expr=pp.Combine(pp.Word(pp.nums) + '.' + pp.Word(pp.nums)).addParseAction(lambda t: float(t[0]))[...],
            text="1.2 2.3 3.1416 98.6",
            expected_list=[1.2, 2.3, 3.1416, 98.6],  # note, these are now floats, not strs
        ),
        PpTestSpec(
            desc="Match with numeric string converted to int",
            expr=pp.Word("0123456789").addParseAction(lambda t: int(t[0])),
            text="12345",
            expected_list=[12345],  # note - result is type int, not str
        ),
        PpTestSpec(
            desc="Use two parse actions to convert numeric string, then convert to datetime",
            expr=pp.Word(pp.nums).addParseAction(lambda t: int(t[0]),
                                                 lambda t: datetime.utcfromtimestamp(t[0])),
            text="1537415628",
            expected_list=[datetime(2018, 9, 20, 3, 53, 48)],
        ),
        PpTestSpec(
            desc="Use tokenMap for parse actions that operate on a single-length token",
            expr=pp.Word(pp.nums).addParseAction(pp.tokenMap(int),
                                                 pp.tokenMap(datetime.utcfromtimestamp)),
            text="1537415628",
            expected_list=[datetime(2018, 9, 20, 3, 53, 48)],
        ),
        PpTestSpec(
            desc="Using a built-in function that takes a sequence of strs as a parse action",
            expr=pp.Word(pp.hexnums, exact=2)[...].addParseAction(':'.join),
            text="0A4B7321FE76",
            expected_list=['0A:4B:73:21:FE:76'],
        ),
        PpTestSpec(
            desc="Using a built-in function that takes a sequence of strs as a parse action",
            expr=pp.Word(pp.hexnums, exact=2)[...].addParseAction(sorted),
            text="0A4B7321FE76",
            expected_list=['0A', '21', '4B', '73', '76', 'FE'],
        ),
    ]


class TestResultsModifyingParseAction(PyparsingExpressionTestCase):
    """A parse action that adds named values to the results in place."""

    # Plain function (no self): it is referenced by name while the class body
    # is being evaluated, to build the spec below.
    def compute_stats_parse_action(t):
        # by the time this parse action is called, parsed numeric words
        # have been converted to ints by a previous parse action, so
        # they can be treated as ints
        t['sum'] = sum(t)
        # true division: the module imports division from __future__
        t['ave'] = sum(t) / len(t)
        t['min'] = min(t)
        t['max'] = max(t)

    tests = [
        PpTestSpec(
            desc="A parse action that adds new key-values",
            expr=pp.pyparsing_common.integer[...].addParseAction(compute_stats_parse_action),
            text="27 1 14 22 89",
            expected_list=[27, 1, 14, 22, 89],
            expected_dict={'ave': 30.6, 'max': 89, 'min': 1, 'sum': 153}
        ),
    ]


class TestRegex(PyparsingExpressionTestCase):
    """Regex as an alternative to Combine for real numbers."""
    tests = [
        PpTestSpec(
            desc="Parsing real numbers - using Regex instead of Combine",
            expr=pp.Regex(r'\d+\.\d+').addParseAction(lambda t: float(t[0]))[...],
            text="1.2 2.3 3.1416 98.6",
            expected_list=[1.2, 2.3, 3.1416, 98.6],  # note, these are now floats, not strs
        ),
    ]


class TestParseCondition(PyparsingExpressionTestCase):
    """Conditions that accept or reject an otherwise successful match."""
    tests = [
        PpTestSpec(
            desc="Define a condition to only match numeric values that are multiples of 7",
            expr=pp.Word(pp.nums).addCondition(lambda t: int(t[0]) % 7 == 0)[...],
            text="14 35 77 12 28",
            expected_list=['14', '35', '77'],
        ),
        PpTestSpec(
            desc="Separate conversion to int and condition into separate parse action/conditions",
            expr=pp.Word(pp.nums).addParseAction(lambda t: int(t[0]))
                                 .addCondition(lambda t: t[0] % 7 == 0)[...],
            text="14 35 77 12 28",
            expected_list=[14, 35, 77],
        ),
    ]


class TestTransformStringUsingParseActions(PyparsingExpressionTestCase):
    """transformString driven by a parse action that rewrites markup."""
    # markup symbol -> HTML tag name
    markup_convert_map = {
        '*': 'B',
        '_': 'U',
        '/': 'I',
    }

    # Plain function (no self): it runs later as a parse action, so it looks
    # the map up through the class name.
    def markup_convert(t):
        htmltag = TestTransformStringUsingParseActions.markup_convert_map[t.markup_symbol]
        return "<{0}>{1}</{2}>".format(htmltag, t.body, htmltag)

    tests = [
        PpTestSpec(
            desc="Use transformString to convert simple markup to HTML",
            expr=(pp.oneOf(markup_convert_map)('markup_symbol')
                  + "(" + pp.CharsNotIn(")")('body') + ")").addParseAction(markup_convert),
            text="Show in *(bold), _(underscore), or /(italic) type",
            expected_list=['Show in <B>bold</B>, <U>underscore</U>, or <I>italic</I> type'],
            parse_fn='transformString',
        ),
    ]


class TestCommonHelperExpressions(PyparsingExpressionTestCase):
    """Helper expressions: delimitedList, countedArray, ignore, oneOf, nestedExpr."""
    tests = [
        PpTestSpec(
            desc="A comma-delimited list of words",
            expr=pp.delimitedList(pp.Word(pp.alphas)),
            text="this, that, blah,foo, bar",
            expected_list=['this', 'that', 'blah', 'foo', 'bar'],
        ),
        PpTestSpec(
            desc="A counted array of words",
            expr=pp.countedArray(pp.Word('ab'))[...],
            text="2 aaa bbb 0 3 abab bbaa abbab",
            expected_list=[['aaa', 'bbb'], [], ['abab', 'bbaa', 'abbab']],
        ),
        PpTestSpec(
            desc="skipping comments with ignore",
            expr=(pp.pyparsing_common.identifier('lhs')
                  + '='
                  + pp.pyparsing_common.fnumber('rhs')).ignore(pp.cppStyleComment),
            text="abc_100 = /* value to be tested */ 3.1416",
            expected_list=['abc_100', '=', 3.1416],
            expected_dict={'lhs': 'abc_100', 'rhs': 3.1416},
        ),
        PpTestSpec(
            desc="some pre-defined expressions in pyparsing_common, and building a dotted identifier with delimted_list",
            expr=(pp.pyparsing_common.number("id_num")
                  + pp.delimitedList(pp.pyparsing_common.identifier, '.', combine=True)("name")
                  + pp.pyparsing_common.ipv4_address("ip_address")
                  ),
            text="1001 www.google.com 192.168.10.199",
            expected_list=[1001, 'www.google.com', '192.168.10.199'],
            expected_dict={'id_num': 1001, 'name': 'www.google.com', 'ip_address': '192.168.10.199'},
        ),
        PpTestSpec(
            desc="using oneOf (shortcut for Literal('a') | Literal('b') | Literal('c'))",
            expr=pp.oneOf("a b c")[...],
            text="a b a b b a c c a b b",
            expected_list=['a', 'b', 'a', 'b', 'b', 'a', 'c', 'c', 'a', 'b', 'b'],
        ),
        PpTestSpec(
            desc="parsing nested parentheses",
            expr=pp.nestedExpr(),
            text="(a b (c) d (e f g ()))",
            expected_list=[['a', 'b', ['c'], 'd', ['e', 'f', 'g', []]]],
        ),
        PpTestSpec(
            desc="parsing nested braces",
            expr=(pp.Keyword('if')
                  + pp.nestedExpr()('condition')
                  + pp.nestedExpr('{', '}')('body')),
            text='if ((x == y) || !z) {printf("{}");}',
            expected_list=['if', [['x', '==', 'y'], '||', '!z'], ['printf(', '"{}"', ');']],
            expected_dict={'condition': [[['x', '==', 'y'], '||', '!z']],
                           'body': [['printf(', '"{}"', ');']]},
        ),
    ]


def _get_decl_line_no(cls):
    """Return the source line number on which *cls* is declared."""
    import inspect
    return inspect.getsourcelines(cls)[1]


# get all test case classes defined in this module and sort them by decl line no
test_case_classes = list(PyparsingExpressionTestCase.__subclasses__())
test_case_classes.sort(key=_get_decl_line_no)

# make into a suite and run it - this will run the tests in the same order
# they are declared in this module
#
# runnable from setup.py using "python setup.py test -s simple_unit_tests.suite"
#
suite = unittest.TestSuite(cls() for cls in test_case_classes)


# ============ MAIN ================

if __name__ == '__main__':
    import sys
    if sys.version_info[0] < 3:
        print("simple_unit_tests.py requires Python 3.x - exiting...")
        sys.exit(0)

    result = unittest.TextTestRunner().run(suite)

    # sys.exit rather than the site-provided exit() builtin, which is not
    # available when the interpreter is started without the site module
    sys.exit(0 if result.wasSuccessful() else 1)