1#
2# simple_unit_tests.py
3#
4# While these unit tests *do* perform low-level unit testing of the classes in pyparsing,
5# this testing module should also serve an instructional purpose, to clearly show simple passing
6# and failing parse cases of some basic pyparsing expressions.
7#
8# Copyright (c) 2018  Paul T. McGuire
9#
10from __future__ import division
11
12try:
13    import unittest2 as unittest
14except ImportError:
15    import unittest
16import pyparsing as pp
17from collections import namedtuple
18from datetime import datetime
19
# Test spec record for specifying one simple pyparsing test case:
#   desc               - description printed when the case is run
#   expr               - the pyparsing expression under test
#   text               - the input string handed to the expression
#   parse_fn           - name of the expr method used to parse the text
#   expected_list      - expected list form of the results (None skips the check)
#   expected_dict      - expected dict form of the results (None skips the check)
#   expected_fail_locn - expected failure location (None means "expect success")
PpTestSpec = namedtuple(
    "PpTestSpec",
    [
        "desc", "expr", "text", "parse_fn",
        "expected_list", "expected_dict", "expected_fail_locn",
    ],
)
# make every field optional by attaching defaults directly to __new__
PpTestSpec.__new__.__defaults__ = (
    '', pp.Empty(), '', 'parseString', None, None, None,
)
24
25
class PyparsingExpressionTestCase(unittest.TestCase):
    """
    Base pyparsing testing class to parse various pyparsing expressions against
    given text strings. Subclasses must define a class attribute 'tests' which
    is a list of PpTestSpec instances.

    Each spec is run as its own subtest. A spec whose expected_fail_locn is
    None is expected to parse successfully, and its results are compared with
    expected_list / expected_dict when those are given. Any other spec is
    expected to raise a pyparsing parse exception at exactly that location.
    """

    if not hasattr(unittest.TestCase, 'subTest'):
        # Python 2 compatibility - minimal stand-in that announces the
        # subtest parameters and then runs the body inline
        from contextlib import contextmanager
        @contextmanager
        def subTest(self, **params):
            print('subTest:', params)
            yield

    tests = []
    # NOTE: runTest is documented with comments rather than a docstring on
    # purpose - unittest includes a test method's docstring in its reports,
    # so adding one would change the runner's output
    def runTest(self):
        # the base class has no specs of its own, so there is nothing to run
        if self.__class__ is PyparsingExpressionTestCase:
            return

        for test_spec in self.tests:
            # for each spec in the class's tests list, create a subtest
            # that will either:
            #  - parse the string with expected success, display the
            #    results, and validate the returned ParseResults
            #  - or parse the string with expected failure, display the
            #    error message and mark the error location, and validate
            #    the location against an expected value
            with self.subTest(test_spec=test_spec):
                test_spec.expr.streamline()
                print("\n{0} - {1}({2})".format(test_spec.desc,
                                                type(test_spec.expr).__name__,
                                                test_spec.expr))

                parsefn = getattr(test_spec.expr, test_spec.parse_fn)
                if test_spec.expected_fail_locn is None:
                    # expect success
                    result = parsefn(test_spec.text)
                    if test_spec.parse_fn == 'parseString':
                        print(result.dump())
                        # compare results against given list and/or dict
                        if test_spec.expected_list is not None:
                            self.assertEqual(result.asList(), test_spec.expected_list)
                        if test_spec.expected_dict is not None:
                            self.assertEqual(result.asDict(), test_spec.expected_dict)
                    elif test_spec.parse_fn == 'transformString':
                        print(result)
                        # the result is a single transformed string, so it is
                        # compared against a one-element expected list
                        if test_spec.expected_list is not None:
                            self.assertEqual([result], test_spec.expected_list)
                    elif test_spec.parse_fn == 'searchString':
                        print(result)
                        # the result holds one entry per match; convert it to
                        # plain nested lists so it can be compared with the
                        # expected list
                        if test_spec.expected_list is not None:
                            self.assertEqual(result.asList(), test_spec.expected_list)
                else:
                    # expect fail - only pyparsing's own parse exceptions
                    # count; anything else is a real error and propagates
                    # with its original traceback
                    try:
                        parsefn(test_spec.text)
                    except pp.ParseBaseException as exc:
                        if not hasattr(exc, '__traceback__'):
                            # Python 2 compatibility
                            from sys import exc_info
                            exc.__traceback__ = exc_info()[2]
                        print(pp.ParseException.explain(exc))
                        self.assertEqual(exc.loc, test_spec.expected_fail_locn)
                    else:
                        self.fail("failed to raise expected exception")
95
96
97# =========== TEST DEFINITIONS START HERE ==============
98
class TestLiteral(PyparsingExpressionTestCase):
    """Passing and failing parses using Literal to match one exact string."""

    tests = [
        # --- cases expected to parse successfully ---
        PpTestSpec(
            desc="Simple match",
            expr=pp.Literal("xyz"),
            text="xyz",
            expected_list=["xyz"],
        ),
        PpTestSpec(
            desc="Simple match after skipping whitespace",
            expr=pp.Literal("xyz"),
            text="  xyz",
            expected_list=["xyz"],
        ),
        # --- cases expected to fail; a lone Literal fails at location 0 ---
        PpTestSpec(
            desc="Simple fail - parse an empty string",
            expr=pp.Literal("xyz"),
            text="",
            expected_fail_locn=0,
        ),
        PpTestSpec(
            desc="Simple fail - parse a mismatching string",
            expr=pp.Literal("xyz"),
            text="xyu",
            expected_fail_locn=0,
        ),
        PpTestSpec(
            desc="Simple fail - parse a partially matching string",
            expr=pp.Literal("xyz"),
            text="xy",
            expected_fail_locn=0,
        ),
        # with one Literal per letter, "x" and "y" match first, so the
        # failure is expected at location 2 where "z" is missing
        PpTestSpec(
            desc="Fail - parse a partially matching string by matching individual letters",
            expr=pp.Literal("x") + pp.Literal("y") + pp.Literal("z"),
            text="xy",
            expected_fail_locn=2,
        ),
    ]
138
class TestCaselessLiteral(PyparsingExpressionTestCase):
    """CaselessLiteral matching, with results normalized to one case."""

    tests = [
        # input in any mix of cases comes back in the case used to define
        # each CaselessLiteral (see expected_list)
        PpTestSpec(
            desc="Match colors, converting to consistent case",
            expr=(
                pp.CaselessLiteral("RED")
                | pp.CaselessLiteral("GREEN")
                | pp.CaselessLiteral("BLUE")
            )[...],
            text="red Green BluE blue GREEN green rEd",
            expected_list=['RED', 'GREEN', 'BLUE', 'BLUE', 'GREEN', 'GREEN', 'RED'],
        ),
    ]
150
class TestWord(PyparsingExpressionTestCase):
    """Word matching: runs of characters drawn from a given character set."""

    tests = [
        PpTestSpec(
            desc="Simple Word match",
            expr=pp.Word("xy"),
            text="xxyxxyy",
            expected_list=["xxyxxyy"],
        ),
        # two Words in sequence, first with adjacent input and then with
        # the two runs separated by a space
        PpTestSpec(
            desc="Simple Word match of two separate Words",
            expr=pp.Word("x") + pp.Word("y"),
            text="xxxxxyy",
            expected_list=["xxxxx", "yy"],
        ),
        PpTestSpec(
            desc="Simple Word match of two separate Words - implicitly skips whitespace",
            expr=pp.Word("x") + pp.Word("y"),
            text="xxxxx yy",
            expected_list=["xxxxx", "yy"],
        ),
    ]
172
class TestCombine(PyparsingExpressionTestCase):
    """Contrast parsing real numbers with and without Combine."""

    tests = [
        # without Combine each number comes back as three separate tokens
        PpTestSpec(
            desc="Parsing real numbers - fail, parsed numbers are in pieces",
            expr=(
                pp.Word(pp.nums) + '.' + pp.Word(pp.nums)
            )[...],
            text="1.2 2.3 3.1416 98.6",
            expected_list=[
                '1', '.', '2',
                '2', '.', '3',
                '3', '.', '1416',
                '98', '.', '6',
            ],
        ),
        # with Combine the pieces of each number are joined into one token
        PpTestSpec(
            desc="Parsing real numbers - better, use Combine to combine multiple tokens into one",
            expr=pp.Combine(
                pp.Word(pp.nums) + '.' + pp.Word(pp.nums)
            )[...],
            text="1.2 2.3 3.1416 98.6",
            expected_list=['1.2', '2.3', '3.1416', '98.6'],
        ),
    ]
188
class TestRepetition(PyparsingExpressionTestCase):
    """Repeated matches using expr[...], OneOrMore and delimitedList."""

    tests = [
        PpTestSpec(
            desc="Match several words",
            expr=(pp.Word("x") | pp.Word("y"))[...],
            text="xxyxxyyxxyxyxxxy",
            expected_list=['xx', 'y', 'xx', 'yy', 'xx', 'y', 'x', 'y', 'xxx', 'y'],
        ),
        # the next two specs parse the same text and expect the same tokens,
        # once with the [...] notation and once with OneOrMore
        PpTestSpec(
            desc="Match several words, skipping whitespace",
            expr=(pp.Word("x") | pp.Word("y"))[...],
            text="x x  y xxy yxx y xyx  xxy",
            expected_list=['x', 'x', 'y', 'xx', 'y', 'y', 'xx', 'y', 'x', 'y', 'x', 'xx', 'y'],
        ),
        PpTestSpec(
            desc="Match several words, skipping whitespace (old style)",
            expr=pp.OneOrMore(pp.Word("x") | pp.Word("y")),
            text="x x  y xxy yxx y xyx  xxy",
            expected_list=['x', 'x', 'y', 'xx', 'y', 'y', 'xx', 'y', 'x', 'y', 'x', 'xx', 'y'],
        ),
        # each results name ends in '*'; expected_dict holds a list of every
        # match for each name
        PpTestSpec(
            desc="Match words and numbers - show use of results names to collect types of tokens",
            expr=(
                pp.Word(pp.alphas)("alpha*")
                | pp.pyparsing_common.integer("int*")
            )[...],
            text="sdlfj23084ksdfs08234kjsdlfkjd0934",
            expected_list=['sdlfj', 23084, 'ksdfs', 8234, 'kjsdlfkjd', 934],
            expected_dict={
                'alpha': ['sdlfj', 'ksdfs', 'kjsdlfkjd'],
                'int': [23084, 8234, 934],
            },
        ),
        PpTestSpec(
            desc="Using delimitedList (comma is the default delimiter)",
            expr=pp.delimitedList(pp.Word(pp.alphas)),
            text="xxyx,xy,y,xxyx,yxx, xy",
            expected_list=['xxyx', 'xy', 'y', 'xxyx', 'yxx', 'xy'],
        ),
        # combine=True yields a single token that still contains the delimiters
        PpTestSpec(
            desc="Using delimitedList, with ':' delimiter",
            expr=pp.delimitedList(
                pp.Word(pp.hexnums, exact=2), delim=':', combine=True
            ),
            text="0A:4B:73:21:FE:76",
            expected_list=['0A:4B:73:21:FE:76'],
        ),
    ]
230
class TestResultsName(PyparsingExpressionTestCase):
    """Attaching results names to expressions and reading them back as a dict."""

    tests = [
        # the first two specs name the same Literal, once with
        # setResultsName and once by calling the expression with the name
        PpTestSpec(
            desc="Match with results name",
            expr=pp.Literal("xyz").setResultsName("value"),
            text="xyz",
            expected_list=['xyz'],
            expected_dict={'value': 'xyz'},
        ),
        PpTestSpec(
            desc="Match with results name - using naming short-cut",
            expr=pp.Literal("xyz")("value"),
            text="xyz",
            expected_list=['xyz'],
            expected_dict={'value': 'xyz'},
        ),
        # the unnamed '=' token appears in the list but not in the dict
        PpTestSpec(
            desc="Define multiple results names",
            expr=(
                pp.Word(pp.alphas, pp.alphanums)("key")
                + '='
                + pp.pyparsing_common.integer("value")
            ),
            text="range=5280",
            expected_list=['range', '=', 5280],
            expected_dict={'key': 'range', 'value': 5280},
        ),
    ]
255
class TestGroups(PyparsingExpressionTestCase):
    """Grouping key=value pairs with Group, and keying them with Dict."""

    # suppressed '=' separator; it does not appear in any expected result
    EQ = pp.Suppress('=')

    tests = [
        PpTestSpec(
            desc="Define multiple results names in groups",
            expr=pp.Group(
                pp.Word(pp.alphas)("key")
                + EQ
                + pp.pyparsing_common.number("value")
            )[...],
            text="range=5280 long=-138.52 lat=46.91",
            expected_list=[['range', 5280], ['long', -138.52], ['lat', 46.91]],
        ),
        # Dict uses the first token of each group as the results name
        PpTestSpec(
            desc="Define multiple results names in groups - use Dict to define results names using parsed keys",
            expr=pp.Dict(
                pp.Group(
                    pp.Word(pp.alphas)
                    + EQ
                    + pp.pyparsing_common.number
                )[...]
            ),
            text="range=5280 long=-138.52 lat=46.91",
            expected_list=[['range', 5280], ['long', -138.52], ['lat', 46.91]],
            expected_dict={'lat': 46.91, 'long': -138.52, 'range': 5280},
        ),
        # values may be numbers, the words True/False, or quoted strings;
        # note that 'True' is returned as a str, not a bool
        PpTestSpec(
            desc="Define multiple value types",
            expr=pp.Dict(
                pp.Group(
                    pp.Word(pp.alphas)
                    + EQ
                    + (pp.pyparsing_common.number
                       | pp.oneOf("True False")
                       | pp.QuotedString("'"))
                )[...]
            ),
            text="long=-122.47 lat=37.82 public=True name='Golden Gate Bridge'",
            expected_list=[
                ['long', -122.47],
                ['lat', 37.82],
                ['public', 'True'],
                ['name', 'Golden Gate Bridge'],
            ],
            expected_dict={
                'long': -122.47,
                'lat': 37.82,
                'public': 'True',
                'name': 'Golden Gate Bridge',
            },
        ),
    ]
288
class TestParseAction(PyparsingExpressionTestCase):
    """Parse actions that convert or rearrange tokens at parse time."""

    tests = [
        PpTestSpec(
            desc="Parsing real numbers - use parse action to convert to float at parse time",
            expr=pp.Combine(
                pp.Word(pp.nums) + '.' + pp.Word(pp.nums)
            ).addParseAction(lambda t: float(t[0]))[...],
            text="1.2 2.3 3.1416 98.6",
            # note, these are now floats, not strs
            expected_list=[1.2, 2.3, 3.1416, 98.6],
        ),
        PpTestSpec(
            desc="Match with numeric string converted to int",
            expr=pp.Word("0123456789").addParseAction(lambda t: int(t[0])),
            text="12345",
            # note - result is type int, not str
            expected_list=[12345],
        ),
        # the next two specs chain two parse actions; the second receives
        # the int produced by the first
        # NOTE(review): datetime.utcfromtimestamp is deprecated in recent
        # Python 3 releases - confirm whether these specs should migrate
        PpTestSpec(
            desc="Use two parse actions to convert numeric string, then convert to datetime",
            expr=pp.Word(pp.nums).addParseAction(
                lambda t: int(t[0]),
                lambda t: datetime.utcfromtimestamp(t[0]),
            ),
            text="1537415628",
            expected_list=[datetime(2018, 9, 20, 3, 53, 48)],
        ),
        PpTestSpec(
            desc="Use tokenMap for parse actions that operate on a single-length token",
            expr=pp.Word(pp.nums).addParseAction(
                pp.tokenMap(int),
                pp.tokenMap(datetime.utcfromtimestamp),
            ),
            text="1537415628",
            expected_list=[datetime(2018, 9, 20, 3, 53, 48)],
        ),
        # the last two specs pass an existing callable as the parse action:
        # first the bound str method ':'.join, then the built-in sorted
        PpTestSpec(
            desc="Using a built-in function that takes a sequence of strs as a parse action",
            expr=pp.Word(pp.hexnums, exact=2)[...].addParseAction(':'.join),
            text="0A4B7321FE76",
            expected_list=['0A:4B:73:21:FE:76'],
        ),
        PpTestSpec(
            desc="Using a built-in function that takes a sequence of strs as a parse action",
            expr=pp.Word(pp.hexnums, exact=2)[...].addParseAction(sorted),
            text="0A4B7321FE76",
            expected_list=['0A', '21', '4B', '73', '76', 'FE'],
        ),
    ]
330
class TestResultsModifyingParseAction(PyparsingExpressionTestCase):
    """A parse action that adds named statistics to the parsed results."""

    def compute_stats_parse_action(t):
        # Used as a plain function while the class body is being built, so
        # it takes only the parsed tokens (no self).
        # by the time this parse action is called, parsed numeric words
        # have been converted to ints by a previous parse action, so
        # they can be treated as ints
        total = sum(t)
        t['sum'] = total
        t['ave'] = total / len(t)
        t['min'] = min(t)
        t['max'] = max(t)

    tests = [
        # the list of parsed ints is unchanged; the statistics show up
        # only as named results
        PpTestSpec(
            desc="A parse action that adds new key-values",
            expr=pp.pyparsing_common.integer[...].addParseAction(
                compute_stats_parse_action
            ),
            text="27 1 14 22 89",
            expected_list=[27, 1, 14, 22, 89],
            expected_dict={'ave': 30.6, 'max': 89, 'min': 1, 'sum': 153},
        ),
    ]
350
class TestRegex(PyparsingExpressionTestCase):
    """Matching real numbers with a single Regex expression."""

    tests = [
        PpTestSpec(
            desc="Parsing real numbers - using Regex instead of Combine",
            expr=pp.Regex(r'\d+\.\d+').addParseAction(
                lambda t: float(t[0])
            )[...],
            text="1.2 2.3 3.1416 98.6",
            # note, these are now floats, not strs
            expected_list=[1.2, 2.3, 3.1416, 98.6],
        ),
    ]
360
class TestParseCondition(PyparsingExpressionTestCase):
    """Conditions that accept or reject a match based on the parsed tokens."""

    # In both specs only 14, 35 and 77 are expected: the results stop at
    # 12, the first value that is not a multiple of 7, so 28 is not included.
    tests = [
        PpTestSpec(
            desc="Define a condition to only match numeric values that are multiples of 7",
            expr=pp.Word(pp.nums).addCondition(
                lambda t: int(t[0]) % 7 == 0
            )[...],
            text="14 35 77 12 28",
            expected_list=['14', '35', '77'],
        ),
        # here the parse action converts to int first, so the condition
        # works on ints and the results are ints
        PpTestSpec(
            desc="Separate conversion to int and condition into separate parse action/conditions",
            expr=(
                pp.Word(pp.nums)
                .addParseAction(lambda t: int(t[0]))
                .addCondition(lambda t: t[0] % 7 == 0)
            )[...],
            text="14 35 77 12 28",
            expected_list=[14, 35, 77],
        ),
    ]
377
class TestTransformStringUsingParseActions(PyparsingExpressionTestCase):
    """Rewriting matched text in place using transformString and a parse action."""

    # markup symbol -> name of the HTML tag that replaces it
    markup_convert_map = {
        '*': 'B',
        '_': 'U',
        '/': 'I',
    }

    def markup_convert(t):
        # Used as a plain function while the class body is being built, so
        # it takes only the parsed tokens (no self); the map is looked up
        # through the class name when the action runs.
        symbol_to_tag = TestTransformStringUsingParseActions.markup_convert_map
        tag = symbol_to_tag[t.markup_symbol]
        return "<{0}>{1}</{2}>".format(tag, t.body, tag)

    tests = [
        # transformString returns one string, so expected_list holds a
        # single element
        PpTestSpec(
            desc="Use transformString to convert simple markup to HTML",
            expr=(
                pp.oneOf(markup_convert_map)('markup_symbol')
                + "("
                + pp.CharsNotIn(")")('body')
                + ")"
            ).addParseAction(markup_convert),
            text="Show in *(bold), _(underscore), or /(italic) type",
            parse_fn='transformString',
            expected_list=['Show in <B>bold</B>, <U>underscore</U>, or <I>italic</I> type'],
        ),
    ]
398
class TestCommonHelperExpressions(PyparsingExpressionTestCase):
    """Examples of pyparsing's helper functions and pyparsing_common expressions."""

    tests = [
        PpTestSpec(
            desc="A comma-delimited list of words",
            expr=pp.delimitedList(pp.Word(pp.alphas)),
            text="this, that, blah,foo,   bar",
            expected_list=['this', 'that', 'blah', 'foo', 'bar'],
        ),
        # each array is a leading count followed by that many words; a
        # count of 0 gives an empty list
        PpTestSpec(
            desc="A counted array of words",
            expr=pp.countedArray(pp.Word('ab'))[...],
            text="2 aaa bbb 0 3 abab bbaa abbab",
            expected_list=[['aaa', 'bbb'], [], ['abab', 'bbaa', 'abbab']],
        ),
        # the /* ... */ comment in the text is skipped and does not appear
        # in the results
        PpTestSpec(
            desc="skipping comments with ignore",
            expr=(
                pp.pyparsing_common.identifier('lhs')
                + '='
                + pp.pyparsing_common.fnumber('rhs')
            ).ignore(pp.cppStyleComment),
            text="abc_100 = /* value to be tested */ 3.1416",
            expected_list=['abc_100', '=', 3.1416],
            expected_dict={'lhs': 'abc_100', 'rhs': 3.1416},
        ),
        # the description names the helper as it is spelled in the code
        # below (delimitedList)
        PpTestSpec(
            desc="some pre-defined expressions in pyparsing_common, and building a dotted identifier with delimitedList",
            expr=(
                pp.pyparsing_common.number("id_num")
                + pp.delimitedList(pp.pyparsing_common.identifier, '.', combine=True)("name")
                + pp.pyparsing_common.ipv4_address("ip_address")
            ),
            text="1001 www.google.com 192.168.10.199",
            expected_list=[1001, 'www.google.com', '192.168.10.199'],
            expected_dict={
                'id_num': 1001,
                'name': 'www.google.com',
                'ip_address': '192.168.10.199',
            },
        ),
        PpTestSpec(
            desc="using oneOf (shortcut for Literal('a') | Literal('b') | Literal('c'))",
            expr=pp.oneOf("a b c")[...],
            text="a b a b b a c c a b b",
            expected_list=['a', 'b', 'a', 'b', 'b', 'a', 'c', 'c', 'a', 'b', 'b'],
        ),
        # nestedExpr is called with no arguments for parentheses and with
        # explicit '{' / '}' delimiters for braces
        PpTestSpec(
            desc="parsing nested parentheses",
            expr=pp.nestedExpr(),
            text="(a b (c) d (e f g ()))",
            expected_list=[['a', 'b', ['c'], 'd', ['e', 'f', 'g', []]]],
        ),
        PpTestSpec(
            desc="parsing nested braces",
            expr=(
                pp.Keyword('if')
                + pp.nestedExpr()('condition')
                + pp.nestedExpr('{', '}')('body')
            ),
            text='if ((x == y) || !z) {printf("{}");}',
            expected_list=['if', [['x', '==', 'y'], '||', '!z'], ['printf(', '"{}"', ');']],
            expected_dict={
                'condition': [[['x', '==', 'y'], '||', '!z']],
                'body': [['printf(', '"{}"', ');']],
            },
        ),
    ]
455
456
def _get_decl_line_no(cls):
    """Return the line number at which the source of cls begins."""
    from inspect import getsourcelines
    # getsourcelines gives (list of source lines, starting line number)
    _source_lines, first_line_no = getsourcelines(cls)
    return first_line_no
460
461
# collect the direct subclasses of PyparsingExpressionTestCase, ordered by
# the line number on which each class is declared
test_case_classes = sorted(PyparsingExpressionTestCase.__subclasses__(),
                           key=_get_decl_line_no)

# make one instance of each class into a suite and run it - this will run
# the tests in the same order they are declared in this module
#
# runnable from setup.py using "python setup.py test -s simple_unit_tests.suite"
#
suite = unittest.TestSuite([case_class() for case_class in test_case_classes])
472
473
474# ============ MAIN ================
475
if __name__ == '__main__':
    # Run the suite with a text runner and report the outcome through the
    # process exit status (0 = all tests passed, 1 = something failed).
    import sys
    if sys.version_info[0] < 3:
        print("simple_unit_tests.py requires Python 3.x - exiting...")
        # NOTE(review): status 0 is kept from the original code; presumably
        # an unsupported interpreter is meant to count as "skipped" - confirm
        sys.exit(0)

    result = unittest.TextTestRunner().run(suite)

    # use sys.exit rather than the exit() helper that the site module adds
    # for interactive use, which is not available when site is not loaded
    sys.exit(0 if result.wasSuccessful() else 1)
485