1"""Utilities for diffing test files and their output"""
2
3import codecs
4import difflib
5import re
6
7from cram._encoding import b
8
9__all__ = ['esc', 'glob', 'regex', 'unified_diff']
10
def _regex(pattern, s):
    """Match s against pattern, anchored at both ends of s.

    The pattern is applied from the start of s (re.match) with an
    end-of-string anchor appended. Only that anchor is appended; the
    pattern is not wrapped in a group, so a top-level alternation binds
    the anchor to its last branch only.

    Returns whatever re.match() returns (a match object or None), or
    False when the pattern is not a valid regular expression.

    >>> from cram._encoding import b
    >>> [bool(_regex(r, b('foobar'))) for r in (b('foo.*'), b('***'))]
    [True, False]
    """
    anchored = pattern + b(r'\Z')
    try:
        found = re.match(anchored, s)
    except re.error:
        # An invalid expression simply never matches.
        found = False
    return found
22
def _glob(el, l):
    r"""Match a glob-like pattern.

    The only supported special characters are * (any run of characters)
    and ? (any single character). A backslash escapes *, ? and itself;
    any other backslash, including one at the very end of the pattern,
    is matched literally.

    el is the pattern and l the line to test, both byte strings. The
    pattern is translated to a regular expression and handed to _regex(),
    whose result is returned.

    >>> from cram._encoding import b
    >>> bool(_glob(b(r'\* \\ \? fo?b*'), b('* \\ ? foobar')))
    True
    >>> bool(_glob(b('foo\\'), b('foo\\')))
    True
    """
    backslash = b('\\')
    escapable = b('*?\\')
    # Collect the translated pieces and join once instead of repeatedly
    # concatenating byte strings.
    parts = []
    i, n = 0, len(el)
    while i < n:
        # Slice rather than index so that c is a byte string on both
        # Python 2 and Python 3.
        c = el[i:i + 1]
        i += 1
        # "i < n" keeps a trailing backslash from reading past the end of
        # the pattern; it then falls through and is matched literally.
        if c == backslash and i < n and el[i:i + 1] in escapable:
            # Backslash plus the escaped character is already valid regex.
            parts.append(el[i - 1:i + 1])
            i += 1
        elif c == b('*'):
            parts.append(b('.*'))
        elif c == b('?'):
            parts.append(b('.'))
        else:
            parts.append(re.escape(c))
    return _regex(b('').join(parts), l)
48
def _matchannotation(keyword, matchfunc, el, l):
    """Run matchfunc if el ends with the ' (keyword)' annotation.

    Returns False when el does not carry the annotation. Otherwise the
    annotation is removed from el, the final character is removed from l,
    and the result of matchfunc on the two is returned.
    """
    suffix = b(' (%s)\n' % keyword)
    if not el.endswith(suffix):
        return False
    pattern = el[:-len(suffix)]
    return matchfunc(pattern, l[:-1])
53
def regex(el, l):
    """Match l against el as a regular expression if el ends in '(re)'"""
    return _matchannotation(keyword='re', matchfunc=_regex, el=el, l=l)
57
def glob(el, l):
    """Match l against el as a glob pattern if el ends in '(glob)'"""
    return _matchannotation(keyword='glob', matchfunc=_glob, el=el, l=l)
61
def esc(el, l):
    """Apply an escape match to a line annotated with '(esc)'

    el and l are compared for equality after decoding the backslash
    escapes of whichever of them ends with the ' (esc)' annotation. el is
    decoded first; l is only decoded if that comparison fails.

    A line whose escapes cannot be decoded is compared as written instead
    of raising ValueError.

    Returns True if the lines are equal, False otherwise.
    """
    ann = b(' (esc)\n')

    def unescape(line):
        """Return line with its '(esc)' annotation decoded, if possible"""
        if not line.endswith(ann):
            return line
        try:
            return codecs.escape_decode(line[:-len(ann)])[0] + b('\n')
        except ValueError:
            # Malformed escape sequence (e.g. a trailing backslash): leave
            # the line untouched so it is compared literally.
            return line

    el = unescape(el)
    if el == l:
        return True
    return el == unescape(l)
74
class _SequenceMatcher(difflib.SequenceMatcher, object):
    """Like difflib.SequenceMatcher, but supports custom match functions

    Each match function is called as matcher(el, l) with an element of a
    and an element of b; a true result makes the two count as equal.
    """

    def __init__(self, *args, **kwargs):
        """Accept SequenceMatcher's arguments plus a 'matchers' keyword.

        matchers is a collection of match functions. Leaving it out or
        passing None means no custom matching is done.
        """
        # An explicit matchers=None is treated like an omitted argument so
        # that _match() always has something to iterate over.
        self._matchers = kwargs.pop('matchers', None) or []
        super(_SequenceMatcher, self).__init__(*args, **kwargs)

    def _match(self, el, l):
        """Tests for matching lines using custom matchers"""
        for matcher in self._matchers:
            if matcher(el, l):
                return True
        return False

    def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi]

        The bounds default to the whole of each sequence, mirroring the
        defaults difflib gained in Python 3.9. The return value is the
        superclass's result.
        """
        if ahi is None:
            ahi = len(self.a)
        if bhi is None:
            bhi = len(self.b)
        # SequenceMatcher uses find_longest_match() to slowly whittle down
        # the differences between a and b until it has each matching block.
        # Because of this, we can end up doing the same matches many times.
        replaced = []
        try:
            pairs = zip(self.a[alo:ahi], self.b[blo:bhi])
            for n, (el, line) in enumerate(pairs):
                if el != line and self._match(el, line):
                    # This fools the superclass's method into thinking that
                    # the regex/glob in a is identical to b by replacing
                    # a's line (the expected output) with b's line (the
                    # actual output).
                    self.a[alo + n] = line
                    replaced.append((alo + n, el))
            return super(_SequenceMatcher, self).find_longest_match(
                alo, ahi, blo, bhi)
        finally:
            # Restore the lines replaced above, even if a matcher or the
            # superclass raised. Otherwise, the diff output would seem to
            # imply that the tests never had any regexes/globs.
            for index, el in replaced:
                self.a[index] = el
108
def unified_diff(l1, l2, fromfile=b(''), tofile=b(''), fromfiledate=b(''),
                 tofiledate=b(''), n=3, lineterm=b('\n'), matchers=None):
    r"""Compare two sequences of lines; generate the delta as a unified diff.

    This is like difflib.unified_diff(), but allows custom matchers.

    l1 and l2 are the sequences to compare. fromfile, tofile, fromfiledate
    and tofiledate fill in the two header lines, n is the number of
    context lines, and lineterm terminates the header and hunk lines.
    matchers is an optional list of functions passed on to the sequence
    matcher. Nothing is generated when the sequences are considered equal.

    >>> from cram._encoding import b
    >>> l1 = [b('a\n'), b('? (glob)\n')]
    >>> l2 = [b('a\n'), b('b\n')]
    >>> (list(unified_diff(l1, l2, b('f1'), b('f2'), b('1970-01-01'),
    ...                    b('1970-01-02'))) ==
    ...  [b('--- f1\t1970-01-01\n'), b('+++ f2\t1970-01-02\n'),
    ...   b('@@ -1,2 +1,2 @@\n'), b(' a\n'), b('-? (glob)\n'), b('+b\n')])
    True

    >>> from cram._diff import glob
    >>> list(unified_diff(l1, l2, matchers=[glob]))
    []
    """
    if matchers is None:
        matchers = []
    seqmatcher = _SequenceMatcher(None, l1, l2, matchers=matchers)
    header_pending = True
    for hunk in seqmatcher.get_grouped_opcodes(n):
        if header_pending:
            # The file header is emitted once, ahead of the first hunk.
            header_pending = False
            fromsuffix = b('')
            if fromfiledate:
                fromsuffix = b('\t') + fromfiledate
            tosuffix = b('')
            if tofiledate:
                tosuffix = b('\t') + tofiledate
            yield b('--- ') + fromfile + fromsuffix + lineterm
            yield b('+++ ') + tofile + tosuffix + lineterm

        # A hunk spans from the start of its first opcode to the end of
        # its last one, on both sides.
        first, last = hunk[0], hunk[-1]
        astart, aend = first[1], last[2]
        bstart, bend = first[3], last[4]
        ranges = (astart + 1, aend - astart, bstart + 1, bend - bstart)
        yield b("@@ -%d,%d +%d,%d @@" % ranges) + lineterm

        for tag, alo, ahi, blo, bhi in hunk:
            if tag == 'equal':
                for line in l1[alo:ahi]:
                    yield b(' ') + line
            else:
                if tag in ('replace', 'delete'):
                    for line in l1[alo:ahi]:
                        yield b('-') + line
                if tag in ('replace', 'insert'):
                    for line in l2[blo:bhi]:
                        yield b('+') + line
159