"""Utilities for diffing test files and their output"""

import codecs
import difflib
import re

from cram._encoding import b

__all__ = ['esc', 'glob', 'regex', 'unified_diff']

def _regex(pattern, s):
    """Match a regular expression or return False if invalid.

    The pattern is anchored at the end, so it has to match all of s
    (re.match already anchors it at the start). Returns the match
    object on success, None when the pattern does not match, and False
    when the pattern cannot be compiled.

    >>> from cram._encoding import b
    >>> [bool(_regex(r, b('foobar'))) for r in (b('foo.*'), b('***'))]
    [True, False]
    """
    try:
        return re.match(pattern + b(r'\Z'), s)
    except re.error:
        return False

def _glob(el, l):
    r"""Match a glob-like pattern.

    The only supported special characters are * and ?. Escaping is
    supported. A backslash that is not followed by *, ? or another
    backslash (including a backslash at the very end of the pattern)
    is matched literally.

    The glob is translated into a regular expression and handed to
    _regex(), whose result is returned.

    >>> from cram._encoding import b
    >>> bool(_glob(b(r'\* \\ \? fo?b*'), b('* \\ ? foobar')))
    True
    >>> bool(_glob(b('foo\\'), b('foo\\')))
    True
    """
    i, n = 0, len(el)
    parts = []
    while i < n:
        # Slice rather than index so that c is a byte string on both
        # Python 2 and Python 3.
        c = el[i:i + 1]
        i += 1
        # The bounds check matters twice over: indexing past the end would
        # raise IndexError, and an empty slice is "in" any byte string.
        if c == b('\\') and i < n and el[i:i + 1] in b('*?\\'):
            # Copy the backslash and the escaped character verbatim; the
            # pair is already a valid regular expression escape.
            parts.append(el[i - 1:i + 1])
            i += 1
        elif c == b('*'):
            parts.append(b('.*'))
        elif c == b('?'):
            parts.append(b('.'))
        else:
            parts.append(re.escape(c))
    return _regex(b('').join(parts), l)

def _matchannotation(keyword, matchfunc, el, l):
    """Apply match function based on annotation keyword

    el is only considered when it ends with ' (<keyword>)' followed by a
    newline. The annotation is stripped from el and the final character
    of l (presumably its newline) is dropped before both are passed to
    matchfunc. Returns False when el does not carry the annotation,
    otherwise whatever matchfunc returns.
    """
    ann = b(' (%s)\n' % keyword)
    return el.endswith(ann) and matchfunc(el[:-len(ann)], l[:-1])

def regex(el, l):
    """Apply a regular expression match to a line annotated with '(re)'"""
    return _matchannotation('re', _regex, el, l)

def glob(el, l):
    """Apply a glob match to a line annotated with '(glob)'"""
    return _matchannotation('glob', _glob, el, l)

def _unescape(line):
    """Decode a line annotated with '(esc)'.

    Lines without the annotation are returned unchanged. So are lines
    whose escape sequences are malformed (for example a truncated hex
    escape); they then simply take part in a literal comparison instead
    of raising ValueError.
    """
    ann = b(' (esc)\n')
    if not line.endswith(ann):
        return line
    try:
        return codecs.escape_decode(line[:-len(ann)])[0] + b('\n')
    except ValueError:
        return line

def esc(el, l):
    """Apply an escape match to a line annotated with '(esc)'

    el is decoded first and compared with l as given; if that fails, l
    is decoded as well and the comparison is repeated. Returns True when
    either comparison succeeds and False otherwise. Malformed escape
    sequences never raise; the affected line is compared literally.
    """
    el = _unescape(el)
    if el == l:
        return True
    return el == _unescape(l)

class _SequenceMatcher(difflib.SequenceMatcher, object):
    """Like difflib.SequenceMatcher, but supports custom match functions

    The extra keyword argument 'matchers' is a list of callables taking
    (expected line, actual line) and returning a true value when the two
    should be treated as equal. Sequence a is assigned to by index while
    matching, so it has to be mutable (e.g. a list).
    """
    def __init__(self, *args, **kwargs):
        self._matchers = kwargs.pop('matchers', [])
        super(_SequenceMatcher, self).__init__(*args, **kwargs)

    def _match(self, el, l):
        """Tests for matching lines using custom matchers"""
        return any(matcher(el, l) for matcher in self._matchers)

    def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi]

        The defaults mirror the ones difflib gained in Python 3.9; None
        is resolved here so that the call into the superclass also works
        on interpreters whose find_longest_match() has no defaults.
        """
        if ahi is None:
            ahi = len(self.a)
        if bhi is None:
            bhi = len(self.b)
        # SequenceMatcher uses find_longest_match() to slowly whittle down
        # the differences between a and b until it has each matching block.
        # Because of this, we can end up doing the same matches many times.
        replaced = []
        try:
            for n, (el, line) in enumerate(zip(self.a[alo:ahi],
                                               self.b[blo:bhi])):
                if el != line and self._match(el, line):
                    # This fools the superclass's method into thinking that
                    # the regex/glob in a is identical to b by replacing a's
                    # line (the expected output) with b's line (the actual
                    # output).
                    self.a[alo + n] = line
                    replaced.append((n, el))
            return super(_SequenceMatcher, self).find_longest_match(
                alo, ahi, blo, bhi)
        finally:
            # Restore the lines replaced above. Otherwise, the diff output
            # would seem to imply that the tests never had any
            # regexes/globs. Doing it in a finally clause keeps a intact
            # even if a matcher or the superclass raises.
            for n, el in replaced:
                self.a[alo + n] = el

def unified_diff(l1, l2, fromfile=b(''), tofile=b(''), fromfiledate=b(''),
                 tofiledate=b(''), n=3, lineterm=b('\n'), matchers=None):
    r"""Compare two sequences of lines; generate the delta as a unified diff.

    This is like difflib.unified_diff(), but allows custom matchers.

    l1 holds the expected lines and l2 the actual lines. fromfile, tofile,
    fromfiledate and tofiledate fill in the '---' and '+++' header lines (a
    date is preceded by a tab and left out when empty). n is the number of
    context lines. lineterm is appended to the header and hunk lines only;
    content lines are yielded as they are, behind their ' ', '-' or '+'
    prefix. matchers is an optional list of functions such as regex, glob
    and esc that decide whether an expected line matches an actual line.

    Nothing is yielded when the sequences are considered equal. l1 is
    modified temporarily while the comparison runs, so it has to be a
    mutable sequence.

    >>> from cram._encoding import b
    >>> l1 = [b('a\n'), b('? (glob)\n')]
    >>> l2 = [b('a\n'), b('b\n')]
    >>> (list(unified_diff(l1, l2, b('f1'), b('f2'), b('1970-01-01'),
    ...                    b('1970-01-02'))) ==
    ...  [b('--- f1\t1970-01-01\n'), b('+++ f2\t1970-01-02\n'),
    ...   b('@@ -1,2 +1,2 @@\n'), b(' a\n'), b('-? (glob)\n'), b('+b\n')])
    True

    >>> from cram._diff import glob
    >>> list(unified_diff(l1, l2, matchers=[glob]))
    []
    """
    if matchers is None:
        matchers = []
    started = False
    matcher = _SequenceMatcher(None, l1, l2, matchers=matchers)
    for group in matcher.get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, right before the first hunk.
            if fromfiledate:
                fromdate = b('\t') + fromfiledate
            else:
                fromdate = b('')
            if tofiledate:
                todate = b('\t') + tofiledate
            else:
                todate = b('')
            yield b('--- ') + fromfile + fromdate + lineterm
            yield b('+++ ') + tofile + todate + lineterm
            started = True
        # A hunk spans from the start of its first opcode to the end of its
        # last one, in both sequences.
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield (b("@@ -%d,%d +%d,%d @@" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1)) +
               lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in l1[i1:i2]:
                    yield b(' ') + line
                continue
            if tag == 'replace' or tag == 'delete':
                for line in l1[i1:i2]:
                    yield b('-') + line
            if tag == 'replace' or tag == 'insert':
                for line in l2[j1:j2]:
                    yield b('+') + line