"""Diff parsing functions and classes."""
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

import re
import textwrap
import traceback

from . import types as t

from .util import (
    ApplicationError,
)


def parse_diff(lines):
    """
    Parse the lines of a git diff and return one FileDiff per file found.

    :type lines: list[str]
    :rtype: list[FileDiff]
    """
    return DiffParser(lines).files


class FileDiff:
    """Parsed diff for a single file."""
    def __init__(self, old_path, new_path):
        """
        :type old_path: str
        :type new_path: str
        """
        self.old = DiffSide(old_path, new=False)
        self.new = DiffSide(new_path, new=True)
        self.headers = []  # type: t.List[str]
        self.binary = False

    def append_header(self, line):
        """
        Record a header line which has no dedicated handling.

        :type line: str
        """
        self.headers.append(line)

    @property
    def is_complete(self):
        """
        True when both sides have received every line announced by the current range.

        :rtype: bool
        """
        return self.old.is_complete and self.new.is_complete


class DiffSide:
    """Parsed diff for a single 'side' of a single file."""
    def __init__(self, path, new):
        """
        :type path: str
        :type new: bool
        """
        self.path = path
        self.new = new
        # Content lines belonging only to this side start with this prefix.
        self.prefix = '+' if self.new else '-'
        self.eof_newline = True
        self.exists = True

        self.lines = []  # type: t.List[t.Tuple[int, str]]
        self.lines_and_context = []  # type: t.List[t.Tuple[int, str]]
        self.ranges = []  # type: t.List[t.Tuple[int, int]]

        self._next_line_number = 0
        self._lines_remaining = 0
        # Line number of the first line in the currently open run of changed lines, or 0 if no run is open.
        self._range_start = 0

    def set_start(self, line_start, line_count):
        """
        Begin a new range (hunk) for this side.

        :type line_start: int
        :type line_count: int
        """
        self._next_line_number = line_start
        self._lines_remaining = line_count
        self._range_start = 0

    def append(self, line):
        """
        Record one content line of the current range for this side.

        The line is always added to ``lines_and_context``. Lines starting with this side's prefix
        are also added to ``lines``, and consecutive runs of them are recorded in ``ranges`` as
        inclusive (first, last) line number tuples.

        Raises an Exception if the range has no lines remaining or the line has an unexpected prefix.

        :type line: str
        """
        if self._lines_remaining <= 0:
            raise Exception('Diff range overflow.')

        entry = self._next_line_number, line
        is_context = line.startswith(' ')

        if is_context:
            pass
        elif line.startswith(self.prefix):
            self.lines.append(entry)

            if not self._range_start:
                self._range_start = self._next_line_number
        else:
            raise Exception('Unexpected diff content prefix.')

        self.lines_and_context.append(entry)

        self._lines_remaining -= 1

        if self._range_start:
            # The context check must come first: a context line which is also the last line of the
            # range closes the run at the previous line and must not be included in it.
            if is_context:
                range_end = self._next_line_number - 1
            elif self.is_complete:
                range_end = self._next_line_number
            else:
                range_end = 0

            if range_end:
                self.ranges.append((self._range_start, range_end))
                self._range_start = 0

        self._next_line_number += 1

    @property
    def is_complete(self):
        """
        True when no more lines are expected for the current range.

        :rtype: bool
        """
        return self._lines_remaining == 0

    def format_lines(self, context=True):
        """
        Return the recorded lines formatted as '<path>:<line number> <line>'.

        :type context: bool
        :rtype: list[str]
        """
        if context:
            lines = self.lines_and_context
        else:
            lines = self.lines

        return ['%s:%4d %s' % (self.path, line[0], line[1]) for line in lines]


class DiffParser:
    """
    Parse diff lines.

    Parsing happens in the constructor. ``self.action`` holds the handler for the next line and is
    switched by the handlers as the parser moves between start, header, range and content lines.
    The result is available in ``self.files``.
    """
    def __init__(self, lines):
        """
        Raises ApplicationError, including the line number and surrounding lines, if a line cannot be parsed.

        :type lines: list[str]
        """
        self.lines = lines
        self.files = []  # type: t.List[FileDiff]

        self.action = self.process_start
        self.line_number = 0
        self.previous_line = None  # type: t.Optional[str]
        self.line = None  # type: t.Optional[str]
        self.file = None  # type: t.Optional[FileDiff]

        for self.line in self.lines:
            self.line_number += 1

            try:
                self.action()
            except Exception as ex:
                message = textwrap.dedent('''
                %s

                Line: %d
                Previous: %s
                Current: %s
                %s
                ''').strip() % (
                    ex,
                    self.line_number,
                    self.previous_line or '',
                    self.line or '',
                    traceback.format_exc(),
                )

                raise ApplicationError(message.strip())

            self.previous_line = self.line

        self.complete_file()

    def process_start(self):
        """Process a diff start line."""
        self.complete_file()

        # The opening quotes are captured so the matching closing quotes can be removed below.
        # The greedy path groups would otherwise always swallow a trailing quote.
        match = re.search(r'^diff --git (?P<old_quote>"?)a/(?P<old_path>.*) (?P<new_quote>"?)b/(?P<new_path>.*)$', self.line)

        if not match:
            raise Exception('Unexpected diff start line.')

        old_path = match.group('old_path')
        new_path = match.group('new_path')

        # NOTE: escape sequences inside quoted paths are not decoded.
        if match.group('old_quote') and old_path.endswith('"'):
            old_path = old_path[:-1]

        if match.group('new_quote') and new_path.endswith('"'):
            new_path = new_path[:-1]

        self.file = FileDiff(old_path, new_path)
        self.action = self.process_continue

    def process_range(self):
        """Process a diff range line."""
        # Unified format is '@@ -start[,count] +start[,count] @@'.
        # When only one number is given it is the start line and the count is 1.
        match = re.search(r'^@@ -(?P<old_start>[0-9]+)(,(?P<old_count>[0-9]+))? \+(?P<new_start>[0-9]+)(,(?P<new_count>[0-9]+))? @@', self.line)

        if not match:
            raise Exception('Unexpected diff range line.')

        old_count = match.group('old_count')
        new_count = match.group('new_count')

        self.file.old.set_start(int(match.group('old_start')), 1 if old_count is None else int(old_count))
        self.file.new.set_start(int(match.group('new_start')), 1 if new_count is None else int(new_count))
        self.action = self.process_content

    def process_continue(self):
        """Process a diff start, range or header line."""
        if self.line.startswith('diff '):
            self.process_start()
        elif self.line.startswith('@@ '):
            self.process_range()
        else:
            self.process_header()

    def process_header(self):
        """Process a diff header line."""
        if self.line.startswith('Binary files '):
            self.file.binary = True
        elif self.line == '--- /dev/null':
            self.file.old.exists = False
        elif self.line == '+++ /dev/null':
            self.file.new.exists = False
        else:
            self.file.append_header(self.line)

    def process_content(self):
        """Process a diff content line."""
        if self.line == r'\ No newline at end of file':
            # The marker applies to whichever side(s) the previous content line belongs to.
            if self.previous_line.startswith(' '):
                self.file.old.eof_newline = False
                self.file.new.eof_newline = False
            elif self.previous_line.startswith('-'):
                self.file.old.eof_newline = False
            elif self.previous_line.startswith('+'):
                self.file.new.eof_newline = False
            else:
                raise Exception('Unexpected previous diff content line.')

            return

        if self.file.is_complete:
            # The current range has been fully consumed, so this line starts something new.
            self.process_continue()
            return

        if self.line.startswith(' '):
            self.file.old.append(self.line)
            self.file.new.append(self.line)
        elif self.line.startswith('-'):
            self.file.old.append(self.line)
        elif self.line.startswith('+'):
            self.file.new.append(self.line)
        else:
            raise Exception('Unexpected diff content line.')

    def complete_file(self):
        """Complete processing of the current file, if any."""
        if not self.file:
            return

        self.files.append(self.file)