#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>

import concurrent.futures
import os
import shlex
import shutil
import subprocess
from typing import Dict, Iterator, List, Optional, Sequence, Tuple, Union

from . import global_data
from .collect import lines_for_path
from .diff_speedup import changed_center

# Lines of the two files whose patch is currently being parsed. They are set by
# Differ.__call__() before parse_patch() runs and read by Chunk.finalize().
left_lines: Tuple[str, ...] = ()
right_lines: Tuple[str, ...] = ()
# Command templates; _CONTEXT_ is replaced by the number of context lines.
GIT_DIFF = 'git diff --no-color --no-ext-diff --exit-code -U_CONTEXT_ --no-index --'
DIFF_DIFF = 'diff -p -U _CONTEXT_ --'
# PIDs of diff subprocesses that are currently running.
worker_processes: List[int] = []


def find_differ() -> Optional[str]:
    """Return the command template of the first usable diff program.

    git is preferred when it is on PATH and ``git --help`` exits with 0,
    otherwise plain ``diff`` is used when it is on PATH. Returns None when
    neither is available.
    """
    if shutil.which('git') and subprocess.Popen(
        ['git', '--help'], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL
    ).wait() == 0:
        return GIT_DIFF
    if shutil.which('diff'):
        return DIFF_DIFF
    return None


def set_diff_command(opt: str) -> None:
    """Store the diff command template to use in ``global_data.cmd``.

    ``opt`` is either the literal ``'auto'`` (detect git or diff) or a command
    template that is used verbatim.

    Raises SystemExit when ``opt`` is ``'auto'`` and no diff program is found.
    """
    if opt == 'auto':
        cmd = find_differ()
        if cmd is None:
            raise SystemExit('Failed to find either the git or diff programs on your system')
    else:
        cmd = opt
    global_data.cmd = cmd


def run_diff(file1: str, file2: str, context: int = 3) -> Tuple[bool, Union[int, bool], str]:
    """Run the configured diff command on two files.

    Returns a tuple ``(ok, is_different, patch)``. When the command exits with
    0 or 1 the result is ``(True, returncode == 1, stdout)``. For any other
    exit code the result is ``(False, returncode, stderr)``.
    """
    cmd = shlex.split(global_data.cmd.replace('_CONTEXT_', str(context)))
    # we resolve symlinks because git diff does not follow symlinks, while diff
    # does. We want consistent behavior, also for integration with git difftool
    # we always want symlinks to be followed.
    path1 = os.path.realpath(file1)
    path2 = os.path.realpath(file2)
    p = subprocess.Popen(
        cmd + [path1, path2],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.DEVNULL)
    worker_processes.append(p.pid)
    try:
        stdout, stderr = p.communicate()
        returncode = p.wait()
    finally:
        # Never leave a stale pid behind, even if communicate() is interrupted.
        worker_processes.remove(p.pid)
    if returncode in (0, 1):
        return True, returncode == 1, stdout.decode('utf-8')
    return False, returncode, stderr.decode('utf-8')


class Chunk:
    """A run of consecutive lines in a hunk that are all context or all changes.

    ``left_start``/``right_start`` are 0-based line indices and
    ``left_count``/``right_count`` the number of lines on each side.
    """

    __slots__ = ('is_context', 'left_start', 'right_start', 'left_count', 'right_count', 'centers')

    def __init__(self, left_start: int, right_start: int, is_context: bool = False) -> None:
        self.is_context = is_context
        self.left_start = left_start
        self.right_start = right_start
        self.left_count = self.right_count = 0
        # One changed_center() result per line pair; only set by finalize() for
        # change chunks with equally many lines on both sides.
        self.centers: Optional[Tuple[Tuple[int, int], ...]] = None

    def add_line(self) -> None:
        """Record a line that exists only on the right side."""
        self.right_count += 1

    def remove_line(self) -> None:
        """Record a line that exists only on the left side."""
        self.left_count += 1

    def context_line(self) -> None:
        """Record a line that is present on both sides."""
        self.left_count += 1
        self.right_count += 1

    def finalize(self) -> None:
        """Compute ``centers`` for change chunks with matching line counts.

        Reads the module-level ``left_lines`` and ``right_lines``, which must
        hold the lines of the files this chunk belongs to.
        """
        if not self.is_context and self.left_count == self.right_count:
            # NOTE(review): changed_center() presumably locates the differing
            # region inside a pair of lines - confirm in diff_speedup.
            self.centers = tuple(
                changed_center(left_lines[self.left_start + i], right_lines[self.right_start + i])
                for i in range(self.left_count)
            )

    def __repr__(self) -> str:
        return 'Chunk(is_context={}, left_start={}, left_count={}, right_start={}, right_count={})'.format(
            self.is_context, self.left_start, self.left_count, self.right_start, self.right_count)


class Hunk:
    """One ``@@`` section of a unified diff, split into Chunk objects.

    ``left`` and ``right`` are ``(start, count)`` pairs as they appear in the
    hunk header (1-based start); the stored starts are converted to 0-based.
    """

    def __init__(self, title: str, left: Tuple[int, int], right: Tuple[int, int]) -> None:
        self.left_start, self.left_count = left
        self.right_start, self.right_count = right
        self.left_start -= 1  # 0-index
        self.right_start -= 1  # 0-index
        self.title = title
        self.added_count = self.removed_count = 0
        self.chunks: List[Chunk] = []
        self.current_chunk: Optional[Chunk] = None
        self.largest_line_number = max(self.left_start + self.left_count, self.right_start + self.right_count)

    def new_chunk(self, is_context: bool = False) -> Chunk:
        """Create a chunk starting where the last stored chunk ended.

        The first chunk starts at the start of the hunk itself.
        """
        if self.chunks:
            c = self.chunks[-1]
            left_start = c.left_start + c.left_count
            right_start = c.right_start + c.right_count
        else:
            left_start = self.left_start
            right_start = self.right_start
        return Chunk(left_start, right_start, is_context)

    def ensure_diff_chunk(self) -> None:
        """Make the current chunk a change chunk, closing a context chunk if needed."""
        if self.current_chunk is None:
            self.current_chunk = self.new_chunk(is_context=False)
        elif self.current_chunk.is_context:
            self.chunks.append(self.current_chunk)
            self.current_chunk = self.new_chunk(is_context=False)

    def ensure_context_chunk(self) -> None:
        """Make the current chunk a context chunk, closing a change chunk if needed."""
        if self.current_chunk is None:
            self.current_chunk = self.new_chunk(is_context=True)
        elif not self.current_chunk.is_context:
            self.chunks.append(self.current_chunk)
            self.current_chunk = self.new_chunk(is_context=True)

    def add_line(self) -> None:
        """Record an added (``+``) line."""
        self.ensure_diff_chunk()
        if self.current_chunk is not None:
            self.current_chunk.add_line()
        self.added_count += 1

    def remove_line(self) -> None:
        """Record a removed (``-``) line."""
        self.ensure_diff_chunk()
        if self.current_chunk is not None:
            self.current_chunk.remove_line()
        self.removed_count += 1

    def context_line(self) -> None:
        """Record an unchanged context line."""
        self.ensure_context_chunk()
        if self.current_chunk is not None:
            self.current_chunk.context_line()

    def finalize(self) -> None:
        """Close the last chunk, verify line counts and finalize every chunk.

        Raises ValueError when the lines seen in the hunk body do not add up to
        the counts announced in the hunk header.
        """
        if self.current_chunk is not None:
            self.chunks.append(self.current_chunk)
        del self.current_chunk
        # Sanity check. A hunk without any body lines ends where it starts, so
        # it only passes when the header announced zero lines on both sides.
        if self.chunks:
            last = self.chunks[-1]
            left_end = last.left_start + last.left_count
            right_end = last.right_start + last.right_count
        else:
            left_end, right_end = self.left_start, self.right_start
        if left_end != self.left_start + self.left_count:
            raise ValueError('Left side line mismatch {} != {}'.format(left_end, self.left_start + self.left_count))
        if right_end != self.right_start + self.right_count:
            raise ValueError('Right side line mismatch {} != {}'.format(right_end, self.right_start + self.right_count))
        for c in self.chunks:
            c.finalize()


def parse_range(x: str) -> Tuple[int, int]:
    """Parse one range of a hunk header such as ``-12,5`` or ``+7``.

    The first character (the sign) is dropped. Returns ``(start, count)``;
    the count defaults to 1 when it is omitted.
    """
    parts = x[1:].split(',', 1)
    start = abs(int(parts[0]))
    count = 1 if len(parts) < 2 else int(parts[1])
    return start, count


def parse_hunk_header(line: str) -> Hunk:
    """Build a Hunk from a header line of the form ``@@ -a,b +c,d @@ title``."""
    parts: Tuple[str, ...] = tuple(filter(None, line.split('@@', 2)))
    linespec = parts[0].strip()
    title = ''
    if len(parts) == 2:
        title = parts[1].strip()
    left, right = map(parse_range, linespec.split())
    return Hunk(title, left, right)


class Patch:
    """The parsed hunks of a diff between two files, with summary counts."""

    def __init__(self, all_hunks: Sequence[Hunk]):
        self.all_hunks = all_hunks
        # Taken from the last hunk only.
        self.largest_line_number = self.all_hunks[-1].largest_line_number if self.all_hunks else 0
        self.added_count = sum(h.added_count for h in all_hunks)
        self.removed_count = sum(h.removed_count for h in all_hunks)

    def __iter__(self) -> Iterator[Hunk]:
        return iter(self.all_hunks)

    def __len__(self) -> int:
        return len(self.all_hunks)


def parse_patch(raw: str) -> Patch:
    """Parse unified diff text into a Patch.

    Everything before the first ``@@`` header is ignored. Lines starting with
    a backslash (for example the "no newline at end of file" marker) are
    skipped. Raises ValueError when a hunk body does not match its header.
    """
    all_hunks: List[Hunk] = []
    current_hunk: Optional[Hunk] = None
    for line in raw.splitlines():
        if line.startswith('@@ '):
            current_hunk = parse_hunk_header(line)
            all_hunks.append(current_hunk)
            continue
        if current_hunk is None:
            continue
        # Slice instead of indexing: an empty line inside a hunk is a context
        # line whose leading space was stripped and must not raise IndexError.
        q = line[:1]
        if q == '+':
            current_hunk.add_line()
        elif q == '-':
            current_hunk.remove_line()
        elif q == '\\':
            continue
        else:
            current_hunk.context_line()
    for h in all_hunks:
        h.finalize()
    return Patch(all_hunks)


class Differ:
    """Collect pairs of files and diff them all on a shared thread pool."""

    # Created by the first instance and shared by all later instances.
    diff_executor: Optional[concurrent.futures.ThreadPoolExecutor] = None

    def __init__(self) -> None:
        self.jmap: Dict[str, str] = {}  # left path -> right path
        self.jobs: List[str] = []  # left paths, in the order they were added
        if Differ.diff_executor is None:
            Differ.diff_executor = self.diff_executor = concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count())

    def add_diff(self, file1: str, file2: str) -> None:
        """Queue a diff of ``file1`` against ``file2``."""
        self.jmap[file1] = file2
        self.jobs.append(file1)

    def __call__(self, context: int = 3) -> Union[str, Dict[str, Patch]]:
        """Run all queued diffs and parse their output.

        Returns a dict mapping each left path to its Patch on success. On the
        first failure an error message string is returned instead.
        """
        global left_lines, right_lines
        ans: Dict[str, Patch] = {}
        executor = self.diff_executor
        assert executor is not None
        jobs = {executor.submit(run_diff, key, self.jmap[key], context): key for key in self.jobs}
        for future in concurrent.futures.as_completed(jobs):
            key = jobs[future]
            left_path, right_path = key, self.jmap[key]
            try:
                ok, returncode, output = future.result()
            except FileNotFoundError as err:
                return 'Could not find the {} executable. Is it in your PATH?'.format(err.filename)
            except Exception as e:
                return 'Running git diff for {} vs. {} generated an exception: {}'.format(left_path, right_path, e)
            if not ok:
                return output + '\nRunning git diff for {} vs. {} failed'.format(left_path, right_path)
            # Chunk.finalize() reads these module globals while parsing.
            left_lines = lines_for_path(left_path)
            right_lines = lines_for_path(right_path)
            try:
                patch = parse_patch(output)
            except Exception:
                import traceback
                return traceback.format_exc() + '\nParsing diff for {} vs. {} failed'.format(left_path, right_path)
            else:
                ans[key] = patch
        return ans