1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
4
5import concurrent.futures
6import os
7import shlex
8import shutil
9import subprocess
10from typing import Dict, Iterator, List, Optional, Sequence, Tuple, Union
11
12from . import global_data
13from .collect import lines_for_path
14from .diff_speedup import changed_center
15
# Lines of the left/right file whose patch is currently being parsed.
# Differ.__call__ rebinds these before every parse_patch() call and
# Chunk.finalize() indexes into them.
left_lines: Tuple[str, ...] = ()
right_lines: Tuple[str, ...] = ()
# Command templates: run_diff() replaces _CONTEXT_ with the number of context
# lines and appends the two file paths.
GIT_DIFF = 'git diff --no-color --no-ext-diff --exit-code -U_CONTEXT_ --no-index --'
DIFF_DIFF = 'diff -p -U _CONTEXT_ --'
# PIDs of the diff subprocesses that run_diff() has started and not yet
# waited for.
worker_processes: List[int] = []
21
22
def find_differ() -> Optional[str]:
    """Choose a diff command template based on the programs found on PATH.

    git is preferred, provided that ``git --help`` exits with status 0;
    otherwise plain diff is used when it is available.

    Returns:
        GIT_DIFF, DIFF_DIFF, or None when neither program can be used.
    """
    if shutil.which('git'):
        # git has to actually run successfully, not merely exist on PATH.
        probe = subprocess.Popen(
            ['git', '--help'], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
        if probe.wait() == 0:
            return GIT_DIFF
    if shutil.which('diff'):
        return DIFF_DIFF
    return None
28
29
def set_diff_command(opt: str) -> None:
    """Store the diff command template to use in ``global_data.cmd``.

    The value ``'auto'`` selects a template via find_differ(); any other value
    is stored unchanged.

    Raises:
        SystemExit: if ``opt`` is ``'auto'`` and find_differ() returns None.
    """
    if opt != 'auto':
        global_data.cmd = opt
        return
    detected = find_differ()
    if detected is None:
        raise SystemExit('Failed to find either the git or diff programs on your system')
    global_data.cmd = detected
38
39
def run_diff(file1: str, file2: str, context: int = 3) -> Tuple[bool, Union[int, bool], str]:
    """Run the configured diff command (``global_data.cmd``) on two files.

    While the subprocess is running its PID is listed in ``worker_processes``.

    Args:
        file1: path of the left file.
        file2: path of the right file.
        context: number substituted for ``_CONTEXT_`` in the command template.

    Returns:
        A tuple ``(ok, is_different, patch)``. When the command exits with 0
        or 1 this is ``(True, exit_code == 1, decoded stdout)``. For any other
        exit code it is ``(False, exit_code, decoded stderr)``.
    """
    cmd = shlex.split(global_data.cmd.replace('_CONTEXT_', str(context)))
    # we resolve symlinks because git diff does not follow symlinks, while diff
    # does. We want consistent behavior, also for integration with git difftool
    # we always want symlinks to be followed.
    path1 = os.path.realpath(file1)
    path2 = os.path.realpath(file2)
    p = subprocess.Popen(
        cmd + [path1, path2],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.DEVNULL)
    worker_processes.append(p.pid)
    try:
        stdout, stderr = p.communicate()
        returncode = p.wait()
    except BaseException:
        # Do not leave a running or un-reaped child behind when we are
        # interrupted while waiting for it.
        p.kill()
        p.wait()
        raise
    finally:
        # Unregister on every exit path so no stale PID stays in the list.
        worker_processes.remove(p.pid)
    if returncode in (0, 1):
        return True, returncode == 1, stdout.decode('utf-8')
    return False, returncode, stderr.decode('utf-8')
58
59
class Chunk:
    """A run of consecutive hunk lines that are all context or all changes.

    ``left_start``/``right_start`` are used as indices into the module-level
    ``left_lines``/``right_lines``; ``left_count``/``right_count`` are the
    number of lines the chunk spans on each side.
    """

    __slots__ = ('is_context', 'left_start', 'right_start', 'left_count', 'right_count', 'centers')

    def __init__(self, left_start: int, right_start: int, is_context: bool = False) -> None:
        self.left_start = left_start
        self.right_start = right_start
        self.is_context = is_context
        self.left_count = 0
        self.right_count = 0
        # Only filled in by finalize(), and only for change chunks that have
        # the same number of lines on both sides.
        self.centers: Optional[Tuple[Tuple[int, int], ...]] = None

    def add_line(self) -> None:
        """Record a line that exists only on the right side."""
        self.right_count += 1

    def remove_line(self) -> None:
        """Record a line that exists only on the left side."""
        self.left_count += 1

    def context_line(self) -> None:
        """Record a line that is present on both sides."""
        self.left_count += 1
        self.right_count += 1

    def finalize(self) -> None:
        """Compute ``centers`` by pairing the left and right lines one to one.

        Does nothing for context chunks or when the two sides differ in
        length.
        """
        if self.is_context or self.left_count != self.right_count:
            return
        line_count = self.left_count
        index_pairs = zip(
            range(self.left_start, self.left_start + line_count),
            range(self.right_start, self.right_start + line_count),
        )
        self.centers = tuple(
            changed_center(left_lines[li], right_lines[ri]) for li, ri in index_pairs
        )

    def __repr__(self) -> str:
        return (
            f'Chunk(is_context={self.is_context}, left_start={self.left_start}, '
            f'left_count={self.left_count}, right_start={self.right_start}, '
            f'right_count={self.right_count})'
        )
91
92
class Hunk:
    """One ``@@`` hunk of a unified diff, split into context and change chunks.

    Lines are fed in order through add_line(), remove_line() and
    context_line(); consecutive lines of the same kind (change or context)
    are grouped into one Chunk. finalize() is called once all lines have been
    fed in.
    """

    def __init__(self, title: str, left: Tuple[int, int], right: Tuple[int, int]) -> None:
        """
        Args:
            title: title text of the hunk.
            left: ``(start, count)`` for the left side, ``start`` is 1-based.
            right: ``(start, count)`` for the right side, ``start`` is 1-based.
        """
        self.left_start, self.left_count = left
        self.right_start, self.right_count = right
        self.left_start -= 1  # 0-index
        self.right_start -= 1  # 0-index
        self.title = title
        self.added_count = self.removed_count = 0
        self.chunks: List[Chunk] = []
        self.current_chunk: Optional[Chunk] = None
        self.largest_line_number = max(self.left_start + self.left_count, self.right_start + self.right_count)

    def new_chunk(self, is_context: bool = False) -> Chunk:
        """Create a chunk starting where the last stored chunk ends, or at the
        start of the hunk when no chunk has been stored yet."""
        if self.chunks:
            c = self.chunks[-1]
            left_start = c.left_start + c.left_count
            right_start = c.right_start + c.right_count
        else:
            left_start = self.left_start
            right_start = self.right_start
        return Chunk(left_start, right_start, is_context)

    def ensure_diff_chunk(self) -> None:
        """Make the current chunk a change chunk, storing a preceding context chunk."""
        if self.current_chunk is None:
            self.current_chunk = self.new_chunk(is_context=False)
        elif self.current_chunk.is_context:
            self.chunks.append(self.current_chunk)
            self.current_chunk = self.new_chunk(is_context=False)

    def ensure_context_chunk(self) -> None:
        """Make the current chunk a context chunk, storing a preceding change chunk."""
        if self.current_chunk is None:
            self.current_chunk = self.new_chunk(is_context=True)
        elif not self.current_chunk.is_context:
            self.chunks.append(self.current_chunk)
            self.current_chunk = self.new_chunk(is_context=True)

    def add_line(self) -> None:
        """Record a line added on the right side."""
        self.ensure_diff_chunk()
        if self.current_chunk is not None:
            self.current_chunk.add_line()
        self.added_count += 1

    def remove_line(self) -> None:
        """Record a line removed from the left side."""
        self.ensure_diff_chunk()
        if self.current_chunk is not None:
            self.current_chunk.remove_line()
        self.removed_count += 1

    def context_line(self) -> None:
        """Record an unchanged line present on both sides."""
        self.ensure_context_chunk()
        if self.current_chunk is not None:
            self.current_chunk.context_line()

    def finalize(self) -> None:
        """Store the last chunk, verify the line counts and finalize all chunks.

        After this call the ``current_chunk`` attribute no longer exists.

        Raises:
            ValueError: if the lines fed in do not end where the hunk header
                says the hunk ends, on either side.
            IndexError: if no lines were fed in at all.
        """
        if self.current_chunk is not None:
            self.chunks.append(self.current_chunk)
        del self.current_chunk
        # Sanity check: the last chunk must end exactly where the header
        # range of the hunk ends, on both sides.
        last = self.chunks[-1]
        left_end = last.left_start + last.left_count
        right_end = last.right_start + last.right_count
        expected_left_end = self.left_start + self.left_count
        expected_right_end = self.right_start + self.right_count
        if left_end != expected_left_end:
            raise ValueError('Left side line mismatch {} != {}'.format(left_end, expected_left_end))
        if right_end != expected_right_end:
            raise ValueError('Right side line mismatch {} != {}'.format(right_end, expected_right_end))
        for chunk in self.chunks:
            chunk.finalize()
159
160
def parse_range(x: str) -> Tuple[int, int]:
    """Parse one range of a hunk header, such as ``-12,3`` or ``+7``.

    The first character is dropped before parsing. When there is no comma the
    count is 1.

    Returns:
        ``(start, count)``, with ``start`` made non-negative.
    """
    start_text, comma, count_text = x[1:].partition(',')
    start = abs(int(start_text))
    count = int(count_text) if comma else 1
    return start, count
166
167
def parse_hunk_header(line: str) -> Hunk:
    """Create a Hunk from a header line shaped like ``@@ -a,b +c,d @@ title``.

    The text after the second ``@@``, if any, becomes the title of the hunk.
    """
    # Splitting on '@@' yields empty strings next to the markers; drop them.
    pieces = [piece for piece in line.split('@@', 2) if piece]
    left, right = (parse_range(spec) for spec in pieces[0].split())
    title = pieces[1].strip() if len(pieces) == 2 else ''
    return Hunk(title, left, right)
176
177
class Patch:
    """The hunks parsed from one diff, together with summary counts."""

    def __init__(self, all_hunks: Sequence[Hunk]):
        self.all_hunks = all_hunks
        added = removed = 0
        for hunk in all_hunks:
            added += hunk.added_count
            removed += hunk.removed_count
        self.added_count = added
        self.removed_count = removed
        # Taken from the last hunk only; 0 when there are no hunks.
        if all_hunks:
            self.largest_line_number = all_hunks[-1].largest_line_number
        else:
            self.largest_line_number = 0

    def __iter__(self) -> Iterator[Hunk]:
        """Iterate over the hunks in order."""
        return iter(self.all_hunks)

    def __len__(self) -> int:
        """Return the number of hunks."""
        return len(self.all_hunks)
191
192
def parse_patch(raw: str) -> Patch:
    """Parse the text of a unified diff into a Patch.

    Lines before the first ``@@ `` header are ignored. Within a hunk, lines
    starting with ``+`` are additions, ``-`` removals, lines starting with a
    backslash are skipped and every other line, including a completely empty
    one, counts as a context line.

    Raises:
        Whatever parse_hunk_header() or Hunk.finalize() raise for malformed
        input.
    """
    all_hunks: List[Hunk] = []
    current_hunk: Optional[Hunk] = None
    for line in raw.splitlines():
        if line.startswith('@@ '):
            current_hunk = parse_hunk_header(line)
            all_hunks.append(current_hunk)
            continue
        if current_hunk is None:
            # Still in the preamble before the first hunk.
            continue
        # Slice rather than index: a blank context line may be emitted as a
        # completely empty line (for example with diff --suppress-blank-empty
        # or git's diff.suppressBlankEmpty), which must not raise IndexError.
        marker = line[:1]
        if marker == '+':
            current_hunk.add_line()
        elif marker == '-':
            current_hunk.remove_line()
        elif marker == '\\':
            # Lines such as "\ No newline at end of file" are not content.
            continue
        else:
            current_hunk.context_line()
    for h in all_hunks:
        h.finalize()
    return Patch(all_hunks)
215
216
class Differ:
    """Collects pairs of files and diffs them all on a shared thread pool."""

    # Created by the first instance and stored on the class from then on.
    diff_executor: Optional[concurrent.futures.ThreadPoolExecutor] = None

    def __init__(self) -> None:
        # Maps each left path to its right path; jobs keeps the order in
        # which pairs were added.
        self.jmap: Dict[str, str] = {}
        self.jobs: List[str] = []
        if Differ.diff_executor is None:
            pool = concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count())
            Differ.diff_executor = self.diff_executor = pool

    def add_diff(self, file1: str, file2: str) -> None:
        """Queue ``file1`` to be diffed against ``file2``."""
        self.jobs.append(file1)
        self.jmap[file1] = file2

    def __call__(self, context: int = 3) -> Union[str, Dict[str, Patch]]:
        """Run all queued diffs and parse their output.

        Returns:
            A dict mapping each left path to its parsed Patch, or an error
            message string as soon as any diff fails to run or to parse.
        """
        global left_lines, right_lines
        executor = self.diff_executor
        assert executor is not None
        pending = {}
        for queued in self.jobs:
            pending[executor.submit(run_diff, queued, self.jmap[queued], context)] = queued
        patches: Dict[str, Patch] = {}
        for done in concurrent.futures.as_completed(pending):
            left_path = pending[done]
            right_path = self.jmap[left_path]
            try:
                ok, _status, output = done.result()
            except FileNotFoundError as err:
                return 'Could not find the {} executable. Is it in your PATH?'.format(err.filename)
            except Exception as e:
                return 'Running git diff for {} vs. {} generated an exception: {}'.format(left_path, right_path, e)
            if not ok:
                return output + '\nRunning git diff for {} vs. {} failed'.format(left_path, right_path)
            # Chunk.finalize(), reached through parse_patch(), reads these
            # module globals, so they have to be set before parsing.
            left_lines = lines_for_path(left_path)
            right_lines = lines_for_path(right_path)
            try:
                parsed = parse_patch(output)
            except Exception:
                import traceback
                return traceback.format_exc() + '\nParsing diff for {} vs. {} failed'.format(left_path, right_path)
            patches[left_path] = parsed
        return patches
258