1# patch.py - patch file parsing routines
2#
3# Copyright 2006 Brendan Cully <brendan@kublai.com>
4# Copyright 2007 Chris Mason <chris.mason@oracle.com>
5#
6# This software may be used and distributed according to the terms of the
7# GNU General Public License version 2 or any later version.
8
9from __future__ import absolute_import, print_function
10
11import collections
12import contextlib
13import copy
14import errno
15import os
16import re
17import shutil
18import zlib
19
20from .i18n import _
21from .node import (
22    hex,
23    sha1nodeconstants,
24    short,
25)
26from .pycompat import open
27from . import (
28    copies,
29    diffhelper,
30    diffutil,
31    encoding,
32    error,
33    mail,
34    mdiff,
35    pathutil,
36    pycompat,
37    scmutil,
38    similar,
39    util,
40    vfs as vfsmod,
41)
42from .utils import (
43    dateutil,
44    hashutil,
45    procutil,
46    stringutil,
47)
48
# In-memory file object used throughout for assembling patch chunks.
stringio = util.stringio

# Matches the "diff --git a/<old> b/<new>" line that opens a git-style patch.
gitre = re.compile(br'diff --git a/(.*) b/(.*)')
# Splits a line into alternating runs of tabs and non-tabs.
tabsplitter = re.compile(br'(\t+|[^\t]+)')
# Splits a line into whitespace runs, word characters (including 8-bit
# bytes \x80-\xff) and single non-word characters, for word-level diffing.
wordsplitter = re.compile(
    br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|[^ \ta-zA-Z0-9_\x80-\xff])'
)

# Re-exported so callers can catch patch errors without importing 'error'.
PatchError = error.PatchError
58
59# public functions
60
61
def split(stream):
    """return an iterator of individual patches from a stream

    The stream is sniffed line by line until enough context is seen to
    pick a splitting strategy: hg export output, an mbox, a MIME
    message, a bare header block followed by a diff, or (failing all of
    those) the whole input as a single plain patch.  Each yielded patch
    is a file-like object (stringio).
    """

    def isheader(line, inheader):
        # Heuristic: does this line look like an RFC-2822 style header?
        if inheader and line.startswith((b' ', b'\t')):
            # continuation
            return True
        if line.startswith((b' ', b'-', b'+')):
            # diff line - don't check for header pattern in there
            return False
        l = line.split(b': ', 1)
        return len(l) == 2 and b' ' not in l[0]

    def chunk(lines):
        # Wrap accumulated lines back into a file-like object.
        return stringio(b''.join(lines))

    def hgsplit(stream, cur):
        # Split concatenated "hg export" output on '# HG changeset patch'
        # markers that follow a blank line.
        inheader = True

        for line in stream:
            if not line.strip():
                inheader = False
            if not inheader and line.startswith(b'# HG changeset patch'):
                yield chunk(cur)
                cur = []
                inheader = True

            cur.append(line)

        if cur:
            yield chunk(cur)

    def mboxsplit(stream, cur):
        # Split an mbox on 'From ' separator lines, recursing into split()
        # for each message body (first line, the separator, is dropped).
        for line in stream:
            if line.startswith(b'From '):
                for c in split(chunk(cur[1:])):
                    yield c
                cur = []

            cur.append(line)

        if cur:
            for c in split(chunk(cur[1:])):
                yield c

    def mimesplit(stream, cur):
        # Let the email machinery split a MIME message, yielding each
        # text part that may plausibly contain a patch.
        def msgfp(m):
            # Flatten a message (part) back into a file-like object.
            fp = stringio()
            g = mail.Generator(fp, mangle_from_=False)
            g.flatten(m)
            fp.seek(0)
            return fp

        for line in stream:
            cur.append(line)
        c = chunk(cur)

        m = mail.parse(c)
        if not m.is_multipart():
            yield msgfp(m)
        else:
            ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
            for part in m.walk():
                ct = part.get_content_type()
                if ct not in ok_types:
                    continue
                yield msgfp(part)

    def headersplit(stream, cur):
        # Split on the start of each header block (header-looking line
        # after non-header content).
        inheader = False

        for line in stream:
            if not inheader and isheader(line, inheader):
                yield chunk(cur)
                cur = []
                inheader = True
            if inheader and not isheader(line, inheader):
                inheader = False

            cur.append(line)

        if cur:
            yield chunk(cur)

    def remainder(cur):
        # Fallback: the whole accumulated input is one patch.
        yield chunk(cur)

    class fiter(object):
        # Adapts an object that has readline() but no iteration protocol.
        def __init__(self, fp):
            self.fp = fp

        def __iter__(self):
            return self

        def next(self):
            l = self.fp.readline()
            if not l:
                raise StopIteration
            return l

        __next__ = next

    inheader = False
    cur = []

    # headers whose presence means "hand the input to the email parser"
    mimeheaders = [b'content-type']

    if not util.safehasattr(stream, b'next'):
        # http responses, for example, have readline but not next
        stream = fiter(stream)

    # Sniff lines until one of them identifies the input format.
    for line in stream:
        cur.append(line)
        if line.startswith(b'# HG changeset patch'):
            return hgsplit(stream, cur)
        elif line.startswith(b'From '):
            return mboxsplit(stream, cur)
        elif isheader(line, inheader):
            inheader = True
            if line.split(b':', 1)[0].lower() in mimeheaders:
                # let email parser handle this
                return mimesplit(stream, cur)
        elif line.startswith(b'--- ') and inheader:
            # No evil headers seen by diff start, split by hand
            return headersplit(stream, cur)
        # Not enough info, keep reading

    # if we are here, we have a very plain patch
    return remainder(cur)
191
192
## Some facility for extensible patch parsing:
# list of pairs ("header to match", "data key"); a '# <header> <value>'
# line in an hg patch header stores <value> under <data key> in the
# dictionary returned by extract() (see _extract below).
patchheadermap = [
    (b'Date', b'date'),
    (b'Branch', b'branch'),
    (b'Node ID', b'nodeid'),
]
200
201
@contextlib.contextmanager
def extract(ui, fileobj):
    """extract patch from data read from fileobj.

    patch can be a normal patch or contained in an email message.

    Context manager yielding a dictionary. Standard keys are:
      - filename,
      - message,
      - user,
      - date,
      - branch,
      - node,
      - p1,
      - p2.
    Any item can be missing from the dictionary. If filename is
    missing, fileobj did not contain a patch. The temporary file named
    by 'filename' is removed when the context exits.
    """

    handle, patchpath = pycompat.mkstemp(prefix=b'hg-patch-')
    writer = os.fdopen(handle, 'wb')
    try:
        yield _extract(ui, fileobj, patchpath, writer)
    finally:
        # _extract closes the file itself; closing again is harmless.
        writer.close()
        os.unlink(patchpath)
227
228
def _extract(ui, fileobj, tmpname, tmpfp):
    """Parse fileobj (a patch or an email containing one) into a dict.

    The diff portion is written to tmpfp (which is closed before
    returning) and, if any diff was seen, data[b'filename'] is set to
    tmpname.  See extract() for the dictionary keys.
    """

    # attempt to detect the start of a patch
    # (this heuristic is borrowed from quilt)
    diffre = re.compile(
        br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
        br'retrieving revision [0-9]+(\.[0-9]+)*$|'
        br'---[ \t].*?^\+\+\+[ \t]|'
        br'\*\*\*[ \t].*?^---[ \t])',
        re.MULTILINE | re.DOTALL,
    )

    data = {}

    msg = mail.parse(fileobj)

    # Both may be None/empty if the input was not really an email.
    subject = msg['Subject'] and mail.headdecode(msg['Subject'])
    data[b'user'] = msg['From'] and mail.headdecode(msg['From'])
    if not subject and not data[b'user']:
        # Not an email, restore parsed headers if any
        subject = (
            b'\n'.join(
                b': '.join(map(encoding.strtolocal, h)) for h in msg.items()
            )
            + b'\n'
        )

    # should try to parse msg['Date']
    parents = []

    nodeid = msg['X-Mercurial-Node']
    if nodeid:
        data[b'nodeid'] = nodeid = mail.headdecode(nodeid)
        ui.debug(b'Node ID: %s\n' % nodeid)

    if subject:
        # Strip a leading "[PATCH ...]" tag and unfold wrapped subjects.
        if subject.startswith(b'[PATCH'):
            pend = subject.find(b']')
            if pend >= 0:
                subject = subject[pend + 1 :].lstrip()
        subject = re.sub(br'\n[ \t]+', b' ', subject)
        ui.debug(b'Subject: %s\n' % subject)
    if data[b'user']:
        ui.debug(b'From: %s\n' % data[b'user'])
    diffs_seen = 0
    ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
    message = b''
    for part in msg.walk():
        content_type = pycompat.bytestr(part.get_content_type())
        ui.debug(b'Content-Type: %s\n' % content_type)
        if content_type not in ok_types:
            continue
        payload = part.get_payload(decode=True)
        m = diffre.search(payload)
        if m:
            # Everything before the diff is candidate commit message;
            # scan it for hg patch header lines ('# ...').
            hgpatch = False
            hgpatchheader = False
            ignoretext = False

            ui.debug(b'found patch at byte %d\n' % m.start(0))
            diffs_seen += 1
            cfp = stringio()
            for line in payload[: m.start(0)].splitlines():
                if line.startswith(b'# HG changeset patch') and not hgpatch:
                    ui.debug(b'patch generated by hg export\n')
                    hgpatch = True
                    hgpatchheader = True
                    # drop earlier commit message content
                    cfp.seek(0)
                    cfp.truncate()
                    subject = None
                elif hgpatchheader:
                    if line.startswith(b'# User '):
                        data[b'user'] = line[7:]
                        ui.debug(b'From: %s\n' % data[b'user'])
                    elif line.startswith(b"# Parent "):
                        parents.append(line[9:].lstrip())
                    elif line.startswith(b"# "):
                        # extensible headers, see patchheadermap
                        for header, key in patchheadermap:
                            prefix = b'# %s ' % header
                            if line.startswith(prefix):
                                data[key] = line[len(prefix) :]
                                ui.debug(b'%s: %s\n' % (header, data[key]))
                    else:
                        hgpatchheader = False
                elif line == b'---':
                    # git-style "---" separator: what follows is not
                    # part of the commit message
                    ignoretext = True
                if not hgpatchheader and not ignoretext:
                    cfp.write(line)
                    cfp.write(b'\n')
            message = cfp.getvalue()
            if tmpfp:
                tmpfp.write(payload)
                if not payload.endswith(b'\n'):
                    tmpfp.write(b'\n')
        elif not diffs_seen and message and content_type == b'text/plain':
            # plain-text parts before the first diff extend the message
            message += b'\n' + payload

    if subject and not message.startswith(subject):
        message = b'%s\n%s' % (subject, message)
    data[b'message'] = message
    tmpfp.close()
    if parents:
        data[b'p1'] = parents.pop(0)
        if parents:
            data[b'p2'] = parents.pop(0)

    if diffs_seen:
        data[b'filename'] = tmpname

    return data
340
341
class patchmeta(object):
    """Patched file metadata

    'op' is the performed operation within ADD, DELETE, RENAME, MODIFY
    or COPY.  'path' is patched file path. 'oldpath' is set to the
    origin file when 'op' is either COPY or RENAME, None otherwise. If
    file mode is changed, 'mode' is a tuple (islink, isexec) where
    'islink' is True if the file is a symlink and 'isexec' is True if
    the file is executable. Otherwise, 'mode' is None.
    """

    def __init__(self, path):
        self.path = path
        self.oldpath = None
        self.mode = None
        self.op = b'MODIFY'
        self.binary = False

    def setmode(self, mode):
        """Set self.mode from a numeric (octal) git file mode.

        Store real booleans so the tuple matches the class docstring and
        compares equal to mode tuples built elsewhere (e.g.
        fsbackend.getfile returns boolean flags); the raw masked ints
        previously stored here ((0o20000, 0o100)) were only
        truthiness-compatible, not equality-compatible.
        """
        islink = bool(mode & 0o20000)
        isexec = bool(mode & 0o100)
        self.mode = (islink, isexec)

    def copy(self):
        """Return an independent copy of this metadata object."""
        other = patchmeta(self.path)
        other.oldpath = self.oldpath
        other.mode = self.mode
        other.op = self.op
        other.binary = self.binary
        return other

    def _ispatchinga(self, afile):
        # 'a' side of the diff: /dev/null only matches file additions.
        if afile == b'/dev/null':
            return self.op == b'ADD'
        return afile == b'a/' + (self.oldpath or self.path)

    def _ispatchingb(self, bfile):
        # 'b' side of the diff: /dev/null only matches file deletions.
        if bfile == b'/dev/null':
            return self.op == b'DELETE'
        return bfile == b'b/' + self.path

    def ispatching(self, afile, bfile):
        """True if this metadata describes the a/ and b/ files of a hunk."""
        return self._ispatchinga(afile) and self._ispatchingb(bfile)

    def __repr__(self):
        return "<patchmeta %s %r>" % (self.op, self.path)
388
389
def readgitpatch(lr):
    """extract git-style metadata about patches from <patchname>"""

    # Walk the patch, building one patchmeta per "diff --git" section.
    results = []
    current = None
    for rawline in lr:
        stripped = rawline.rstrip(b'\r\n')
        if stripped.startswith(b'diff --git a/'):
            m = gitre.match(stripped)
            if m:
                # A new file section begins; flush any pending metadata.
                if current is not None:
                    results.append(current)
                current = patchmeta(m.group(2))
        elif current is not None:
            if stripped.startswith(b'--- '):
                # Hunks follow; this file's metadata is complete.
                results.append(current)
                current = None
            elif stripped.startswith(b'rename from '):
                current.op = b'RENAME'
                current.oldpath = stripped[12:]
            elif stripped.startswith(b'rename to '):
                current.path = stripped[10:]
            elif stripped.startswith(b'copy from '):
                current.op = b'COPY'
                current.oldpath = stripped[10:]
            elif stripped.startswith(b'copy to '):
                current.path = stripped[8:]
            elif stripped.startswith(b'deleted file'):
                current.op = b'DELETE'
            elif stripped.startswith(b'new file mode '):
                current.op = b'ADD'
                current.setmode(int(stripped[-6:], 8))
            elif stripped.startswith(b'new mode '):
                current.setmode(int(stripped[-6:], 8))
            elif stripped.startswith(b'GIT binary patch'):
                current.binary = True
    if current is not None:
        results.append(current)

    return results
433
434
class linereader(object):
    """Wrap a file object so that lines can be pushed back onto it.

    Pushed-back lines are returned (oldest first) before any further
    reading from the underlying file.
    """

    def __init__(self, fp):
        self.fp = fp
        self.buf = []

    def push(self, line):
        """Queue 'line' to be returned by the next readline() call."""
        if line is not None:
            self.buf.append(line)

    def readline(self):
        if not self.buf:
            return self.fp.readline()
        return self.buf.pop(0)

    def __iter__(self):
        # iterate line by line until an empty read (EOF)
        return iter(self.readline, b'')
454
455
class abstractbackend(object):
    """Interface for the targets a patch can be applied to."""

    def __init__(self, ui):
        self.ui = ui

    def getfile(self, fname):
        """Return target file data and flags as a (data, (islink,
        isexec)) tuple. Data is None if file is missing/deleted.
        """
        raise NotImplementedError

    def setfile(self, fname, data, mode, copysource):
        """Write data to target file fname and set its mode. mode is a
        (islink, isexec) tuple. If data is None, the file content should
        be left unchanged. If the file is modified after being copied,
        copysource is set to the original file name.
        """
        raise NotImplementedError

    def unlink(self, fname):
        """Unlink target file."""
        raise NotImplementedError

    def writerej(self, fname, failed, total, lines):
        """Write rejected lines for fname. failed is the number of hunks
        which failed to apply and total the total number of hunks for
        this file.

        The default implementation deliberately discards rejects.
        """

    def exists(self, fname):
        """Return True if the target file exists."""
        raise NotImplementedError

    def close(self):
        """Finalize the backend once all patches have been applied."""
        raise NotImplementedError
489
490
class fsbackend(abstractbackend):
    """Patch backend writing directly to the filesystem under basedir."""

    def __init__(self, ui, basedir):
        super(fsbackend, self).__init__(ui)
        self.opener = vfsmod.vfs(basedir)

    def getfile(self, fname):
        # Symlinks: the "data" is the link target itself.
        if self.opener.islink(fname):
            return (self.opener.readlink(fname), (True, False))

        isexec = False
        try:
            st = self.opener.lstat(fname)
            isexec = st.st_mode & 0o100 != 0
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
        try:
            return (self.opener.read(fname), (False, isexec))
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            # Missing file: signalled as (None, None).
            return None, None

    def setfile(self, fname, data, mode, copysource):
        islink, isexec = mode
        if data is None:
            # Mode-only change: leave the content untouched.
            self.opener.setflags(fname, islink, isexec)
            return
        if islink:
            self.opener.symlink(data, fname)
            return
        self.opener.write(fname, data)
        if isexec:
            self.opener.setflags(fname, False, True)

    def unlink(self, fname):
        rmdir = self.ui.configbool(b'experimental', b'removeemptydirs')
        self.opener.unlinkpath(fname, ignoremissing=True, rmdir=rmdir)

    def writerej(self, fname, failed, total, lines):
        rejname = fname + b".rej"
        self.ui.warn(
            _(b"%d out of %d hunks FAILED -- saving rejects to file %s\n")
            % (failed, total, rejname)
        )
        rejfp = self.opener(rejname, b'w')
        rejfp.writelines(lines)
        rejfp.close()

    def exists(self, fname):
        return self.opener.lexists(fname)
541
542
class workingbackend(fsbackend):
    """Filesystem backend for the working directory that also records
    adds, removes and copies so the dirstate can be updated in close().
    """

    def __init__(self, ui, repo, similarity):
        super(workingbackend, self).__init__(ui, repo.root)
        self.repo = repo
        # similarity threshold forwarded to scmutil.marktouched for
        # rename detection
        self.similarity = similarity
        self.removed = set()
        self.changed = set()
        self.copied = []

    def _checkknown(self, fname):
        # Refuse to patch a file that exists on disk but is not tracked.
        if not self.repo.dirstate.get_entry(fname).any_tracked and self.exists(
            fname
        ):
            raise PatchError(_(b'cannot patch %s: file is not tracked') % fname)

    def setfile(self, fname, data, mode, copysource):
        self._checkknown(fname)
        super(workingbackend, self).setfile(fname, data, mode, copysource)
        if copysource is not None:
            self.copied.append((copysource, fname))
        self.changed.add(fname)

    def unlink(self, fname):
        self._checkknown(fname)
        super(workingbackend, self).unlink(fname)
        self.removed.add(fname)
        self.changed.add(fname)

    def close(self):
        """Flush recorded changes into the dirstate.

        Returns the sorted list of changed files.
        """
        wctx = self.repo[None]
        changed = set(self.changed)
        for src, dst in self.copied:
            scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
        if self.removed:
            wctx.forget(sorted(self.removed))
            for f in self.removed:
                if f not in self.repo.dirstate:
                    # File was deleted and no longer belongs to the
                    # dirstate, it was probably marked added then
                    # deleted, and should not be considered by
                    # marktouched().
                    changed.discard(f)
        if changed:
            scmutil.marktouched(self.repo, changed, self.similarity)
        return sorted(self.changed)
588
589
class filestore(object):
    """Store file contents in memory, spilling to a temp dir when full.

    Files are kept in an in-memory dict until the memory budget
    ('maxsize', default 4 MiB; negative means unlimited memory use) is
    exhausted, after which further files go to a temporary directory
    that close() removes.
    """

    def __init__(self, maxsize=None):
        self.opener = None      # lazily-created vfs over the spill dir
        self.files = {}         # fname -> (ondiskname, mode, copied)
        self.created = 0        # sequence number for on-disk names
        self.maxsize = 4 * (2 ** 20) if maxsize is None else maxsize
        self.size = 0           # bytes currently held in memory
        self.data = {}          # fname -> (data, mode, copied)

    def setfile(self, fname, data, mode, copied=None):
        if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
            # Fits within the in-memory budget.
            self.data[fname] = (data, mode, copied)
            self.size += len(data)
            return
        if self.opener is None:
            tmproot = pycompat.mkdtemp(prefix=b'hg-patch-')
            self.opener = vfsmod.vfs(tmproot)
        # Avoid filename issues with these simple names
        diskname = b'%d' % self.created
        self.opener.write(diskname, data)
        self.created += 1
        self.files[fname] = (diskname, mode, copied)

    def getfile(self, fname):
        """Return (data, mode, copied), or (None, None, None) if absent."""
        entry = self.data.get(fname)
        if entry is not None:
            return entry
        if self.opener and fname in self.files:
            diskname, mode, copied = self.files[fname]
            return self.opener.read(diskname), mode, copied
        return None, None, None

    def close(self):
        # Remove the spill directory, if one was ever created.
        if self.opener:
            shutil.rmtree(self.opener.base)
626
627
class repobackend(abstractbackend):
    """Backend applying patches against a changectx, buffering results
    in a filestore instead of touching the working directory.
    """

    def __init__(self, ui, repo, ctx, store):
        super(repobackend, self).__init__(ui)
        self.repo = repo
        self.ctx = ctx
        self.store = store
        self.changed = set()
        self.removed = set()
        self.copied = {}

    def _checkknown(self, fname):
        # Only files present in the base context may be patched.
        if fname not in self.ctx:
            raise PatchError(_(b'cannot patch %s: file is not tracked') % fname)

    def getfile(self, fname):
        try:
            fctx = self.ctx[fname]
        except error.LookupError:
            return None, None
        flags = fctx.flags()
        islink = b'l' in flags
        isexec = b'x' in flags
        return fctx.data(), (islink, isexec)

    def setfile(self, fname, data, mode, copysource):
        if copysource:
            self._checkknown(copysource)
        if data is None:
            # Mode-only change: carry over the current content.
            data = self.ctx[fname].data()
        self.store.setfile(fname, data, mode, copysource)
        self.changed.add(fname)
        if copysource:
            self.copied[fname] = copysource

    def unlink(self, fname):
        self._checkknown(fname)
        self.removed.add(fname)

    def exists(self, fname):
        return fname in self.ctx

    def close(self):
        # Report every file touched, whether modified or removed.
        return self.changed | self.removed
669
670
# @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
# context-diff hunk range lines: "--- start,end ----" / "*** start,end ****"
contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
# supported end-of-line normalization modes (see patchfile below)
eolmodes = [b'strict', b'crlf', b'lf', b'auto']
675
676
class patchfile(object):
    """Applies a sequence of hunks to a single file through a backend.

    'gp' is the patchmeta describing the file, 'backend' the target
    (see abstractbackend), 'store' a filestore holding copy sources,
    and 'eolmode' one of 'strict', 'crlf', 'lf' or 'auto' controlling
    end-of-line normalization.  Failed hunks accumulate in self.rej and
    are written out as a .rej file by close().
    """

    def __init__(self, ui, gp, backend, store, eolmode=b'strict'):
        self.fname = gp.path
        self.eolmode = eolmode
        self.eol = None           # EOL detected from the file's first line
        self.backend = backend
        self.ui = ui
        self.lines = []
        self.exists = False
        self.missing = True
        self.mode = gp.mode
        self.copysource = gp.oldpath
        self.create = gp.op in (b'ADD', b'COPY', b'RENAME')
        self.remove = gp.op == b'DELETE'
        # Copies/renames read their base content from the store, other
        # operations from the backend.
        if self.copysource is None:
            data, mode = backend.getfile(self.fname)
        else:
            data, mode = store.getfile(self.copysource)[:2]
        if data is not None:
            self.exists = self.copysource is None or backend.exists(self.fname)
            self.missing = False
            if data:
                self.lines = mdiff.splitnewlines(data)
            if self.mode is None:
                self.mode = mode
            if self.lines:
                # Normalize line endings
                if self.lines[0].endswith(b'\r\n'):
                    self.eol = b'\r\n'
                elif self.lines[0].endswith(b'\n'):
                    self.eol = b'\n'
                if eolmode != b'strict':
                    nlines = []
                    for l in self.lines:
                        if l.endswith(b'\r\n'):
                            l = l[:-2] + b'\n'
                        nlines.append(l)
                    self.lines = nlines
        else:
            if self.create:
                self.missing = False
            if self.mode is None:
                self.mode = (False, False)
        if self.missing:
            self.ui.warn(_(b"unable to find '%s' for patching\n") % self.fname)
            self.ui.warn(
                _(
                    b"(use '--prefix' to apply patch relative to the "
                    b"current directory)\n"
                )
            )

        self.hash = {}       # line content -> list of line numbers (for fuzz)
        self.dirty = 0
        self.offset = 0      # cumulative line delta from applied hunks
        self.skew = 0        # displacement discovered by previous hunks
        self.rej = []        # hunks that failed to apply
        self.fileprinted = False
        self.printfile(False)
        self.hunks = 0

    def writelines(self, fname, lines, mode):
        """Write lines through the backend, restoring requested EOLs."""
        if self.eolmode == b'auto':
            eol = self.eol
        elif self.eolmode == b'crlf':
            eol = b'\r\n'
        else:
            eol = b'\n'

        if self.eolmode != b'strict' and eol and eol != b'\n':
            rawlines = []
            for l in lines:
                if l and l.endswith(b'\n'):
                    l = l[:-1] + eol
                rawlines.append(l)
            lines = rawlines

        self.backend.setfile(fname, b''.join(lines), mode, self.copysource)

    def printfile(self, warn):
        """Print "patching file ..." once, as a warning if warn is True."""
        if self.fileprinted:
            return
        if warn or self.ui.verbose:
            self.fileprinted = True
        s = _(b"patching file %s\n") % self.fname
        if warn:
            self.ui.warn(s)
        else:
            self.ui.note(s)

    def findlines(self, l, linenum):
        # looks through the hash and finds candidate lines.  The
        # result is a list of line numbers sorted based on distance
        # from linenum
        cand = self.hash.get(l, [])
        if len(cand) > 1:
            # resort our list of potentials forward then back.
            cand.sort(key=lambda x: abs(x - linenum))
        return cand

    def write_rej(self):
        # our rejects are a little different from patch(1).  This always
        # creates rejects in the same form as the original patch.  A file
        # header is inserted so that you can run the reject through patch again
        # without having to type the filename.
        if not self.rej:
            return
        base = os.path.basename(self.fname)
        lines = [b"--- %s\n+++ %s\n" % (base, base)]
        for x in self.rej:
            for l in x.hunk:
                lines.append(l)
                if l[-1:] != b'\n':
                    lines.append(b'\n' + diffhelper.MISSING_NEWLINE_MARKER)
        self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)

    def apply(self, h):
        """Apply a single hunk 'h' to the buffered file content.

        Returns 0 on clean application, the fuzz amount (> 0) when the
        hunk applied with fuzz, or -1 when it was rejected.
        """
        if not h.complete():
            raise PatchError(
                _(b"bad hunk #%d %s (%d %d %d %d)")
                % (h.number, h.desc, len(h.a), h.lena, len(h.b), h.lenb)
            )

        self.hunks += 1

        if self.missing:
            self.rej.append(h)
            return -1

        if self.exists and self.create:
            if self.copysource:
                self.ui.warn(
                    _(b"cannot create %s: destination already exists\n")
                    % self.fname
                )
            else:
                self.ui.warn(_(b"file %s already exists\n") % self.fname)
            self.rej.append(h)
            return -1

        if isinstance(h, binhunk):
            # binary hunks replace the whole content in one shot
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                l = h.new(self.lines)
                self.lines[:] = l
                self.offset += len(l)
                self.dirty = True
            return 0

        horig = h
        if (
            self.eolmode in (b'crlf', b'lf')
            or self.eolmode == b'auto'
            and self.eol
        ):
            # If new eols are going to be normalized, then normalize
            # hunk data before patching. Otherwise, preserve input
            # line-endings.
            h = h.getnormalized()

        # fast case first, no offsets, no fuzz
        old, oldstart, new, newstart = h.fuzzit(0, False)
        oldstart += self.offset
        orig_start = oldstart
        # if there's skew we want to emit the "(offset %d lines)" even
        # when the hunk cleanly applies at start + skew, so skip the
        # fast case code
        if self.skew == 0 and diffhelper.testhunk(old, self.lines, oldstart):
            if self.remove:
                self.backend.unlink(self.fname)
            else:
                self.lines[oldstart : oldstart + len(old)] = new
                self.offset += len(new) - len(old)
                self.dirty = True
            return 0

        # ok, we couldn't match the hunk. Lets look for offsets and fuzz it
        self.hash = {}
        for x, s in enumerate(self.lines):
            self.hash.setdefault(s, []).append(x)

        # try increasing fuzz, and for each fuzz level first fuzzing only
        # the top of the hunk, then both ends
        for fuzzlen in pycompat.xrange(
            self.ui.configint(b"patch", b"fuzz") + 1
        ):
            for toponly in [True, False]:
                old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
                oldstart = oldstart + self.offset + self.skew
                oldstart = min(oldstart, len(self.lines))
                if old:
                    cand = self.findlines(old[0][1:], oldstart)
                else:
                    # Only adding lines with no or fuzzed context, just
                    # take the skew in account
                    cand = [oldstart]

                for l in cand:
                    if not old or diffhelper.testhunk(old, self.lines, l):
                        self.lines[l : l + len(old)] = new
                        self.offset += len(new) - len(old)
                        self.skew = l - orig_start
                        self.dirty = True
                        offset = l - orig_start - fuzzlen
                        if fuzzlen:
                            msg = _(
                                b"Hunk #%d succeeded at %d "
                                b"with fuzz %d "
                                b"(offset %d lines).\n"
                            )
                            self.printfile(True)
                            self.ui.warn(
                                msg % (h.number, l + 1, fuzzlen, offset)
                            )
                        else:
                            msg = _(
                                b"Hunk #%d succeeded at %d "
                                b"(offset %d lines).\n"
                            )
                            self.ui.note(msg % (h.number, l + 1, offset))
                        return fuzzlen
        self.printfile(True)
        self.ui.warn(_(b"Hunk #%d FAILED at %d\n") % (h.number, orig_start))
        self.rej.append(horig)
        return -1

    def close(self):
        """Flush buffered content and rejects; return the reject count."""
        if self.dirty:
            self.writelines(self.fname, self.lines, self.mode)
        self.write_rej()
        return len(self.rej)
908
909
class header(object):
    """patch header

    Wraps the raw header lines of one file's diff and answers queries
    about them: affected file names, binary-ness, whether hunks must be
    taken all-or-nothing, and whether the file needs special handling.
    """

    diffgit_re = re.compile(b'diff --git a/(.*) b/(.*)$')
    diff_re = re.compile(b'diff -r .* (.*)$')
    allhunks_re = re.compile(b'(?:index|deleted file) ')
    pretty_re = re.compile(b'(?:new file|deleted file) ')
    special_re = re.compile(b'(?:index|deleted|copy|rename|new mode) ')
    newfile_re = re.compile(b'(?:new file|copy to|rename to)')

    def __init__(self, header):
        # header: list of raw header lines (bytes); the first line is
        # the 'diff ...' line itself
        self.header = header
        self.hunks = []

    def binary(self):
        """True if any header line marks this as a git binary diff."""
        return any(h.startswith(b'index ') for h in self.header)

    def pretty(self, fp):
        """Write a human-readable summary of this header to fp."""
        for h in self.header:
            if h.startswith(b'index '):
                fp.write(_(b'this modifies a binary file (all or nothing)\n'))
                break
            if self.pretty_re.match(h):
                fp.write(h)
                if self.binary():
                    fp.write(_(b'this is a binary file\n'))
                break
            if h.startswith(b'---'):
                fp.write(
                    _(b'%d hunks, %d lines changed\n')
                    % (
                        len(self.hunks),
                        # generator expression: no need to materialize a
                        # throwaway list just to sum it
                        sum(max(h.added, h.removed) for h in self.hunks),
                    )
                )
                break
            fp.write(h)

    def write(self, fp):
        """Write the raw header lines to fp."""
        fp.write(b''.join(self.header))

    def allhunks(self):
        """True when hunks cannot be cherry-picked individually."""
        return any(self.allhunks_re.match(h) for h in self.header)

    def files(self):
        """Return the file name(s) this header applies to.

        For a git-style header, returns a one-element list when source
        and destination match, else [fromfile, tofile]; for a plain
        'diff -r' header, the regex groups (a 1-tuple).
        """
        match = self.diffgit_re.match(self.header[0])
        if match:
            fromfile, tofile = match.groups()
            if fromfile == tofile:
                return [fromfile]
            return [fromfile, tofile]
        else:
            return self.diff_re.match(self.header[0]).groups()

    def filename(self):
        """Return the destination file name."""
        return self.files()[-1]

    def __repr__(self):
        return '<header %s>' % (
            ' '.join(pycompat.rapply(pycompat.fsdecode, self.files()))
        )

    def isnewfile(self):
        """True when this header creates a file (new/copy/rename target)."""
        return any(self.newfile_re.match(h) for h in self.header)

    def special(self):
        # Special files are shown only at the header level and not at the hunk
        # level for example a file that has been deleted is a special file.
        # The user cannot change the content of the operation, in the case of
        # the deleted file he has to take the deletion or not take it, he
        # cannot take some of it.
        # Newly added files are special if they are empty, they are not special
        # if they have some content as we want to be able to change it
        nocontent = len(self.header) == 2
        emptynewfile = self.isnewfile() and nocontent
        return emptynewfile or any(
            self.special_re.match(h) for h in self.header
        )
988
989
class recordhunk(object):
    """patch hunk

    A hunk as produced by parsepatch()/filterpatch: its header, leading
    and trailing context lines, and the +/- change lines.

    XXX shouldn't we merge this with the other hunk class?
    """

    def __init__(
        self,
        header,
        fromline,
        toline,
        proc,
        before,
        hunk,
        after,
        maxcontext=None,
    ):
        def trimcontext(lines, reverse=False):
            # Trim context down to maxcontext lines; return how many
            # lines were removed plus the remainder.  'reverse' trims
            # from the front (leading context) instead of the back.
            if maxcontext is not None:
                delta = len(lines) - maxcontext
                if delta > 0:
                    if reverse:
                        return delta, lines[delta:]
                    else:
                        return delta, lines[:maxcontext]
            return 0, lines

        self.header = header
        trimedbefore, self.before = trimcontext(before, True)
        # shift both ranges by however much leading context was dropped
        self.fromline = fromline + trimedbefore
        self.toline = toline + trimedbefore
        _trimedafter, self.after = trimcontext(after, False)
        self.proc = proc
        self.hunk = hunk
        self.added, self.removed = self.countchanges(self.hunk)

    def __eq__(self, v):
        if not isinstance(v, recordhunk):
            return False

        return (
            (v.hunk == self.hunk)
            and (v.proc == self.proc)
            and (self.fromline == v.fromline)
            and (self.header.files() == v.header.files())
        )

    def __hash__(self):
        return hash(
            (
                tuple(self.hunk),
                tuple(self.header.files()),
                self.fromline,
                self.proc,
            )
        )

    def countchanges(self, hunk):
        """hunk -> (n+,n-)"""
        # count additions/removals without building throwaway lists
        add = sum(1 for h in hunk if h.startswith(b'+'))
        rem = sum(1 for h in hunk if h.startswith(b'-'))
        return add, rem

    def reversehunk(self):
        """return another recordhunk which is the reverse of the hunk

        If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
        that, swap fromline/toline and +/- signs while keep other things
        unchanged.
        """
        m = {b'+': b'-', b'-': b'+', b'\\': b'\\'}
        hunk = [b'%s%s' % (m[l[0:1]], l[1:]) for l in self.hunk]
        return recordhunk(
            self.header,
            self.toline,
            self.fromline,
            self.proc,
            self.before,
            hunk,
            self.after,
        )

    def write(self, fp):
        """Write this hunk (with context) in unified diff format to fp."""
        delta = len(self.before) + len(self.after)
        # the no-newline marker is not a real context line; don't count it
        if self.after and self.after[-1] == diffhelper.MISSING_NEWLINE_MARKER:
            delta -= 1
        fromlen = delta + self.removed
        tolen = delta + self.added
        fp.write(
            b'@@ -%d,%d +%d,%d @@%s\n'
            % (
                self.fromline,
                fromlen,
                self.toline,
                tolen,
                self.proc and (b' ' + self.proc),
            )
        )
        fp.write(b''.join(self.before + self.hunk + self.after))

    pretty = write

    def filename(self):
        return self.header.filename()

    @encoding.strmethod
    def __repr__(self):
        return b'<hunk %r@%d>' % (self.filename(), self.fromline)
1098
1099
def getmessages():
    """Return the interactive-prompt message catalogue.

    The result has three sections: b'multiple' and b'single' map each
    operation to the per-change question (with/without a position
    indicator), and b'help' maps it to the promptchoice string whose
    choices are separated by '$$'.
    """
    multiple = {
        b'apply': _(b"apply change %d/%d to '%s'?"),
        b'discard': _(b"discard change %d/%d to '%s'?"),
        b'keep': _(b"keep change %d/%d to '%s'?"),
        b'record': _(b"record change %d/%d to '%s'?"),
    }
    single = {
        b'apply': _(b"apply this change to '%s'?"),
        b'discard': _(b"discard this change to '%s'?"),
        b'keep': _(b"keep this change to '%s'?"),
        b'record': _(b"record this change to '%s'?"),
    }
    helptexts = {
        b'apply': _(
            b'[Ynesfdaq?]'
            b'$$ &Yes, apply this change'
            b'$$ &No, skip this change'
            b'$$ &Edit this change manually'
            b'$$ &Skip remaining changes to this file'
            b'$$ Apply remaining changes to this &file'
            b'$$ &Done, skip remaining changes and files'
            b'$$ Apply &all changes to all remaining files'
            b'$$ &Quit, applying no changes'
            b'$$ &? (display help)'
        ),
        b'discard': _(
            b'[Ynesfdaq?]'
            b'$$ &Yes, discard this change'
            b'$$ &No, skip this change'
            b'$$ &Edit this change manually'
            b'$$ &Skip remaining changes to this file'
            b'$$ Discard remaining changes to this &file'
            b'$$ &Done, skip remaining changes and files'
            b'$$ Discard &all changes to all remaining files'
            b'$$ &Quit, discarding no changes'
            b'$$ &? (display help)'
        ),
        b'keep': _(
            b'[Ynesfdaq?]'
            b'$$ &Yes, keep this change'
            b'$$ &No, skip this change'
            b'$$ &Edit this change manually'
            b'$$ &Skip remaining changes to this file'
            b'$$ Keep remaining changes to this &file'
            b'$$ &Done, skip remaining changes and files'
            b'$$ Keep &all changes to all remaining files'
            b'$$ &Quit, keeping all changes'
            b'$$ &? (display help)'
        ),
        b'record': _(
            b'[Ynesfdaq?]'
            b'$$ &Yes, record this change'
            b'$$ &No, skip this change'
            b'$$ &Edit this change manually'
            b'$$ &Skip remaining changes to this file'
            b'$$ Record remaining changes to this &file'
            b'$$ &Done, skip remaining changes and files'
            b'$$ Record &all changes to all remaining files'
            b'$$ &Quit, recording no changes'
            b'$$ &? (display help)'
        ),
    }
    return {
        b'multiple': multiple,
        b'single': single,
        b'help': helptexts,
    }
1165
1166
def filterpatch(ui, headers, match, operation=None):
    """Interactively filter patch chunks into applied-only chunks

    Prompts the user (via ui.promptchoice) for every file header and
    every hunk in ``headers``, and returns a 2-tuple: a flat list of the
    kept headers and hunks, and a (currently always empty) dict.
    """
    messages = getmessages()

    if operation is None:
        # default prompt wording; callers may pass e.g. b'discard'
        operation = b'record'

    def prompt(skipfile, skipall, query, chunk):
        """prompt query, and process base inputs

        - y/n for the rest of file
        - y/n for the rest
        - ? (help)
        - q (quit)

        Return True/False and possibly updated skipfile and skipall.
        """
        newpatches = None
        if skipall is not None:
            # an earlier 'done'/'all' answer applies to everything
            return skipall, skipfile, skipall, newpatches
        if skipfile is not None:
            # an earlier per-file answer applies to the rest of this file
            return skipfile, skipfile, skipall, newpatches
        while True:
            resps = messages[b'help'][operation]
            # IMPORTANT: keep the last line of this prompt short (<40 english
            # chars is a good target) because of issue6158.
            r = ui.promptchoice(b"%s\n(enter ? for help) %s" % (query, resps))
            ui.write(b"\n")
            if r == 8:  # ?
                for c, t in ui.extractchoices(resps)[1]:
                    ui.write(b'%s - %s\n' % (c, encoding.lower(t)))
                continue
            elif r == 0:  # yes
                ret = True
            elif r == 1:  # no
                ret = False
            elif r == 2:  # Edit patch
                if chunk is None:
                    ui.write(_(b'cannot edit patch for whole file'))
                    ui.write(b"\n")
                    continue
                if chunk.header.binary():
                    ui.write(_(b'cannot edit patch for binary file'))
                    ui.write(b"\n")
                    continue
                # Patch comment based on the Git one (based on comment at end of
                # https://mercurial-scm.org/wiki/RecordExtension)
                phelp = b'---' + _(
                    b"""
To remove '-' lines, make them ' ' lines (context).
To remove '+' lines, delete them.
Lines starting with # will be removed from the patch.

If the patch applies cleanly, the edited hunk will immediately be
added to the record list. If it does not apply cleanly, a rejects
file will be generated: you can use that when you try again. If
all lines of the hunk are removed, then the edit is aborted and
the hunk is left unchanged.
"""
                )
                (patchfd, patchfn) = pycompat.mkstemp(
                    prefix=b"hg-editor-", suffix=b".diff"
                )
                ncpatchfp = None
                try:
                    # Write the initial patch
                    f = util.nativeeolwriter(os.fdopen(patchfd, 'wb'))
                    chunk.header.write(f)
                    chunk.write(f)
                    # append the help text as '#' comment lines, which are
                    # stripped back out below
                    f.write(
                        b''.join(
                            [b'# ' + i + b'\n' for i in phelp.splitlines()]
                        )
                    )
                    f.close()
                    # Start the editor and wait for it to complete
                    editor = ui.geteditor()
                    ret = ui.system(
                        b"%s \"%s\"" % (editor, patchfn),
                        environ={b'HGUSER': ui.username()},
                        blockedtag=b'filterpatch',
                    )
                    if ret != 0:
                        ui.warn(_(b"editor exited with exit code %d\n") % ret)
                        continue
                    # Remove comment lines
                    patchfp = open(patchfn, 'rb')
                    ncpatchfp = stringio()
                    for line in util.iterfile(patchfp):
                        line = util.fromnativeeol(line)
                        if not line.startswith(b'#'):
                            ncpatchfp.write(line)
                    patchfp.close()
                    ncpatchfp.seek(0)
                    newpatches = parsepatch(ncpatchfp)
                finally:
                    os.unlink(patchfn)
                    del ncpatchfp
                # Signal that the chunk shouldn't be applied as-is, but
                # provide the new patch to be used instead.
                ret = False
            elif r == 3:  # Skip
                ret = skipfile = False
            elif r == 4:  # file (Record remaining)
                ret = skipfile = True
            elif r == 5:  # done, skip remaining
                ret = skipall = False
            elif r == 6:  # all
                ret = skipall = True
            elif r == 7:  # quit
                raise error.CanceledError(_(b'user quit'))
            return ret, skipfile, skipall, newpatches

    # headers already prompted for (dedupe by their raw bytes)
    seen = set()
    applied = {}  # 'filename' -> [] of chunks
    skipfile, skipall = None, None
    pos, total = 1, sum(len(h.hunks) for h in headers)
    for h in headers:
        pos += len(h.hunks)
        skipfile = None
        # cumulative line-number drift in the new file caused by hunks
        # the user skipped or edited; applied later hunks are shifted by
        # this amount so they still line up
        fixoffset = 0
        hdr = b''.join(h.header)
        if hdr in seen:
            continue
        seen.add(hdr)
        if skipall is None:
            h.pretty(ui)
        files = h.files()
        msg = _(b'examine changes to %s?') % _(b' and ').join(
            b"'%s'" % f for f in files
        )
        if all(match.exact(f) for f in files):
            # files named explicitly on the command line are taken
            # without prompting
            r, skipall, np = True, None, None
        else:
            r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
        if not r:
            continue
        applied[h.filename()] = [h]
        if h.allhunks():
            applied[h.filename()] += h.hunks
            continue
        for i, chunk in enumerate(h.hunks):
            if skipfile is None and skipall is None:
                chunk.pretty(ui)
            if total == 1:
                msg = messages[b'single'][operation] % chunk.filename()
            else:
                idx = pos - len(h.hunks) + i
                msg = messages[b'multiple'][operation] % (
                    idx,
                    total,
                    chunk.filename(),
                )
            r, skipfile, skipall, newpatches = prompt(
                skipfile, skipall, msg, chunk
            )
            if r:
                if fixoffset:
                    # copy so the caller's hunk object is left untouched
                    chunk = copy.copy(chunk)
                    chunk.toline += fixoffset
                applied[chunk.filename()].append(chunk)
            elif newpatches is not None:
                for newpatch in newpatches:
                    for newhunk in newpatch.hunks:
                        if fixoffset:
                            newhunk.toline += fixoffset
                        applied[newhunk.filename()].append(newhunk)
            else:
                fixoffset += chunk.removed - chunk.added
    # flatten, dropping files where only the header was kept (unless the
    # header is special, e.g. a deletion that must be taken whole)
    return (
        sum(
            [
                h
                for h in pycompat.itervalues(applied)
                if h[0].special() or len(h) > 1
            ],
            [],
        ),
        {},
    )
1347
1348
class hunk(object):
    """A parsed diff hunk (unified or context format).

    Exposes the raw hunk lines (``hunk``), the old-side lines (``a``),
    the new-side lines (``b``) and the ``-start,len``/``+start,len``
    ranges decoded from the description line.
    """

    def __init__(self, desc, num, lr, context):
        # desc: the hunk description line (b'@@ ...' for unified diffs)
        # num: 1-based hunk number, used in error messages
        # lr: line reader supplying the hunk body; when None, build an
        #     empty shell to be filled in manually (see getnormalized)
        # context: true when parsing a context diff rather than unified
        self.number = num
        self.desc = desc
        self.hunk = [desc]
        self.a = []
        self.b = []
        self.starta = self.lena = None
        self.startb = self.lenb = None
        if lr is not None:
            if context:
                self.read_context_hunk(lr)
            else:
                self.read_unified_hunk(lr)

    def getnormalized(self):
        """Return a copy with line endings normalized to LF."""

        def normalize(lines):
            # rewrite CRLF endings to LF, leaving other lines untouched
            nlines = []
            for line in lines:
                if line.endswith(b'\r\n'):
                    line = line[:-2] + b'\n'
                nlines.append(line)
            return nlines

        # Dummy object, it is rebuilt manually
        nh = hunk(self.desc, self.number, None, None)
        nh.number = self.number
        nh.desc = self.desc
        nh.hunk = self.hunk
        nh.a = normalize(self.a)
        nh.b = normalize(self.b)
        nh.starta = self.starta
        nh.startb = self.startb
        nh.lena = self.lena
        nh.lenb = self.lenb
        return nh

    def read_unified_hunk(self, lr):
        """Parse a unified hunk body from lr, filling a/b and the ranges.

        Raises PatchError when the description line or body is malformed.
        """
        m = unidesc.match(self.desc)
        if not m:
            raise PatchError(_(b"bad hunk #%d") % self.number)
        self.starta, self.lena, self.startb, self.lenb = m.groups()
        # omitted lengths (b'@@ -3 +4 @@' style) default to 1
        if self.lena is None:
            self.lena = 1
        else:
            self.lena = int(self.lena)
        if self.lenb is None:
            self.lenb = 1
        else:
            self.lenb = int(self.lenb)
        self.starta = int(self.starta)
        self.startb = int(self.startb)
        try:
            diffhelper.addlines(
                lr, self.hunk, self.lena, self.lenb, self.a, self.b
            )
        except error.ParseError as e:
            raise PatchError(_(b"bad hunk #%d: %s") % (self.number, e))
        # if we hit eof before finishing out the hunk, the last line will
        # be zero length.  Lets try to fix it up.
        while len(self.hunk[-1]) == 0:
            del self.hunk[-1]
            del self.a[-1]
            del self.b[-1]
            self.lena -= 1
            self.lenb -= 1
        self._fixnewline(lr)

    def read_context_hunk(self, lr):
        """Parse a context-format hunk and convert it to unified form.

        On return a/b, the ranges, and hunk/desc hold the equivalent
        unified representation (desc is rebuilt as a b'@@ ...' line).
        """
        # old-side range line
        self.desc = lr.readline()
        m = contextdesc.match(self.desc)
        if not m:
            raise PatchError(_(b"bad hunk #%d") % self.number)
        self.starta, aend = m.groups()
        self.starta = int(self.starta)
        if aend is None:
            aend = self.starta
        self.lena = int(aend) - self.starta
        if self.starta:
            self.lena += 1
        # read the old-side lines, translating context markers to
        # unified-style prefixes
        for x in pycompat.xrange(self.lena):
            l = lr.readline()
            if l.startswith(b'---'):
                # lines addition, old block is empty
                lr.push(l)
                break
            s = l[2:]
            if l.startswith(b'- ') or l.startswith(b'! '):
                u = b'-' + s
            elif l.startswith(b'  '):
                u = b' ' + s
            else:
                raise PatchError(
                    _(b"bad hunk #%d old text line %d") % (self.number, x)
                )
            self.a.append(u)
            self.hunk.append(u)

        l = lr.readline()
        if l.startswith(br'\ '):
            # '\ No newline at end of file': drop the newline we kept on
            # the previous old-side line
            s = self.a[-1][:-1]
            self.a[-1] = s
            self.hunk[-1] = s
            l = lr.readline()
        # new-side range line
        m = contextdesc.match(l)
        if not m:
            raise PatchError(_(b"bad hunk #%d") % self.number)
        self.startb, bend = m.groups()
        self.startb = int(self.startb)
        if bend is None:
            bend = self.startb
        self.lenb = int(bend) - self.startb
        if self.startb:
            self.lenb += 1
        hunki = 1
        # read the new-side lines, merging them into self.hunk after the
        # corresponding old-side lines
        for x in pycompat.xrange(self.lenb):
            l = lr.readline()
            if l.startswith(br'\ '):
                # XXX: the only way to hit this is with an invalid line range.
                # The no-eol marker is not counted in the line range, but I
                # guess there are diff(1) out there which behave differently.
                s = self.b[-1][:-1]
                self.b[-1] = s
                self.hunk[hunki - 1] = s
                continue
            if not l:
                # line deletions, new block is empty and we hit EOF
                lr.push(l)
                break
            s = l[2:]
            if l.startswith(b'+ ') or l.startswith(b'! '):
                u = b'+' + s
            elif l.startswith(b'  '):
                u = b' ' + s
            elif len(self.b) == 0:
                # line deletions, new block is empty
                lr.push(l)
                break
            else:
                raise PatchError(
                    _(b"bad hunk #%d old text line %d") % (self.number, x)
                )
            self.b.append(s)
            # place u in self.hunk: skip over '-' lines (they belong to
            # the old side) and insert if no matching line is found
            while True:
                if hunki >= len(self.hunk):
                    h = b""
                else:
                    h = self.hunk[hunki]
                hunki += 1
                if h == u:
                    break
                elif h.startswith(b'-'):
                    continue
                else:
                    self.hunk.insert(hunki - 1, u)
                    break

        if not self.a:
            # this happens when lines were only added to the hunk
            for x in self.hunk:
                if x.startswith(b'-') or x.startswith(b' '):
                    self.a.append(x)
        if not self.b:
            # this happens when lines were only deleted from the hunk
            for x in self.hunk:
                if x.startswith(b'+') or x.startswith(b' '):
                    self.b.append(x[1:])
        # @@ -start,len +start,len @@
        self.desc = b"@@ -%d,%d +%d,%d @@\n" % (
            self.starta,
            self.lena,
            self.startb,
            self.lenb,
        )
        self.hunk[0] = self.desc
        self._fixnewline(lr)

    def _fixnewline(self, lr):
        """Consume a trailing '\\ No newline' marker line, if present."""
        l = lr.readline()
        if l.startswith(br'\ '):
            diffhelper.fixnewline(self.hunk, self.a, self.b)
        else:
            # not a marker: give the line back to the reader
            lr.push(l)

    def complete(self):
        """True once a/b hold as many lines as the ranges announced."""
        return len(self.a) == self.lena and len(self.b) == self.lenb

    def _fuzzit(self, old, new, fuzz, toponly):
        # this removes context lines from the top and bottom of list 'l'.  It
        # checks the hunk to make sure only context lines are removed, and then
        # returns a new shortened list of lines.
        fuzz = min(fuzz, len(old))
        if fuzz:
            top = 0
            bot = 0
            hlen = len(self.hunk)
            for x in pycompat.xrange(hlen - 1):
                # the hunk starts with the @@ line, so use x+1
                if self.hunk[x + 1].startswith(b' '):
                    top += 1
                else:
                    break
            if not toponly:
                # count trailing context lines as well
                for x in pycompat.xrange(hlen - 1):
                    if self.hunk[hlen - bot - 1].startswith(b' '):
                        bot += 1
                    else:
                        break

            bot = min(fuzz, bot)
            top = min(fuzz, top)
            return old[top : len(old) - bot], new[top : len(new) - bot], top
        return old, new, 0

    def fuzzit(self, fuzz, toponly):
        """Return (old, oldstart, new, newstart) with up to *fuzz*
        context lines trimmed from the ends of the hunk."""
        old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
        oldstart = self.starta + top
        newstart = self.startb + top
        # zero length hunk ranges already have their start decremented
        if self.lena and oldstart > 0:
            oldstart -= 1
        if self.lenb and newstart > 0:
            newstart -= 1
        return old, oldstart, new, newstart
1575
1576
class binhunk(object):
    """A binary patch file."""

    def __init__(self, lr, fname):
        # decoded payload; stays None until _read succeeds
        self.text = None
        # True when the payload is a delta against existing content
        self.delta = False
        self.hunk = [b'GIT binary patch\n']
        self._fname = fname
        self._read(lr)

    def complete(self):
        """True once the binary payload has been decoded."""
        return self.text is not None

    def new(self, lines):
        """Return the new file content as a one-element list."""
        if not self.delta:
            return [self.text]
        return [applybindelta(self.text, b''.join(lines))]

    def _read(self, lr):
        """Decode the base85/zlib payload from line reader lr."""

        def nextline():
            # record every raw line in self.hunk, return it stripped
            l = lr.readline()
            self.hunk.append(l)
            return l.rstrip(b'\r\n')

        # scan for the 'literal <size>' or 'delta <size>' marker
        size = None
        while size is None:
            line = nextline()
            if not line:
                raise PatchError(
                    _(b'could not extract "%s" binary data') % self._fname
                )
            if line.startswith(b'literal '):
                size = int(line[8:].rstrip())
            elif line.startswith(b'delta '):
                size = int(line[6:].rstrip())
                self.delta = True

        # each data line starts with one letter encoding its decoded
        # length: A-Z -> 1-26, a-z -> 27-52
        chunks = []
        line = nextline()
        while len(line) > 1:
            prefix = line[0:1]
            if b'A' <= prefix <= b'Z':
                length = ord(prefix) - ord(b'A') + 1
            else:
                length = ord(prefix) - ord(b'a') + 27
            try:
                chunks.append(util.b85decode(line[1:])[:length])
            except ValueError as e:
                raise PatchError(
                    _(b'could not decode "%s" binary patch: %s')
                    % (self._fname, stringutil.forcebytestr(e))
                )
            line = nextline()
        text = zlib.decompress(b''.join(chunks))
        if len(text) != size:
            raise PatchError(
                _(b'"%s" length is %d bytes, should be %d')
                % (self._fname, len(text), size)
            )
        self.text = text
1637
1638
def parsefilename(str):
    """Extract the file name from a '--- '/'+++ ' patch line.

    Everything after the first tab or, failing that, the first space is
    treated as trailing metadata (e.g. a timestamp) and dropped.
    """
    # skip the 4-byte marker and trailing newline characters
    s = str[4:].rstrip(b'\r\n')
    for sep in (b'\t', b' '):
        cut = s.find(sep)
        if cut >= 0:
            return s[:cut]
    return s
1648
1649
def reversehunks(hunks):
    '''reverse the signs in the hunks given as argument

    This function operates on hunks coming out of patch.filterpatch, that is
    a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:

    >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,7 +1,7 @@
    ... +firstline
    ...  c
    ...  1
    ...  2
    ... + 3
    ... -4
    ...  5
    ...  d
    ... +lastline"""
    >>> hunks = parsepatch([rawpatch])
    >>> hunkscomingfromfilterpatch = []
    >>> for h in hunks:
    ...     hunkscomingfromfilterpatch.append(h)
    ...     hunkscomingfromfilterpatch.extend(h.hunks)

    >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
    >>> from . import util
    >>> fp = util.stringio()
    >>> for c in reversedhunks:
    ...      c.write(fp)
    >>> fp.seek(0) or None
    >>> reversedpatch = fp.read()
    >>> print(pycompat.sysstr(reversedpatch))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -1,4 +1,3 @@
    -firstline
     c
     1
     2
    @@ -2,6 +1,6 @@
     c
     1
     2
    - 3
    +4
     5
     d
    @@ -6,3 +5,2 @@
     5
     d
    -lastline

    '''

    # Headers pass through unchanged; anything that knows how to invert
    # itself (recordhunk) is replaced by its inverse.
    return [
        c.reversehunk() if util.safehasattr(c, b'reversehunk') else c
        for c in hunks
    ]
1712
1713
def parsepatch(originalchunks, maxcontext=None):
    """patch -> [] of headers -> [] of hunks

    If maxcontext is not None, trim context lines if necessary.

    >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,8 +1,10 @@
    ...  1
    ...  2
    ... -3
    ...  4
    ...  5
    ...  6
    ... +6.1
    ... +6.2
    ...  7
    ...  8
    ... +9'''
    >>> out = util.stringio()
    >>> headers = parsepatch([rawpatch], maxcontext=1)
    >>> for header in headers:
    ...     header.write(out)
    ...     for hunk in header.hunks:
    ...         hunk.write(out)
    >>> print(pycompat.sysstr(out.getvalue()))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -2,3 +2,2 @@
     2
    -3
     4
    @@ -6,2 +5,4 @@
     6
    +6.1
    +6.2
     7
    @@ -8,1 +9,2 @@
     8
    +9
    """

    class parser(object):
        """patch parsing state machine"""

        def __init__(self):
            self.fromline = 0  # current hunk start line in the old file
            self.toline = 0  # current hunk start line in the new file
            self.proc = b''  # text after the second @@ of the range line
            self.header = None  # header object of the file being parsed
            self.context = []  # context lines following the pending hunk
            self.before = []  # context lines preceding the pending hunk
            self.hunk = []  # +/- lines of the pending hunk
            self.headers = []  # completed header objects, in order

        def addrange(self, limits):
            """Start a new hunk from a parsed range line."""
            self.addcontext([])
            fromstart, fromend, tostart, toend, proc = limits
            self.fromline = int(fromstart)
            self.toline = int(tostart)
            self.proc = proc

        def addcontext(self, context):
            """Attach *context* and flush any pending hunk as a recordhunk."""
            if self.hunk:
                h = recordhunk(
                    self.header,
                    self.fromline,
                    self.toline,
                    self.proc,
                    self.before,
                    self.hunk,
                    context,
                    maxcontext,
                )
                self.header.hunks.append(h)
                # advance both line cursors past the flushed hunk
                self.fromline += len(self.before) + h.removed
                self.toline += len(self.before) + h.added
                self.before = []
                self.hunk = []
            self.context = context

        def addhunk(self, hunk):
            """Record +/- lines; preceding context becomes leading context."""
            if self.context:
                self.before = self.context
                self.context = []
            if self.hunk:
                self.addcontext([])
            self.hunk = hunk

        def newfile(self, hdr):
            """Start a new file: flush pending state and open a header."""
            self.addcontext([])
            h = header(hdr)
            self.headers.append(h)
            self.header = h

        def addother(self, line):
            pass  # 'other' lines are ignored

        def finished(self):
            """Flush pending state and return the parsed headers."""
            self.addcontext([])
            return self.headers

        # State table: transitions[state][newstate] is the handler for a
        # token of type 'newstate' seen while in 'state'.  The values
        # are plain functions invoked as f(p, data) in the driver loop
        # below; a missing entry means the patch is malformed.
        transitions = {
            b'file': {
                b'context': addcontext,
                b'file': newfile,
                b'hunk': addhunk,
                b'range': addrange,
            },
            b'context': {
                b'file': newfile,
                b'hunk': addhunk,
                b'range': addrange,
                b'other': addother,
            },
            b'hunk': {
                b'context': addcontext,
                b'file': newfile,
                b'range': addrange,
            },
            b'range': {b'context': addcontext, b'hunk': addhunk},
            b'other': {b'other': addother},
        }

    p = parser()
    # concatenate the chunks into one seekable stream for scanpatch
    fp = stringio()
    fp.write(b''.join(originalchunks))
    fp.seek(0)

    state = b'context'
    for newstate, data in scanpatch(fp):
        try:
            p.transitions[state][newstate](p, data)
        except KeyError:
            raise PatchError(
                b'unhandled transition: %s -> %s' % (state, newstate)
            )
        state = newstate
    del fp
    return p.finished()
1856
1857
def pathtransform(path, strip, prefix):
    """turn a path from a patch into a path suitable for the repository

    prefix, if not empty, is expected to be normalized with a / at the end.

    Returns (stripped components, path in repository).

    >>> pathtransform(b'a/b/c', 0, b'')
    ('', 'a/b/c')
    >>> pathtransform(b'   a/b/c   ', 0, b'')
    ('', '   a/b/c')
    >>> pathtransform(b'   a/b/c   ', 2, b'')
    ('a/b/', 'c')
    >>> pathtransform(b'a/b/c', 0, b'd/e/')
    ('', 'd/e/a/b/c')
    >>> pathtransform(b'   a//b/c   ', 2, b'd/e/')
    ('a//b/', 'd/e/c')
    >>> pathtransform(b'a/b/c', 3, b'')
    Traceback (most recent call last):
    PatchError: unable to strip away 1 of 3 dirs from a/b/c
    """
    if strip == 0:
        return b'', prefix + path.rstrip()
    end = len(path)
    cut = 0
    remaining = strip
    while remaining > 0:
        cut = path.find(b'/', cut)
        if cut == -1:
            raise PatchError(
                _(b"unable to strip away %d of %d dirs from %s")
                % (remaining, strip, path)
            )
        cut += 1
        # swallow runs of consecutive slashes ('//') as one separator
        while cut < end - 1 and path[cut : cut + 1] == b'/':
            cut += 1
        remaining -= 1
    return path[:cut].lstrip(), prefix + path[cut:].rstrip()
1897
1898
def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
    """Build a patchmeta for a plain (non-git) patch hunk.

    Picks which repository file the hunk targets and tags the result
    with an ADD/DELETE operation where the hunk header indicates one.
    """
    nulla = afile_orig == b"/dev/null"
    nullb = bfile_orig == b"/dev/null"
    # a /dev/null side together with a 0,0 range marks create/remove
    create = nulla and hunk.starta == 0 and hunk.lena == 0
    remove = nullb and hunk.startb == 0 and hunk.lenb == 0
    abase, afile = pathtransform(afile_orig, strip, prefix)
    gooda = not nulla and backend.exists(afile)
    bbase, bfile = pathtransform(bfile_orig, strip, prefix)
    goodb = gooda if afile == bfile else (not nullb and backend.exists(bfile))
    missing = not (gooda or goodb or create)

    # some diff programs apparently produce patches where the afile is
    # not /dev/null, but afile starts with bfile
    abasedir = afile[: afile.rfind(b'/') + 1]
    bbasedir = bfile[: bfile.rfind(b'/') + 1]
    if (
        missing
        and abasedir == bbasedir
        and afile.startswith(bfile)
        and hunk.starta == 0
        and hunk.lena == 0
    ):
        create = True
        missing = False

    # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
    # diff is between a file and its backup. In this case, the original
    # file should be patched (see original mpatch code).
    isbackup = abase == bbase and bfile.startswith(afile)
    fname = None
    if not missing:
        if gooda and goodb:
            fname = afile if isbackup else bfile
        elif gooda:
            fname = afile

    if not fname:
        if not nullb:
            fname = afile if isbackup else bfile
        elif not nulla:
            fname = afile
        else:
            raise PatchError(_(b"undefined source and destination files"))

    gp = patchmeta(fname)
    if create:
        gp.op = b'ADD'
    elif remove:
        gp.op = b'DELETE'
    return gp
1958
1959
def scanpatch(fp):
    """like patch.iterhunks, but yield different events

    - ('file',    [header_lines + fromfile + tofile])
    - ('context', [context_lines])
    - ('hunk',    [hunk_lines])
    - ('range',   (-start,len, +start,len, proc))
    """
    lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
    lr = linereader(fp)

    def collect(first, keep):
        """gather lines starting at 'first' while keep(line) holds"""
        gathered = [first]
        while True:
            nxt = lr.readline()
            if not nxt:
                break
            if not keep(nxt):
                lr.push(nxt)
                break
            gathered.append(nxt)
        return gathered

    while True:
        line = lr.readline()
        if not line:
            break
        if line.startswith((b'diff --git a/', b'diff -r ')):

            def notheader(l):
                parts = l.split(None, 1)
                return not parts or parts[0] not in (b'---', b'diff')

            hdr = collect(line, notheader)
            fromfile = lr.readline()
            if fromfile.startswith(b'---'):
                # pull in the ---/+++ pair as part of the file event
                hdr.append(fromfile)
                hdr.append(lr.readline())
            else:
                lr.push(fromfile)
            yield b'file', hdr
        elif line.startswith(b' '):
            yield b'context', collect(
                line, lambda l: l.startswith((b' ', b'\\'))
            )
        elif line.startswith((b'-', b'+')):
            yield b'hunk', collect(
                line, lambda l: l.startswith((b'-', b'+', b'\\'))
            )
        else:
            m = lines_re.match(line)
            if m:
                yield b'range', m.groups()
            else:
                yield b'other', line
2009
2010
def scangitpatch(lr, firstline):
    """Pre-scan a git patch for its copy/rename metadata.

    Git patches can emit:
    - rename a to b
    - change b
    - copy a to c
    - change c

    We cannot apply this sequence as-is, the renamed 'a' could not be
    found for it would have been renamed already. And we cannot copy
    from 'b' instead because 'b' would have been changed already. So
    we scan the git patch for copy and rename commands so we can
    perform the copies ahead of time.
    """
    try:
        start = lr.fp.tell()
    except IOError:
        # unseekable input: slurp it into a rewindable buffer
        start = 0
        fp = stringio(lr.fp.read())
    else:
        fp = lr.fp
    gitlr = linereader(fp)
    gitlr.push(firstline)
    gitpatches = readgitpatch(gitlr)
    # rewind so the main parser re-reads the patch from the start
    fp.seek(start)
    return gitpatches
2036
2037
def iterhunks(fp):
    """Read a patch and yield the following events:
    - ("file", afile, bfile, firsthunk): select a new target file.
    - ("hunk", hunk): a new hunk is ready to be applied, follows a
    "file" event.
    - ("git", gitchanges): current diff is in git format, gitchanges
    maps filenames to gitpatch records. Unique event.
    """
    afile = b""
    bfile = b""
    state = None
    hunknum = 0
    # 'file' events are deferred until the first hunk of that file
    emitfile = newfile = False
    # populated lazily on the first 'diff --git' header
    gitpatches = None

    # our states
    BFILE = 1
    # context: None = unknown yet, True = context diff, False = unified
    context = None
    lr = linereader(fp)

    for x in iter(lr.readline, b''):
        if state == BFILE and (
            (not context and x.startswith(b'@'))
            or (context is not False and x.startswith(b'***************'))
            or x.startswith(b'GIT binary patch')
        ):
            # a hunk begins for the currently selected file
            gp = None
            if gitpatches and gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
            if x.startswith(b'GIT binary patch'):
                h = binhunk(lr, gp.path)
            else:
                if context is None and x.startswith(b'***************'):
                    context = True
                h = hunk(x, hunknum + 1, lr, context)
            hunknum += 1
            if emitfile:
                # first hunk of this file: emit the pending 'file' event
                emitfile = False
                yield b'file', (afile, bfile, h, gp and gp.copy() or None)
            yield b'hunk', h
        elif x.startswith(b'diff --git a/'):
            m = gitre.match(x.rstrip(b'\r\n'))
            if not m:
                continue
            if gitpatches is None:
                # scan whole input for git metadata
                gitpatches = scangitpatch(lr, x)
                yield b'git', [
                    g.copy() for g in gitpatches if g.op in (b'COPY', b'RENAME')
                ]
                # reversed so the next expected patch sits at the end
                gitpatches.reverse()
            afile = b'a/' + m.group(1)
            bfile = b'b/' + m.group(2)
            # flush hunkless git patches that precede the current file
            while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
                yield b'file', (
                    b'a/' + gp.path,
                    b'b/' + gp.path,
                    None,
                    gp.copy(),
                )
            if not gitpatches:
                raise PatchError(
                    _(b'failed to synchronize metadata for "%s"') % afile[2:]
                )
            newfile = True
        elif x.startswith(b'---'):
            # check for a unified diff
            l2 = lr.readline()
            if not l2.startswith(b'+++'):
                lr.push(l2)
                continue
            newfile = True
            context = False
            afile = parsefilename(x)
            bfile = parsefilename(l2)
        elif x.startswith(b'***'):
            # check for a context diff: '*** f' then '--- f' then a
            # '***************' hunk separator on the following line
            l2 = lr.readline()
            if not l2.startswith(b'---'):
                lr.push(l2)
                continue
            l3 = lr.readline()
            lr.push(l3)
            if not l3.startswith(b"***************"):
                lr.push(l2)
                continue
            newfile = True
            context = True
            afile = parsefilename(x)
            bfile = parsefilename(l2)

        if newfile:
            # arm the deferred 'file' event and reset per-file counters
            newfile = False
            emitfile = True
            state = BFILE
            hunknum = 0

    # emit any remaining git patches that produced no hunks
    while gitpatches:
        gp = gitpatches.pop()
        yield b'file', (b'a/' + gp.path, b'b/' + gp.path, None, gp.copy())
2139
2140
def applybindelta(binchunk, data):
    """Apply a binary delta hunk
    The algorithm used is the algorithm from git's patch-delta.c

    binchunk: the delta stream (source size, result size, then opcodes)
    data: the source blob the delta refers to
    Returns the reconstructed content as bytes; raises PatchError on a
    zero opcode (reserved in the delta format).
    """

    def deltahead(binchunk):
        # length in bytes of the leading base-128 varint (0x80 is the
        # continuation bit); falls back to the whole chunk length if
        # the varint never terminates
        i = 0
        for c in pycompat.bytestr(binchunk):
            i += 1
            if not (ord(c) & 0x80):
                return i
        return i

    # accumulate into a bytearray: repeated bytes concatenation
    # (out += ...) reallocates and copies on every iteration, which is
    # quadratic for large deltas
    out = bytearray()
    s = deltahead(binchunk)
    binchunk = binchunk[s:]  # skip the source-size header
    s = deltahead(binchunk)
    binchunk = binchunk[s:]  # skip the result-size header
    i = 0
    while i < len(binchunk):
        cmd = ord(binchunk[i : i + 1])
        i += 1
        if cmd & 0x80:
            # copy-from-source opcode: bits 0-3 select which offset
            # bytes follow, bits 4-6 which size bytes (little-endian)
            offset = 0
            size = 0
            if cmd & 0x01:
                offset = ord(binchunk[i : i + 1])
                i += 1
            if cmd & 0x02:
                offset |= ord(binchunk[i : i + 1]) << 8
                i += 1
            if cmd & 0x04:
                offset |= ord(binchunk[i : i + 1]) << 16
                i += 1
            if cmd & 0x08:
                offset |= ord(binchunk[i : i + 1]) << 24
                i += 1
            if cmd & 0x10:
                size = ord(binchunk[i : i + 1])
                i += 1
            if cmd & 0x20:
                size |= ord(binchunk[i : i + 1]) << 8
                i += 1
            if cmd & 0x40:
                size |= ord(binchunk[i : i + 1]) << 16
                i += 1
            if size == 0:
                # an all-zero size field encodes 0x10000 (patch-delta.c)
                size = 0x10000
            offset_end = offset + size
            out += data[offset:offset_end]
        elif cmd != 0:
            # literal insert of the next 'cmd' bytes from the delta
            offset_end = i + cmd
            out += binchunk[i:offset_end]
            i += cmd
        else:
            raise PatchError(_(b'unexpected delta opcode 0'))
    return bytes(out)
2198
2199
def applydiff(ui, fp, backend, store, strip=1, prefix=b'', eolmode=b'strict'):
    """Reads a patch from fp and tries to apply it.

    Returns 0 for a clean patch, -1 if any rejects were found and 1 if
    there was any fuzz.

    If 'eolmode' is 'strict', the patch content and patched file are
    read in binary mode. Otherwise, line endings are ignored when
    patching then normalized according to 'eolmode'.
    """
    # delegate to the worker, using the standard patchfile factory
    return _applydiff(
        ui,
        fp,
        patchfile,
        backend,
        store,
        strip=strip,
        prefix=prefix,
        eolmode=eolmode,
    )
2220
2221
def _canonprefix(repo, prefix):
    """Canonicalize a non-empty prefix against the repo, ensuring it
    ends with '/'; an empty/None prefix is returned unchanged."""
    if not prefix:
        return prefix
    canon = pathutil.canonpath(repo.root, repo.getcwd(), prefix)
    if canon != b'':
        canon += b'/'
    return canon
2228
2229
def _applydiff(
    ui, fp, patcher, backend, store, strip=1, prefix=b'', eolmode=b'strict'
):
    """Worker for applydiff(): drive iterhunks() events through 'patcher'.

    Returns 0 for a clean patch, -1 if any rejects were found and 1 if
    there was any fuzz (same contract as applydiff()).
    """
    prefix = _canonprefix(backend.repo, prefix)

    def pstrip(p):
        # gp paths carry no a//b/ diff prefix (iterhunks adds it when
        # yielding), hence strip - 1 rather than strip
        return pathtransform(p, strip - 1, prefix)[1]

    rejects = 0
    err = 0
    current_file = None

    for state, values in iterhunks(fp):
        if state == b'hunk':
            # hunks for a file we failed to open are silently skipped
            if not current_file:
                continue
            ret = current_file.apply(values)
            if ret > 0:
                err = 1
        elif state == b'file':
            # close out the previous file before switching
            if current_file:
                rejects += current_file.close()
                current_file = None
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pstrip(gp.path)
                if gp.oldpath:
                    gp.oldpath = pstrip(gp.oldpath)
            else:
                # plain patch: synthesize metadata from the file names
                gp = makepatchmeta(
                    backend, afile, bfile, first_hunk, strip, prefix
                )
            if gp.op == b'RENAME':
                backend.unlink(gp.oldpath)
            if not first_hunk:
                # metadata-only change: handle it here, no patching needed
                if gp.op == b'DELETE':
                    backend.unlink(gp.path)
                    continue
                data, mode = None, None
                if gp.op in (b'RENAME', b'COPY'):
                    data, mode = store.getfile(gp.oldpath)[:2]
                    if data is None:
                        # This means that the old path does not exist
                        raise PatchError(
                            _(b"source file '%s' does not exist") % gp.oldpath
                        )
                if gp.mode:
                    mode = gp.mode
                    if gp.op == b'ADD':
                        # Added files without content have no hunk and
                        # must be created
                        data = b''
                if data or mode:
                    if gp.op in (b'ADD', b'RENAME', b'COPY') and backend.exists(
                        gp.path
                    ):
                        raise PatchError(
                            _(
                                b"cannot create %s: destination "
                                b"already exists"
                            )
                            % gp.path
                        )
                    backend.setfile(gp.path, data, mode, gp.oldpath)
                continue
            try:
                current_file = patcher(ui, gp, backend, store, eolmode=eolmode)
            except PatchError as inst:
                # report and count the reject, then keep patching
                ui.warn(stringutil.forcebytestr(inst) + b'\n')
                current_file = None
                rejects += 1
                continue
        elif state == b'git':
            # stash copy/rename sources before they may get modified
            for gp in values:
                path = pstrip(gp.oldpath)
                data, mode = backend.getfile(path)
                if data is None:
                    # The error ignored here will trigger a getfile()
                    # error in a place more appropriate for error
                    # handling, and will not interrupt the patching
                    # process.
                    pass
                else:
                    store.setfile(path, data, mode)
        else:
            raise error.Abort(_(b'unsupported parser state: %s') % state)

    if current_file:
        rejects += current_file.close()

    if rejects:
        return -1
    return err
2323
2324
def _externalpatch(ui, repo, patcher, patchname, strip, files, similarity):
    """use <patcher> to apply <patchname> to the working directory.
    returns whether patch was applied with fuzz factor."""

    fuzz = False
    args = []
    cwd = repo.root
    if cwd:
        args.append(b'-d %s' % procutil.shellquote(cwd))
    cmd = b'%s %s -p%d < %s' % (
        patcher,
        b' '.join(args),
        strip,
        procutil.shellquote(patchname),
    )
    ui.debug(b'Using external patch tool: %s\n' % cmd)
    fp = procutil.popen(cmd, b'rb')
    # pf/printed_file used to be assigned only on a 'patching file' line,
    # so a tool emitting 'with fuzz'/'FAILED' output before any such line
    # raised UnboundLocalError; initialize them up front instead.
    pf = None
    printed_file = False
    try:
        for line in util.iterfile(fp):
            line = line.rstrip()
            ui.note(line + b'\n')
            if line.startswith(b'patching file '):
                pf = util.parsepatchoutput(line)
                printed_file = False
                files.add(pf)
            elif line.find(b'with fuzz') >= 0:
                fuzz = True
                if not printed_file and pf is not None:
                    ui.warn(pf + b'\n')
                    printed_file = True
                ui.warn(line + b'\n')
            elif line.find(b'saving rejects to file') >= 0:
                ui.warn(line + b'\n')
            elif line.find(b'FAILED') >= 0:
                if not printed_file and pf is not None:
                    ui.warn(pf + b'\n')
                    printed_file = True
                ui.warn(line + b'\n')
    finally:
        if files:
            scmutil.marktouched(repo, files, similarity)
    code = fp.close()
    if code:
        raise PatchError(
            _(b"patch command failed: %s") % procutil.explainexit(code)
        )
    return fuzz
2372
2373
def patchbackend(
    ui, backend, patchobj, strip, prefix, files=None, eolmode=b'strict'
):
    """Apply patchobj through the given backend.

    Returns True when the patch applied with fuzz, False when it applied
    cleanly; raises PatchError if any hunk was rejected.
    """
    if files is None:
        files = set()
    if eolmode is None:
        eolmode = ui.config(b'patch', b'eol')
    if eolmode.lower() not in eolmodes:
        raise error.Abort(_(b'unsupported line endings type: %s') % eolmode)
    eolmode = eolmode.lower()

    store = filestore()
    try:
        fp = open(patchobj, b'rb')
    except TypeError:
        # not a path: assume an already-open file-like object
        fp = patchobj
    try:
        status = applydiff(
            ui,
            fp,
            backend,
            store,
            strip=strip,
            prefix=prefix,
            eolmode=eolmode,
        )
    finally:
        if fp != patchobj:
            fp.close()
        files.update(backend.close())
        store.close()
    if status < 0:
        raise PatchError(_(b'patch failed to apply'))
    return status > 0
2402
2403
def internalpatch(
    ui,
    repo,
    patchobj,
    strip,
    prefix=b'',
    files=None,
    eolmode=b'strict',
    similarity=0,
):
    """use builtin patch to apply <patchobj> to the working directory.
    returns whether patch was applied with fuzz factor."""
    return patchbackend(
        ui,
        workingbackend(ui, repo, similarity),
        patchobj,
        strip,
        prefix,
        files,
        eolmode,
    )
2418
2419
def patchrepo(
    ui, repo, ctx, store, patchobj, strip, prefix, files=None, eolmode=b'strict'
):
    """Apply patchobj against ctx through a repobackend bound to store."""
    backend = repobackend(ui, repo, ctx, store)
    return patchbackend(
        ui, backend, patchobj, strip, prefix, files=files, eolmode=eolmode
    )
2425
2426
def patch(
    ui,
    repo,
    patchname,
    strip=1,
    prefix=b'',
    files=None,
    eolmode=b'strict',
    similarity=0,
):
    """Apply <patchname> to the working directory.

    'eolmode' specifies how end of lines should be handled. It can be:
    - 'strict': inputs are read in binary mode, EOLs are preserved
    - 'crlf': EOLs are ignored when patching and reset to CRLF
    - 'lf': EOLs are ignored when patching and reset to LF
    - None: get it from user settings, default to 'strict'
    'eolmode' is ignored when using an external patcher program.

    Returns whether patch was applied with fuzz factor.
    """
    patcher = ui.config(b'ui', b'patch')
    if files is None:
        files = set()
    if not patcher:
        # no external tool configured: use the builtin implementation
        return internalpatch(
            ui, repo, patchname, strip, prefix, files, eolmode, similarity
        )
    return _externalpatch(
        ui, repo, patcher, patchname, strip, files, similarity
    )
2458
2459
def changedfiles(ui, repo, patchpath, strip=1, prefix=b''):
    """Return the set of repository paths touched by the patch at
    patchpath (including rename sources)."""
    backend = fsbackend(ui, repo.root)
    prefix = _canonprefix(repo, prefix)
    changed = set()
    with open(patchpath, b'rb') as fp:
        for state, values in iterhunks(fp):
            if state == b'file':
                afile, bfile, first_hunk, gp = values
                if gp:
                    # git metadata paths lack the a//b/ prefix: strip - 1
                    gp.path = pathtransform(gp.path, strip - 1, prefix)[1]
                    if gp.oldpath:
                        gp.oldpath = pathtransform(
                            gp.oldpath, strip - 1, prefix
                        )[1]
                else:
                    gp = makepatchmeta(
                        backend, afile, bfile, first_hunk, strip, prefix
                    )
                changed.add(gp.path)
                if gp.op == b'RENAME':
                    changed.add(gp.oldpath)
            elif state not in (b'hunk', b'git'):
                raise error.Abort(_(b'unsupported parser state: %s') % state)
    return changed
2484
2485
class GitDiffRequired(Exception):
    """Raised (by the losedata callback in diffhunks) to request that
    the diff be regenerated in git format."""

    pass
2488
2489
# option factories re-exported from diffutil; note that 'diffopts' is an
# alias of diffallopts (NOTE(review): presumably kept for backward
# compatibility with callers of the old name -- confirm before removing)
diffopts = diffutil.diffallopts
diffallopts = diffutil.diffallopts
difffeatureopts = diffutil.difffeatureopts
2493
2494
def diff(
    repo,
    node1=None,
    node2=None,
    match=None,
    changes=None,
    opts=None,
    losedatafn=None,
    pathfn=None,
    copy=None,
    copysourcematch=None,
    hunksfilterfn=None,
):
    """yields diff of changes to files between two nodes, or node and
    working directory.

    if node1 is None, use first dirstate parent instead.
    if node2 is None, compare node1 with working directory.

    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
    every time some change cannot be represented with the current
    patch format. Return False to upgrade to git patch format, True to
    accept the loss or raise an exception to abort the diff. It is
    called with the name of current file being diffed as 'fn'. If set
    to None, patches will always be upgraded to git format when
    necessary.

    pathfn is forwarded to diffhunks() unchanged (NOTE(review): this
    docstring previously documented 'prefix' and 'relroot', which are
    not parameters of this function).

    copy, if not empty, should contain mappings {dst@y: src@x} of copy
    information.

    if copysourcematch is not None, then copy sources will be filtered by this
    matcher

    hunksfilterfn, if not None, should be a function taking a filectx and
    hunks generator that may yield filtered hunks.
    """
    if not node1 and not node2:
        node1 = repo.dirstate.p1()

    ctx1 = repo[node1]
    ctx2 = repo[node2]

    for fctx1, fctx2, hdr, hunks in diffhunks(
        repo,
        ctx1=ctx1,
        ctx2=ctx2,
        match=match,
        changes=changes,
        opts=opts,
        losedatafn=losedatafn,
        pathfn=pathfn,
        copy=copy,
        copysourcematch=copysourcematch,
    ):
        if hunksfilterfn is not None:
            # If the file has been removed, fctx2 is None; but this should
            # not occur here since we catch removed files early in
            # logcmdutil.getlinerangerevs() for 'hg log -L'.
            assert (
                fctx2 is not None
            ), b'fctx2 unexpectly None in diff hunks filtering'
            hunks = hunksfilterfn(fctx2, hunks)
        text = b''.join(b''.join(hlines) for hrange, hlines in hunks)
        # emit the header when there is body text, or when the header
        # itself carries more than the bare 'diff' line
        if hdr and (text or len(hdr) > 1):
            yield b'\n'.join(hdr) + b'\n'
        if text:
            yield text
2568
2569
def diffhunks(
    repo,
    ctx1,
    ctx2,
    match=None,
    changes=None,
    opts=None,
    losedatafn=None,
    pathfn=None,
    copy=None,
    copysourcematch=None,
):
    """Yield diff of changes to files in the form of (`header`, `hunks`) tuples
    where `header` is a list of diff headers and `hunks` is an iterable of
    (`hunkrange`, `hunklines`) tuples.

    See diff() for the meaning of parameters.
    """

    if opts is None:
        opts = mdiff.defaultopts

    def lrugetfilectx():
        # filectx factory backed by a small (20-entry) LRU cache of
        # filelogs, so repeated lookups of the same file reuse them
        cache = {}
        order = collections.deque()

        def getfilectx(f, ctx):
            fctx = ctx.filectx(f, filelog=cache.get(f))
            if f not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[f] = fctx.filelog()
            else:
                order.remove(f)
            order.append(f)
            return fctx

        return getfilectx

    getfilectx = lrugetfilectx()

    if not changes:
        changes = ctx1.status(ctx2, match=match)
    # 'changes' may be a legacy list or a status object
    if isinstance(changes, list):
        modified, added, removed = changes[:3]
    else:
        modified, added, removed = (
            changes.modified,
            changes.added,
            changes.removed,
        )

    if not modified and not added and not removed:
        return []

    if repo.ui.debugflag:
        hexfunc = hex
    else:
        hexfunc = short
    revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]

    if copy is None:
        copy = {}
        if opts.git or opts.upgrade:
            copy = copies.pathcopies(ctx1, ctx2, match=match)

    if copysourcematch:
        # filter out copies where source side isn't inside the matcher
        # (copies.pathcopies() already filtered out the destination)
        copy = {
            dst: src
            for dst, src in pycompat.iteritems(copy)
            if copysourcematch(src)
        }

    modifiedset = set(modified)
    addedset = set(added)
    removedset = set(removed)
    for f in modified:
        if f not in ctx1:
            # Fix up added, since merged-in additions appear as
            # modifications during merges
            modifiedset.remove(f)
            addedset.add(f)
    for f in removed:
        if f not in ctx1:
            # Merged-in additions that are then removed are reported as removed.
            # They are not in ctx1, so we don't want to show them in the diff.
            removedset.remove(f)
    modified = sorted(modifiedset)
    added = sorted(addedset)
    removed = sorted(removedset)
    for dst, src in list(copy.items()):
        if src not in ctx1:
            # Files merged in during a merge and then copied/renamed are
            # reported as copies. We want to show them in the diff as additions.
            del copy[dst]

    # warm the file caches for both revisions in one batched prefetch
    prefetchmatch = scmutil.matchfiles(
        repo, list(modifiedset | addedset | removedset)
    )
    revmatches = [
        (ctx1.rev(), prefetchmatch),
        (ctx2.rev(), prefetchmatch),
    ]
    scmutil.prefetchfiles(repo, revmatches)

    def difffn(opts, losedata):
        return trydiff(
            repo,
            revs,
            ctx1,
            ctx2,
            modified,
            added,
            removed,
            copy,
            getfilectx,
            opts,
            losedata,
            pathfn,
        )

    if opts.upgrade and not opts.git:
        # non-git diff requested, but upgrade to git format if the
        # losedata callback says data would otherwise be lost
        try:

            def losedata(fn):
                if not losedatafn or not losedatafn(fn=fn):
                    raise GitDiffRequired

            # Buffer the whole output until we are sure it can be generated
            return list(difffn(opts.copy(git=False), losedata))
        except GitDiffRequired:
            return difffn(opts.copy(git=True), None)
    else:
        return difffn(opts, None)
2706
2707
def diffsinglehunk(hunklines):
    """yield (token, label) pairs for the lines of a single hunk"""
    for rawline in hunklines:
        # pick the base label from the +/- marker
        if rawline.startswith(b'-'):
            label = b'diff.deleted'
        elif rawline.startswith(b'+'):
            label = b'diff.inserted'
        else:
            raise error.ProgrammingError(b'unexpected hunk line: %s' % rawline)
        # peel off the EOL, then trailing whitespace, so each layer can
        # be re-emitted with its own label
        chomped = rawline.rstrip(b'\r\n')
        stripped = chomped.rstrip()
        for token in tabsplitter.findall(stripped):
            if token.startswith(b'\t'):
                yield (token, b'diff.tab')
            else:
                yield (token, label)

        if stripped != chomped:
            yield (chomped[len(stripped) :], b'diff.trailingwhitespace')
        if chomped != rawline:
            yield (rawline[len(chomped) :], b'')
2731
2732
def diffsinglehunkinline(hunklines):
    """yield tokens for a list of lines in a single hunk, with inline colors

    The deleted and inserted sides of the hunk are re-diffed against each
    other word by word; words inside changed blocks get a '.changed' label
    suffix, words in unchanged blocks a '.unchanged' suffix.
    """
    # prepare deleted, and inserted content
    a = bytearray()
    b = bytearray()
    for line in hunklines:
        if line[0:1] == b'-':
            a += line[1:]
        elif line[0:1] == b'+':
            b += line[1:]
        else:
            raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
    # fast path: if either side is empty, use diffsinglehunk
    if not a or not b:
        for t in diffsinglehunk(hunklines):
            yield t
        return
    # re-split the content into words
    al = wordsplitter.findall(bytes(a))
    bl = wordsplitter.findall(bytes(b))
    # re-arrange the words to lines since the diff algorithm is line-based
    # (each word becomes one "line", unless it already is a newline)
    aln = [s if s == b'\n' else s + b'\n' for s in al]
    bln = [s if s == b'\n' else s + b'\n' for s in bl]
    an = b''.join(aln)
    bn = b''.join(bln)
    # run the diff algorithm, prepare atokens and btokens
    atokens = []
    btokens = []
    blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
    for (a1, a2, b1, b2), btype in blocks:
        # '!' marks a changed block
        changed = btype == b'!'
        for token in mdiff.splitnewlines(b''.join(al[a1:a2])):
            atokens.append((changed, token))
        for token in mdiff.splitnewlines(b''.join(bl[b1:b2])):
            btokens.append((changed, token))

    # yield deleted tokens, then inserted ones
    for prefix, label, tokens in [
        (b'-', b'diff.deleted', atokens),
        (b'+', b'diff.inserted', btokens),
    ]:
        nextisnewline = True
        for changed, token in tokens:
            if nextisnewline:
                # emit the '-'/'+' prefix at the start of each output line
                yield (prefix, label)
                nextisnewline = False
            # special handling line end: peel off, in order, the newline,
            # an optional '\r', and any trailing spaces; each piece is
            # re-emitted separately below
            isendofline = token.endswith(b'\n')
            if isendofline:
                chomp = token[:-1]  # chomp
                if chomp.endswith(b'\r'):
                    chomp = chomp[:-1]
                endofline = token[len(chomp) :]
                token = chomp.rstrip()  # detect spaces at the end
                endspaces = chomp[len(token) :]
            # scan tabs
            for maybetab in tabsplitter.findall(token):
                if b'\t' == maybetab[0:1]:
                    currentlabel = b'diff.tab'
                else:
                    if changed:
                        currentlabel = label + b'.changed'
                    else:
                        currentlabel = label + b'.unchanged'
                yield (maybetab, currentlabel)
            if isendofline:
                # endspaces/endofline are only bound when isendofline is True
                if endspaces:
                    yield (endspaces, b'diff.trailingwhitespace')
                yield (endofline, b'')
                nextisnewline = True
2803
2804
def difflabel(func, *args, **kw):
    '''yields 2-tuples of (output, label) based on the output of func()'''
    # choose word-level (inline) or plain line-level hunk highlighting
    if kw.get('opts') and kw['opts'].worddiff:
        dodiffhunk = diffsinglehunkinline
    else:
        dodiffhunk = diffsinglehunk
    headprefixes = [
        (b'diff', b'diff.diffline'),
        (b'copy', b'diff.extended'),
        (b'rename', b'diff.extended'),
        (b'old', b'diff.extended'),
        (b'new', b'diff.extended'),
        (b'deleted', b'diff.extended'),
        (b'index', b'diff.extended'),
        (b'similarity', b'diff.extended'),
        (b'---', b'diff.file_a'),
        (b'+++', b'diff.file_b'),
    ]
    textprefixes = [
        (b'@', b'diff.hunk'),
        # - and + are handled by diffsinglehunk
    ]
    # head is True while we are inside a file header, i.e. between a
    # non-hunk/non-body line and the next '@' hunk line
    head = False

    # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
    hunkbuffer = []

    def consumehunkbuffer():
        # tokenize and flush the buffered hunk lines, if any
        if hunkbuffer:
            for token in dodiffhunk(hunkbuffer):
                yield token
            hunkbuffer[:] = []

    for chunk in func(*args, **kw):
        lines = chunk.split(b'\n')
        linecount = len(lines)
        for i, line in enumerate(lines):
            if head:
                if line.startswith(b'@'):
                    head = False
            else:
                if line and not line.startswith(
                    (b' ', b'+', b'-', b'@', b'\\')
                ):
                    head = True
            diffline = False
            if not head and line and line.startswith((b'+', b'-')):
                diffline = True

            prefixes = textprefixes
            if head:
                prefixes = headprefixes
            if diffline:
                # buffered
                bufferedline = line
                if i + 1 < linecount:
                    # restore the newline removed by split() above
                    bufferedline += b"\n"
                hunkbuffer.append(bufferedline)
            else:
                # unbuffered
                for token in consumehunkbuffer():
                    yield token
                stripline = line.rstrip()
                for prefix, label in prefixes:
                    if stripline.startswith(prefix):
                        yield (stripline, label)
                        if line != stripline:
                            # trailing whitespace gets its own token
                            yield (
                                line[len(stripline) :],
                                b'diff.trailingwhitespace',
                            )
                        break
                else:
                    yield (line, b'')
                if i + 1 < linecount:
                    yield (b'\n', b'')
        # flush any hunk still buffered at the end of the chunk
        for token in consumehunkbuffer():
            yield token
2883
2884
def diffui(*args, **kw):
    """Labelled variant of diff(): yields 2-tuples of (output, label)
    that can be fed straight to ui.write()."""
    return difflabel(diff, *args, **kw)
2888
2889
2890def _filepairs(modified, added, removed, copy, opts):
2891    """generates tuples (f1, f2, copyop), where f1 is the name of the file
2892    before and f2 is the the name after. For added files, f1 will be None,
2893    and for removed files, f2 will be None. copyop may be set to None, 'copy'
2894    or 'rename' (the latter two only if opts.git is set)."""
2895    gone = set()
2896
2897    copyto = {v: k for k, v in copy.items()}
2898
2899    addedset, removedset = set(added), set(removed)
2900
2901    for f in sorted(modified + added + removed):
2902        copyop = None
2903        f1, f2 = f, f
2904        if f in addedset:
2905            f1 = None
2906            if f in copy:
2907                if opts.git:
2908                    f1 = copy[f]
2909                    if f1 in removedset and f1 not in gone:
2910                        copyop = b'rename'
2911                        gone.add(f1)
2912                    else:
2913                        copyop = b'copy'
2914        elif f in removedset:
2915            f2 = None
2916            if opts.git:
2917                # have we already reported a copy above?
2918                if (
2919                    f in copyto
2920                    and copyto[f] in addedset
2921                    and copy[copyto[f]] == f
2922                ):
2923                    continue
2924        yield f1, f2, copyop
2925
2926
def _gitindex(text):
    """Return the hex git blob id for *text*: the SHA-1 of the
    b'blob <len>\\0' header followed by the content. None is treated
    as empty content."""
    data = text or b""
    s = hashutil.sha1(b'blob %d\0' % len(data))
    s.update(data)
    return hex(s.digest())
2934
2935
# file flag -> git file mode: 'l' symlink, 'x' executable, '' regular file
_gitmode = {b'l': b'120000', b'x': b'100755', b'': b'100644'}
2937
2938
def trydiff(
    repo,
    revs,
    ctx1,
    ctx2,
    modified,
    added,
    removed,
    copy,
    getfilectx,
    opts,
    losedatafn,
    pathfn,
):
    """given input data, generate a diff and yield it in blocks

    If generating a diff would lose data like flags or binary data and
    losedatafn is not None, it will be called.

    pathfn is applied to every path in the diff output.
    """

    if opts.noprefix:
        aprefix = bprefix = b''
    else:
        aprefix = b'a/'
        bprefix = b'b/'

    def diffline(f, revs):
        # traditional (non-git) "diff -r REV ... path" header line
        revinfo = b' '.join([b"-r %s" % rev for rev in revs])
        return b'diff %s %s' % (revinfo, f)

    def isempty(fctx):
        return fctx is None or fctx.size() == 0

    date1 = dateutil.datestr(ctx1.date())
    date2 = dateutil.datestr(ctx2.date())

    if not pathfn:
        pathfn = lambda f: f

    for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
        content1 = None
        content2 = None
        fctx1 = None
        fctx2 = None
        flag1 = None
        flag2 = None
        if f1:
            fctx1 = getfilectx(f1, ctx1)
            if opts.git or losedatafn:
                flag1 = ctx1.flags(f1)
        if f2:
            fctx2 = getfilectx(f2, ctx2)
            if opts.git or losedatafn:
                flag2 = ctx2.flags(f2)
        # if binary is True, output "summary" or "base85", but not "text diff"
        if opts.text:
            binary = False
        else:
            binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)

        if losedatafn and not opts.git:
            # a plain (non-git) diff cannot represent these cases; report
            # them so the caller may upgrade to a git diff instead
            if (
                binary
                or
                # copy/rename
                f2 in copy
                or
                # empty file creation
                (not f1 and isempty(fctx2))
                or
                # empty file deletion
                (isempty(fctx1) and not f2)
                or
                # create with flags
                (not f1 and flag2)
                or
                # change flags
                (f1 and f2 and flag1 != flag2)
            ):
                losedatafn(f2 or f1)

        path1 = pathfn(f1 or f2)
        path2 = pathfn(f2 or f1)
        header = []
        if opts.git:
            header.append(
                b'diff --git %s%s %s%s' % (aprefix, path1, bprefix, path2)
            )
            if not f1:  # added
                header.append(b'new file mode %s' % _gitmode[flag2])
            elif not f2:  # removed
                header.append(b'deleted file mode %s' % _gitmode[flag1])
            else:  # modified/copied/renamed
                mode1, mode2 = _gitmode[flag1], _gitmode[flag2]
                if mode1 != mode2:
                    header.append(b'old mode %s' % mode1)
                    header.append(b'new mode %s' % mode2)
                if copyop is not None:
                    if opts.showsimilarity:
                        sim = similar.score(ctx1[path1], ctx2[path2]) * 100
                        header.append(b'similarity index %d%%' % sim)
                    header.append(b'%s from %s' % (copyop, path1))
                    header.append(b'%s to %s' % (copyop, path2))
        elif revs:
            header.append(diffline(path1, revs))

        #  fctx.is  | diffopts                | what to   | is fctx.data()
        #  binary() | text nobinary git index | output?   | outputted?
        # ------------------------------------|----------------------------
        #  yes      | no   no       no  *     | summary   | no
        #  yes      | no   no       yes *     | base85    | yes
        #  yes      | no   yes      no  *     | summary   | no
        #  yes      | no   yes      yes 0     | summary   | no
        #  yes      | no   yes      yes >0    | summary   | semi [1]
        #  yes      | yes  *        *   *     | text diff | yes
        #  no       | *    *        *   *     | text diff | yes
        # [1]: hash(fctx.data()) is outputted. so fctx.data() cannot be faked
        if binary and (
            not opts.git or (opts.git and opts.nobinary and not opts.index)
        ):
            # fast path: no binary content will be displayed, content1 and
            # content2 are only used for equivalent test. cmp() could have a
            # fast path.
            if fctx1 is not None:
                content1 = b'\0'
            if fctx2 is not None:
                if fctx1 is not None and not fctx1.cmp(fctx2):
                    content2 = b'\0'  # not different
                else:
                    content2 = b'\0\0'
        else:
            # normal path: load contents
            if fctx1 is not None:
                content1 = fctx1.data()
            if fctx2 is not None:
                content2 = fctx2.data()

        data1 = (ctx1, fctx1, path1, flag1, content1, date1)
        data2 = (ctx2, fctx2, path2, flag2, content2, date2)
        yield diffcontent(data1, data2, header, binary, opts)
3081
3082
def diffcontent(data1, data2, header, binary, opts):
    """diffs two versions of a file and returns (fctx1, fctx2, header, hunks)

    data1 and data2 are (ctx, fctx, path, flag, content, date) tuples for
    the old and new side respectively; content may be a placeholder value
    for binary files. header is the list of patch header lines and is
    extended in place. binary says whether either version of the file is
    binary; opts are the user-passed diff options.

    It exists as a separate function so that extensions like extdiff can
    wrap it and use the file content directly.
    """

    ctx1, fctx1, path1, flag1, content1, date1 = data1
    ctx2, fctx2, path2, flag2, content2, date2 = data2

    def blobid(path, ctx, content):
        # null id when the file does not exist on that side
        if path in ctx:
            return _gitindex(content)
        return sha1nodeconstants.nullhex

    index1 = blobid(path1, ctx1, content1)
    index2 = blobid(path2, ctx2, content2)

    if binary and opts.git and not opts.nobinary:
        # git binary patch: base85-encoded delta plus a full index line
        text = mdiff.b85diff(content1, content2)
        if text:
            header.append(b'index %s..%s' % (index1, index2))
        hunks = ((None, [text]),)
    else:
        if opts.git and opts.index > 0:
            # abbreviated index line with the file mode
            flag = flag1 if flag1 is not None else flag2
            header.append(
                b'index %s..%s %s'
                % (
                    index1[: opts.index],
                    index2[: opts.index],
                    _gitmode[flag],
                )
            )

        uheaders, hunks = mdiff.unidiff(
            content1,
            date1,
            content2,
            date2,
            path1,
            path2,
            binary=binary,
            opts=opts,
        )
        header.extend(uheaders)
    return fctx1, fctx2, header, hunks
3138
3139
def diffstatsum(stats):
    """Fold per-file diffstat tuples into overall totals.

    stats is an iterable of (filename, added, removed, isbinary) tuples.
    Returns (maxfile, maxtotal, addtotal, removetotal, binary): the widest
    filename in display columns, the largest per-file change count, the
    grand totals, and whether any file was binary.
    """
    maxfile = maxtotal = addtotal = removetotal = 0
    binary = False
    for filename, added, removed, isbinary in stats:
        maxfile = max(maxfile, encoding.colwidth(filename))
        maxtotal = max(maxtotal, added + removed)
        addtotal += added
        removetotal += removed
        binary = binary or isbinary

    return maxfile, maxtotal, addtotal, removetotal, binary
3150
3151
def diffstatdata(lines):
    """Parse diff *lines* into per-file stat tuples.

    Returns a list of (filename, adds, removes, isbinary) tuples, one per
    file diff encountered, in order of appearance.
    """
    diffre = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')

    results = []
    fname, nadds, nremoves, binary = None, 0, 0, False

    def flush():
        # record the stats gathered for the current file, if any
        if fname:
            results.append((fname, nadds, nremoves, binary))

    # inheader tracks whether we are still in the header portion of a
    # file diff, so that '---'/'+++' lines are not counted as changes
    inheader = False

    for line in lines:
        if line.startswith(b'diff'):
            flush()
            # a new file diff begins: reset counters and header state
            inheader = True
            nadds, nremoves, binary = 0, 0, False
            if line.startswith(b'diff --git a/'):
                fname = gitre.search(line).group(2)
            elif line.startswith(b'diff -r'):
                # format: "diff -r ... -r ... filename"
                fname = diffre.search(line).group(1)
        elif line.startswith(b'@@'):
            inheader = False
        elif line.startswith(b'+') and not inheader:
            nadds += 1
        elif line.startswith(b'-') and not inheader:
            nremoves += 1
        elif line.startswith((b'GIT binary patch', b'Binary file')):
            binary = True
        elif line.startswith(b'rename from'):
            fname = line[12:]
        elif line.startswith(b'rename to'):
            fname += b' => %s' % line[10:]
    flush()
    return results
3195
3196
def diffstat(lines, width=80):
    """Render a diffstat summary (per-file +/- histogram) for diff *lines*.

    width is the target total output width; the graph column shrinks to
    fit but never goes below 10 columns.
    """
    stats = diffstatdata(lines)
    maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)

    # width of the change-count column; 'Bin' needs at least 3 columns
    countwidth = len(str(maxtotal))
    if hasbinary:
        countwidth = max(countwidth, 3)
    # room left for the +/- graph
    graphwidth = max(width - countwidth - maxname - 6, 10)

    def scale(i):
        if maxtotal <= graphwidth:
            return i
        # If diffstat runs out of room it doesn't print anything,
        # which isn't very useful, so always print at least one + or -
        # if there were at least some changes.
        return max(i * graphwidth // maxtotal, int(bool(i)))

    output = []
    for filename, adds, removes, isbinary in stats:
        count = b'Bin' if isbinary else b'%d' % (adds + removes)
        output.append(
            b' %s%s |  %*s %s%s\n'
            % (
                filename,
                b' ' * (maxname - encoding.colwidth(filename)),
                countwidth,
                count,
                b'+' * scale(adds),
                b'-' * scale(removes),
            )
        )

    if stats:
        output.append(
            _(b' %d files changed, %d insertions(+), %d deletions(-)\n')
            % (len(stats), totaladds, totalremoves)
        )

    return b''.join(output)
3243
3244
def diffstatui(*args, **kw):
    """like diffstat(), but yields 2-tuples of (output, label) for
    ui.write()
    """

    for line in diffstat(*args, **kw).splitlines():
        if line.endswith((b'+', b'-')):
            # histogram line: split the name/count part from the graph
            name, graph = line.rsplit(b' ', 1)
            yield (name + b' ', b'')
            for pattern, label in (
                (br'\++', b'diffstat.inserted'),
                (br'-+', b'diffstat.deleted'),
            ):
                m = re.search(pattern, graph)
                if m:
                    yield (m.group(0), label)
        else:
            yield (line, b'')
        yield (b'\n', b'')
3263