1# Copyright (C) 2005-2014 Canonical Ltd.
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation; either version 2 of the License, or
6# (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17import contextlib
18import difflib
19import os
20import re
21import sys
22
23from .lazy_import import lazy_import
24lazy_import(globals(), """
25import errno
26import patiencediff
27import subprocess
28import tempfile
29
30from breezy import (
31    controldir,
32    osutils,
33    textfile,
34    timestamp,
35    views,
36    )
37
38from breezy.workingtree import WorkingTree
39from breezy.i18n import gettext
40""")
41
42from . import (
43    errors,
44    )
45from .registry import (
46    Registry,
47    )
48from .trace import mutter, note, warning
49from .tree import FileTimestampUnavailable
50
51
52DEFAULT_CONTEXT_AMOUNT = 3
53
54
55# TODO: Rather than building a changeset object, we should probably
56# invoke callbacks on an object.  That object can either accumulate a
57# list, write them out directly, etc etc.
58
59
class _PrematchedMatcher(difflib.SequenceMatcher):
    """Allow SequenceMatcher operations to use predetermined blocks.

    The matcher does not compare any sequences itself; the opcode methods
    inherited from ``difflib.SequenceMatcher`` work from the matching
    blocks handed to the constructor.
    """

    def __init__(self, matching_blocks):
        """Create a matcher that reports ``matching_blocks`` as its result.

        :param matching_blocks: The precomputed matching blocks, in the
            ``(i, j, n)`` triple form used by
            ``SequenceMatcher.get_matching_blocks()``.
        """
        # Initialise *this* instance.  Calling the class itself, i.e.
        # ``difflib.SequenceMatcher(self, None, None)``, would merely build
        # and discard a second matcher (with ``self`` as its junk filter)
        # and leave this object without the base class attributes.
        difflib.SequenceMatcher.__init__(self, None, None)
        # Assigned after the base initialisation so that it cannot reset
        # them.
        self.matching_blocks = matching_blocks
        self.opcodes = None
67
68
def internal_diff(old_label, oldlines, new_label, newlines, to_file,
                  allow_binary=False, sequence_matcher=None,
                  path_encoding='utf8', context_lines=DEFAULT_CONTEXT_AMOUNT):
    """Write a unified diff of two sequences of lines to ``to_file``.

    Nothing at all is written when the two sequences produce no diff.

    :param old_label: Text label for the old content; encoded with
        ``path_encoding`` for the ``---`` header.
    :param oldlines: Lines (bytes) of the old content.
    :param new_label: Text label for the new content; encoded with
        ``path_encoding`` for the ``+++`` header.
    :param newlines: Lines (bytes) of the new content.
    :param to_file: Stream that the diff (bytes) is written to.
    :param allow_binary: Unless this is exactly ``False``, the
        ``textfile.check_text_lines`` checks on both inputs are skipped.
    :param sequence_matcher: Matcher class to use; ``None`` selects
        ``patiencediff.PatienceSequenceMatcher``.
    :param path_encoding: Encoding used for the labels.
    :param context_lines: Number of context lines around each change.
    """
    # FIXME: difflib is wrong if there is no trailing newline.
    # The syntax used by patch seems to be "\ No newline at
    # end of file" following the last diff line from that
    # file.  This is not trivial to insert into the
    # unified_diff output and it might be better to just fix
    # or replace that function.

    # In the meantime we at least make sure the patch isn't
    # mangled.

    if allow_binary is False:
        for lines in (oldlines, newlines):
            textfile.check_text_lines(lines)

    matcher = sequence_matcher
    if matcher is None:
        matcher = patiencediff.PatienceSequenceMatcher

    diff_lines = list(unified_diff_bytes(
        oldlines, newlines,
        fromfile=old_label.encode(path_encoding, 'replace'),
        tofile=new_label.encode(path_encoding, 'replace'),
        n=context_lines, sequencematcher=matcher))
    if not diff_lines:
        # Identical contents, nothing to do
        return

    # work-around for difflib being too smart for its own good
    # if /dev/null is "1,0", patch won't recognize it as /dev/null
    # (index 2 is the first hunk header, right after the two file headers)
    if not oldlines:
        diff_lines[2] = diff_lines[2].replace(b'-1,0', b'-0,0')
    elif not newlines:
        diff_lines[2] = diff_lines[2].replace(b'+1,0', b'+0,0')

    for diff_line in diff_lines:
        to_file.write(diff_line)
        if not diff_line.endswith(b'\n'):
            # Terminate the line and flag the missing newline the way
            # patch expects it.
            to_file.write(b"\n\\ No newline at end of file\n")
    to_file.write(b'\n')
109
110
def unified_diff_bytes(a, b, fromfile=b'', tofile=b'', fromfiledate=b'',
                       tofiledate=b'', n=3, lineterm=b'\n', sequencematcher=None):
    r"""
    Compare two sequences of byte lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to b'' so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using byte strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  A non-empty
    date is appended to its file name after a tab.  The modification
    times are normally expressed in the format returned by time.ctime().

    'sequencematcher' is the matcher class used to compare the inputs;
    None selects difflib.SequenceMatcher.  Nothing is generated when the
    matcher reports no groups of changes.

    Example:

    >>> for line in unified_diff_bytes(b'one two three four'.split(),
    ...             b'zero one tree four'.split(), b'Original', b'Current',
    ...             b'Sat Jan 26 23:30:50 1991', b'Fri Jun 06 10:20:52 2003',
    ...             lineterm=b''):
    ...     print(line)
    b'--- Original\tSat Jan 26 23:30:50 1991'
    b'+++ Current\tFri Jun 06 10:20:52 2003'
    b'@@ -1,4 +1,4 @@'
    b'+zero'
    b' one'
    b'-two'
    b'-three'
    b'+tree'
    b' four'
    """
    matcher_class = sequencematcher
    if matcher_class is None:
        matcher_class = difflib.SequenceMatcher

    # Only a non-empty date gets the tab separator in front of it.
    from_date = b'\t' + bytes(fromfiledate) if fromfiledate else fromfiledate
    to_date = b'\t' + bytes(tofiledate) if tofiledate else tofiledate

    # The file headers are emitted lazily, just before the first hunk.
    header_pending = True
    for hunk in matcher_class(None, a, b).get_grouped_opcodes(n):
        if header_pending:
            yield b'--- %s%s%s' % (fromfile, from_date, lineterm)
            yield b'+++ %s%s%s' % (tofile, to_date, lineterm)
            header_pending = False

        # The hunk spans from the start of its first opcode to the end of
        # its last one, on both sides.
        old_start, new_start = hunk[0][1], hunk[0][3]
        old_end, new_end = hunk[-1][2], hunk[-1][4]
        yield b"@@ -%d,%d +%d,%d @@%s" % (
            old_start + 1, old_end - old_start,
            new_start + 1, new_end - new_start, lineterm)

        for tag, a_lo, a_hi, b_lo, b_hi in hunk:
            if tag == 'equal':
                yield from (b' ' + line for line in a[a_lo:a_hi])
                continue
            if tag in ('replace', 'delete'):
                yield from (b'-' + line for line in a[a_lo:a_hi])
            if tag in ('replace', 'insert'):
                yield from (b'+' + line for line in b[b_lo:b_hi])
178
179
def _spawn_external_diff(diffcmd, capture_errors=True):
    """Start the external diff process and hand back the child handle.

    The child always gets pipes for stdin and stdout.

    :param diffcmd: The command list to spawn
    :param capture_errors: When true, stderr is piped as well and the child
        runs in a minimal environment (PATH, if set, plus LANGUAGE, LANG
        and LC_ALL forced to C) so that the output of diff can be read and
        understood.  When false, the child inherits the environment and
        stderr of this process.
    :return: A Popen object.
    :raises errors.NoDiff: if spawning fails with ENOENT.
    """
    child_env = None
    child_stderr = None
    if capture_errors:
        # construct minimal environment
        child_env = {}
        search_path = os.environ.get('PATH')
        if search_path is not None:
            child_env['PATH'] = search_path
        # on win32 only LANGUAGE has effect
        child_env.update(LANGUAGE='C', LANG='C', LC_ALL='C')
        child_stderr = subprocess.PIPE

    try:
        return subprocess.Popen(diffcmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=child_stderr,
                                env=child_env)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        raise errors.NoDiff(str(e))
215
216
217# diff style options as of GNU diff v3.2
218style_option_list = ['-c', '-C', '--context',
219                     '-e', '--ed',
220                     '-f', '--forward-ed',
221                     '-q', '--brief',
222                     '--normal',
223                     '-n', '--rcs',
224                     '-u', '-U', '--unified',
225                     '-y', '--side-by-side',
226                     '-D', '--ifdef']
227
228
229def default_style_unified(diff_opts):
230    """Default to unified diff style if alternative not specified in diff_opts.
231
232        diff only allows one style to be specified; they don't override.
233        Note that some of these take optargs, and the optargs can be
234        directly appended to the options.
235        This is only an approximate parser; it doesn't properly understand
236        the grammar.
237
238    :param diff_opts: List of options for external (GNU) diff.
239    :return: List of options with default style=='unified'.
240    """
241    for s in style_option_list:
242        for j in diff_opts:
243            if j.startswith(s):
244                break
245        else:
246            continue
247        break
248    else:
249        diff_opts.append('-u')
250    return diff_opts
251
252
def external_diff(old_label, oldlines, new_label, newlines, to_file,
                  diff_opts):
    """Display a diff by calling out to the external diff program.

    Both sequences of lines are written to temporary files, ``diff`` is run
    on those files and its output is written to ``to_file``.  The temporary
    files are removed again before returning.

    :param old_label: Label passed to ``diff --label`` for the old content.
    :param oldlines: Lines of the old content; written to a temporary file
        opened in binary mode.
    :param new_label: Label passed to ``diff --label`` for the new content.
    :param newlines: Lines of the new content; written to a temporary file
        opened in binary mode.
    :param to_file: Output stream.  It is flushed first and then receives
        the (bytes) output of diff followed by an extra newline.
    :param diff_opts: List of additional options for diff; a false value
        means no additional options.  It is passed through
        ``default_style_unified``, which appends '-u' to the list unless a
        style option is already present.
    :raises errors.BzrError: if diff fails, i.e. exits with a status other
        than 0 or 1 or is killed by a signal.  Exit status 2 is tolerated
        when the first output line reports that (binary) files differ.
    """
    # make sure our own output is properly ordered before the diff
    to_file.flush()

    oldtmp_fd, old_abspath = tempfile.mkstemp(prefix='brz-diff-old-')
    newtmp_fd, new_abspath = tempfile.mkstemp(prefix='brz-diff-new-')
    oldtmpf = os.fdopen(oldtmp_fd, 'wb')
    newtmpf = os.fdopen(newtmp_fd, 'wb')

    try:
        # TODO: perhaps a special case for comparing to or from the empty
        # sequence; can just use /dev/null on Unix

        # TODO: if either of the files being compared already exists as a
        # regular named file (e.g. in the working directory) then we can
        # compare directly to that, rather than copying it.

        oldtmpf.writelines(oldlines)
        newtmpf.writelines(newlines)

        # Close before spawning diff so the content is flushed to disk.
        oldtmpf.close()
        newtmpf.close()

        if not diff_opts:
            diff_opts = []
        if sys.platform == 'win32':
            # Popen doesn't do the proper encoding for external commands
            # Since we are dealing with an ANSI api, use mbcs encoding
            old_label = old_label.encode('mbcs')
            new_label = new_label.encode('mbcs')
        diffcmd = ['diff',
                   '--label', old_label,
                   old_abspath,
                   '--label', new_label,
                   new_abspath,
                   '--binary',
                   ]

        diff_opts = default_style_unified(diff_opts)

        if diff_opts:
            diffcmd.extend(diff_opts)

        # First run: LANG=C environment with stderr captured, so that the
        # output can be inspected below.
        pipe = _spawn_external_diff(diffcmd, capture_errors=True)
        out, err = pipe.communicate()
        rc = pipe.returncode

        # internal_diff() adds a trailing newline, add one here for consistency
        out += b'\n'
        if rc == 2:
            # 'diff' gives retcode == 2 for all sorts of errors
            # one of those is 'Binary files differ'.
            # Bad options could also be the problem.
            # 'Binary files' is not a real error, so we suppress that error.
            # Keep the LANG=C output; it is what the regular expression
            # below is matched against.
            lang_c_out = out

            # Since we got here, we want to make sure to give an i18n error
            # (second run: native environment, stderr not captured)
            pipe = _spawn_external_diff(diffcmd, capture_errors=False)
            out, err = pipe.communicate()

            # Write out the new i18n diff response
            to_file.write(out + b'\n')
            if pipe.returncode != 2:
                raise errors.BzrError(
                    'external diff failed with exit code 2'
                    ' when run with LANG=C and LC_ALL=C,'
                    ' but not when run natively: %r' % (diffcmd,))

            first_line = lang_c_out.split(b'\n', 1)[0]
            # Starting with diffutils 2.8.4 the word "binary" was dropped.
            m = re.match(b'^(binary )?files.*differ$', first_line, re.I)
            if m is None:
                raise errors.BzrError('external diff failed with exit code 2;'
                                      ' command: %r' % (diffcmd,))
            else:
                # Binary files differ, just return
                return

        # If we got to here, we haven't written out the output of diff
        # do so now
        to_file.write(out)
        if rc not in (0, 1):
            # returns 1 if files differ; that's OK
            if rc < 0:
                # a negative return code is reported as the signal number
                msg = 'signal %d' % (-rc)
            else:
                msg = 'exit code %d' % rc

            raise errors.BzrError('external diff failed with %s; command: %r'
                                  % (msg, diffcmd))

    finally:
        # The files may not have been closed yet if writing failed above;
        # make sure they are before deleting them.
        oldtmpf.close()                 # and delete
        newtmpf.close()

        def cleanup(path):
            # Warn in case the file couldn't be deleted (in case windows still
            # holds the file open, but not if the files have already been
            # deleted)
            try:
                os.remove(path)
            except OSError as e:
                if e.errno not in (errno.ENOENT,):
                    warning('Failed to delete temporary file: %s %s', path, e)

        cleanup(old_abspath)
        cleanup(new_abspath)
362
363
def get_trees_and_branches_to_diff_locked(
        path_list, revision_specs, old_url, new_url, exit_stack, apply_view=True):
    """Get the trees and specific files to diff given a list of paths.

    This method works out the trees to be diff'ed and the files of
    interest within those trees.

    :param path_list:
        the list of arguments passed to the diff command
    :param revision_specs:
        Zero, one or two RevisionSpecs from the diff command line,
        saying what revisions to compare.
    :param old_url:
        The url of the old branch or tree. If None, the branch of the
        first revision spec is used if there is one; failing that, the tree
        to use is taken from the first path, if any, or the current
        working tree.
    :param new_url:
        The url of the new branch or tree. If None, the branch of the
        second revision spec is used if there is one; failing that, the
        tree to use is taken from the first path, if any, or the current
        working tree.
    :param exit_stack:
        an ExitStack object. This function
        will register cleanups that must be run to unlock the trees, etc.
    :param apply_view:
        if True and a view is set, apply the view or check that the paths
        are within it
    :returns:
        a tuple of (old_tree, new_tree, old_branch, new_branch,
        specific_files, extra_trees) where extra_trees is a sequence of
        additional trees to search in for file-ids.  The trees and branches
        will be read-locked until the cleanups registered via the exit_stack
        param are run.
    """
    # Get the old and new revision specs
    old_revision_spec = None
    new_revision_spec = None
    if revision_specs is not None:
        if len(revision_specs) > 0:
            old_revision_spec = revision_specs[0]
            if old_url is None:
                old_url = old_revision_spec.get_branch()
        if len(revision_specs) > 1:
            new_revision_spec = revision_specs[1]
            if new_url is None:
                new_url = new_revision_spec.get_branch()

    other_paths = []
    make_paths_wt_relative = True
    consider_relpath = True
    if path_list is None or len(path_list) == 0:
        # If no path is given, the current working tree is used
        default_location = u'.'
        consider_relpath = False
    elif old_url is not None and new_url is not None:
        # Both locations are known, so every path is a file to diff and is
        # used as given.  default_location is left unset here; it is only
        # read below when one of the urls is None.
        other_paths = path_list
        make_paths_wt_relative = False
    else:
        # The first path locates the tree(s); the rest are files to diff.
        default_location = path_list[0]
        other_paths = path_list[1:]

    def lock_tree_or_branch(wt, br):
        # Read-lock the working tree if there is one, otherwise the branch;
        # the lock is released when exit_stack is unwound.
        if wt is not None:
            exit_stack.enter_context(wt.lock_read())
        elif br is not None:
            exit_stack.enter_context(br.lock_read())

    # Get the old location
    specific_files = []
    if old_url is None:
        old_url = default_location
    working_tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch(old_url)
    lock_tree_or_branch(working_tree, branch)
    if consider_relpath and relpath != '':
        if working_tree is not None and apply_view:
            views.check_path_in_view(working_tree, relpath)
        specific_files.append(relpath)
    old_tree = _get_tree_to_diff(old_revision_spec, working_tree, branch)
    old_branch = branch

    # Get the new location
    if new_url is None:
        new_url = default_location
    if new_url != old_url:
        # Only a different location is opened and locked again; otherwise
        # working_tree and branch from the old location are reused below.
        working_tree, branch, relpath = \
            controldir.ControlDir.open_containing_tree_or_branch(new_url)
        lock_tree_or_branch(working_tree, branch)
        if consider_relpath and relpath != '':
            if working_tree is not None and apply_view:
                views.check_path_in_view(working_tree, relpath)
            specific_files.append(relpath)
    # Without a revision spec the new side is the working tree itself when
    # there is one, and the basis tree only when there is none.
    new_tree = _get_tree_to_diff(new_revision_spec, working_tree, branch,
                                 basis_is_default=working_tree is None)
    new_branch = branch

    # Get the specific files (all files is None, no files is [])
    if make_paths_wt_relative and working_tree is not None:
        other_paths = working_tree.safe_relpath_files(
            other_paths,
            apply_view=apply_view)
    specific_files.extend(other_paths)
    if len(specific_files) == 0:
        specific_files = None
        # No explicit files: restrict the diff to the current view, if the
        # working tree has one.
        if (working_tree is not None and working_tree.supports_views() and
                apply_view):
            view_files = working_tree.views.lookup_view()
            if view_files:
                specific_files = view_files
                view_str = views.view_display_str(view_files)
                note(gettext("*** Ignoring files outside view. View is %s") % view_str)

    # Get extra trees that ought to be searched for file-ids
    extra_trees = None
    if working_tree is not None and working_tree not in (old_tree, new_tree):
        extra_trees = (working_tree,)
    return (old_tree, new_tree, old_branch, new_branch,
            specific_files, extra_trees)
479
480
def _get_tree_to_diff(spec, tree=None, branch=None, basis_is_default=True):
    """Pick the tree that one side of a diff should use.

    :param spec: Revision spec for this side, or None.  A spec whose
        ``spec`` attribute is None is treated like no spec at all.
    :param tree: Tree of the location, or None.
    :param branch: Branch of the location; when None and ``tree`` is given,
        ``tree.branch`` is used.
    :param basis_is_default: Without a usable spec, return the basis tree
        (of ``tree`` if given, else of the branch) when true, and ``tree``
        itself when false.
    :return: ``spec.as_tree(branch)`` when a usable spec is given, otherwise
        the default described above.
    """
    if tree is not None and branch is None:
        branch = tree.branch

    has_revision = spec is not None and spec.spec is not None
    if has_revision:
        return spec.as_tree(branch)

    if not basis_is_default:
        return tree
    basis_source = branch if tree is None else tree
    return basis_source.basis_tree()
493
494
def show_diff_trees(old_tree, new_tree, to_file, specific_files=None,
                    external_diff_options=None,
                    old_label='a/', new_label='b/',
                    extra_trees=None,
                    path_encoding='utf8',
                    using=None,
                    format_cls=None,
                    context=DEFAULT_CONTEXT_AMOUNT):
    """Show in text form the changes from one tree to another.

    All trees involved are read-locked for the duration of the call.

    :param old_tree: The tree to diff from.
    :param new_tree: The tree to diff to.
    :param to_file: The output stream.
    :param specific_files: Include only changes to these files - None for all
        changes.
    :param external_diff_options: If set, use an external GNU diff and pass
        these options.
    :param old_label: Label handed to the formatter for the old tree.
    :param new_label: Label handed to the formatter for the new tree.
    :param extra_trees: If set, more Trees to use for looking up file ids
    :param path_encoding: If set, the path will be encoded as specified,
        otherwise is supposed to be utf8
    :param using: Handed on unchanged to ``from_trees_options``.
    :param format_cls: Formatter class (DiffTree subclass); None selects
        DiffTree itself.
    :param context: Number of context lines; None selects
        DEFAULT_CONTEXT_AMOUNT.
    :return: Whatever the formatter's ``show_diff`` returns.
    """
    context_lines = DEFAULT_CONTEXT_AMOUNT if context is None else context
    differ_class = DiffTree if format_cls is None else format_cls

    with contextlib.ExitStack() as locks:
        # Lock order: old tree, then the extra trees, then the new tree.
        locks.enter_context(old_tree.lock_read())
        for extra_tree in (extra_trees if extra_trees is not None else ()):
            locks.enter_context(extra_tree.lock_read())
        locks.enter_context(new_tree.lock_read())

        differ = differ_class.from_trees_options(
            old_tree, new_tree, to_file, path_encoding,
            external_diff_options, old_label, new_label, using,
            context_lines=context_lines)
        return differ.show_diff(specific_files, extra_trees)
531
532
def _patch_header_date(tree, path):
    """Returns a timestamp suitable for use in a patch header.

    :param tree: Tree that is asked for the modification time.
    :param path: Path of the file within ``tree``.
    :return: The file's modification time formatted by
        ``timestamp.format_patch_date``; a time of 0 is formatted when the
        tree raises FileTimestampUnavailable.
    """
    try:
        modified = tree.get_file_mtime(path)
    except FileTimestampUnavailable:
        return timestamp.format_patch_date(0)
    return timestamp.format_patch_date(modified)
540
541
def get_executable_change(old_is_x, new_is_x):
    """Describe a change of the executable bit.

    :param old_is_x: Old executable state: True, False or None.
    :param new_is_x: New executable state: True, False or None.
    :return: An empty list when both states are equal, otherwise a list
        holding one bytes description such as ``b"+x to -x"`` (``??``
        stands for None).
    """
    if old_is_x == new_is_x:
        return []
    labels = {True: b"+x", False: b"-x", None: b"??"}
    return [b"%s to %s" % (labels[old_is_x], labels[new_is_x])]
548
549
class DiffPath(object):
    """Base type for command object that compare files"""

    # Result values returned by the ``diff`` methods of the differs.
    # The type or contents of the file were unsuitable for diffing
    CANNOT_DIFF = 'CANNOT_DIFF'
    # The file has changed in a semantic way
    CHANGED = 'CHANGED'
    # The file content may have changed, but there is no semantic change
    UNCHANGED = 'UNCHANGED'

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8'):
        """Constructor.

        :param old_tree: The tree to show as the old tree in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: The file to write comparison data to
        :param path_encoding: The character encoding to write paths in
        """
        self.old_tree, self.new_tree = old_tree, new_tree
        self.to_file = to_file
        self.path_encoding = path_encoding

    def finish(self):
        """Hook with nothing to do in the base class."""

    @classmethod
    def from_diff_tree(klass, diff_tree):
        """Build an instance from the trees, output file and path encoding
        of ``diff_tree``.
        """
        return klass(
            diff_tree.old_tree,
            diff_tree.new_tree,
            diff_tree.to_file,
            diff_tree.path_encoding,
        )

    @staticmethod
    def _diff_many(differs, old_path, new_path, old_kind, new_kind):
        """Return the result of the first differ able to handle the file.

        The differs are tried in order; one that returns CANNOT_DIFF is
        skipped.  CANNOT_DIFF is returned when no differ can handle the
        file.
        """
        for candidate in differs:
            outcome = candidate.diff(old_path, new_path, old_kind, new_kind)
            if outcome is DiffPath.CANNOT_DIFF:
                continue
            return outcome
        return DiffPath.CANNOT_DIFF
589
590
class DiffKindChange(object):
    """Special differ for file kind changes.

    Represents kind change as deletion + creation.  Uses the other differs
    to do this.
    """

    def __init__(self, differs):
        """Constructor.

        :param differs: The differs used to show the deletion and the
            creation.
        """
        self.differs = differs

    def finish(self):
        """Nothing to clean up."""

    @classmethod
    def from_diff_tree(klass, diff_tree):
        """Build an instance sharing the differs of ``diff_tree``."""
        return klass(diff_tree.differs)

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        :return: CANNOT_DIFF when either kind is None or the deletion cannot
            be shown, otherwise the result of showing the creation.
        """
        if old_kind is None or new_kind is None:
            # A plain addition or removal is not a kind change.
            return DiffPath.CANNOT_DIFF

        # First half: the old kind goes away ...
        removal = DiffPath._diff_many(
            self.differs, old_path, new_path, old_kind, None)
        if removal is DiffPath.CANNOT_DIFF:
            return removal
        # ... second half: the new kind appears.
        return DiffPath._diff_many(
            self.differs, old_path, new_path, None, new_kind)
624
625
class DiffTreeReference(DiffPath):
    """Differ for tree references; it writes no output."""

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two tree references.  (dummy)

        :return: CHANGED when at least one kind is 'tree-reference' and
            each kind is either 'tree-reference' or None; CANNOT_DIFF
            otherwise.
        """
        kinds = (old_kind, new_kind)
        handled = ('tree-reference' in kinds and
                   all(kind in ('tree-reference', None) for kind in kinds))
        if handled:
            return self.CHANGED
        return self.CANNOT_DIFF
639
640
class DiffDirectory(DiffPath):
    """Differ for directories; it writes no output."""

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two directories.  (dummy)

        :return: CHANGED when at least one kind is 'directory' and each
            kind is either 'directory' or None; CANNOT_DIFF otherwise.
        """
        kinds = (old_kind, new_kind)
        handled = ('directory' in kinds and
                   all(kind in ('directory', None) for kind in kinds))
        if handled:
            return self.CHANGED
        return self.CANNOT_DIFF
654
655
class DiffSymlink(DiffPath):
    """Differ reporting added, removed and retargeted symlinks."""

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two symlinks

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        :return: CANNOT_DIFF unless at least one kind is 'symlink' and the
            other one is 'symlink' or None; otherwise the result of
            ``diff_symlink``.
        """
        if 'symlink' not in (old_kind, new_kind):
            return self.CANNOT_DIFF

        # Resolve the old side first, then the new side; a missing side
        # (kind None) has no target.
        targets = []
        for kind, tree, path in ((old_kind, self.old_tree, old_path),
                                 (new_kind, self.new_tree, new_path)):
            if kind is None:
                targets.append(None)
            elif kind == 'symlink':
                targets.append(tree.get_symlink_target(path))
            else:
                return self.CANNOT_DIFF
        return self.diff_symlink(*targets)

    def diff_symlink(self, old_target, new_target):
        """Write a one-line description of a symlink target change.

        :param old_target: Old target, or None if there was no symlink.
        :param new_target: New target, or None if there is no symlink.
        :return: CHANGED
        """
        def encoded(target):
            return target.encode(self.path_encoding, 'replace')

        if old_target is None:
            summary = b'=== target is \'%s\'\n' % encoded(new_target)
        elif new_target is None:
            summary = b'=== target was \'%s\'\n' % encoded(old_target)
        else:
            summary = b'=== target changed \'%s\' => \'%s\'\n' % (
                encoded(old_target), encoded(new_target))
        self.to_file.write(summary)
        return self.CHANGED
694
695
class DiffText(DiffPath):
    """Differ writing the changes of file texts through ``text_differ``."""

    # GNU Patch uses the epoch date to detect files that are being added
    # or removed in a diff.
    EPOCH_DATE = '1970-01-01 00:00:00 +0000'

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
                 old_label='', new_label='', text_differ=internal_diff,
                 context_lines=DEFAULT_CONTEXT_AMOUNT):
        """Constructor.

        :param old_tree: The tree to show as the old tree in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: The file to write comparison data to
        :param path_encoding: The character encoding to write paths in
        :param old_label: Prefix put in front of paths of the old tree
        :param new_label: Prefix put in front of paths of the new tree
        :param text_differ: Callable that writes the diff of two texts; it
            is called like ``internal_diff``
        :param context_lines: Number of context lines handed to
            ``text_differ``
        """
        DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
        self.path_encoding = path_encoding
        self.old_label, self.new_label = old_label, new_label
        self.text_differ = text_differ
        self.context_lines = context_lines

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Compare two files in unified diff format

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        :return: CANNOT_DIFF unless at least one kind is 'file' and the
            other one is 'file' or None; otherwise the result of
            ``diff_text``.
        """
        if 'file' not in (old_kind, new_kind):
            return self.CANNOT_DIFF

        # Old side first, then new side.  A missing side (kind None) gets
        # the epoch date.
        header_dates = []
        for kind, tree, path in ((old_kind, self.old_tree, old_path),
                                 (new_kind, self.new_tree, new_path)):
            if kind == 'file':
                header_dates.append(_patch_header_date(tree, path))
            elif kind is None:
                header_dates.append(self.EPOCH_DATE)
            else:
                return self.CANNOT_DIFF
        old_date, new_date = header_dates

        # A side without a path of its own is labelled with the other path.
        from_label = '%s%s\t%s' % (
            self.old_label, old_path or new_path, old_date)
        to_label = '%s%s\t%s' % (
            self.new_label, new_path or old_path, new_date)
        return self.diff_text(old_path, new_path, from_label, to_label)

    def diff_text(self, from_path, to_path, from_label, to_label):
        """Diff the content of given files in two trees

        :param from_path: The path in the from tree. If None,
            the file is not present in the from tree.
        :param to_path: The path in the to tree. This may refer
            to a different file from from_path.  If None,
            the file is not present in the to tree.
        :param from_label: Label of the from side handed to the text differ
        :param to_label: Label of the to side handed to the text differ
        :return: CHANGED
        """
        def read_lines(tree, path):
            # A path that is None or unknown to the tree has no lines.
            if path is not None:
                try:
                    return tree.get_file_lines(path)
                except errors.NoSuchFile:
                    pass
            return []

        try:
            from_text = read_lines(self.old_tree, from_path)
            to_text = read_lines(self.new_tree, to_path)
            self.text_differ(from_label, from_text, to_label, to_text,
                             self.to_file, path_encoding=self.path_encoding,
                             context_lines=self.context_lines)
        except errors.BinaryFile:
            # Binary content: only report that the files differ.
            message = "Binary files %s%s and %s%s differ\n" % (
                self.old_label, from_path or to_path,
                self.new_label, to_path or from_path)
            self.to_file.write(
                message.encode(self.path_encoding, 'replace'))
        return self.CHANGED
769
770
class DiffFromTool(DiffPath):
    """Compare files by running an external command on on-disk copies.

    A temporary directory is created when the object is constructed.  Files
    that cannot be handed to the tool in place are written below that
    directory; finish() removes it again.
    """

    def __init__(self, command_template, old_tree, new_tree, to_file,
                 path_encoding='utf-8'):
        """Constructor

        :param command_template: List of command-line tokens.  Every token
            is expanded with str.format(), so it may contain the
            placeholders {old_path} and {new_path}.
        :param old_tree: Tree to show as old in the comparison
        :param new_tree: Tree to show as new in the comparison
        :param to_file: File the tool's standard output is copied to
        :param path_encoding: Character encoding, passed on to DiffPath
        """
        DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
        self.command_template = command_template
        # Scratch area for temporary copies and tree symlinks.
        self._root = osutils.mkdtemp(prefix='brz-diff-')

    @classmethod
    def from_string(klass, command_template, old_tree, new_tree, to_file,
                    path_encoding='utf-8'):
        """Alternate constructor.

        The arguments are passed to the regular constructor unchanged.
        """
        return klass(command_template, old_tree, new_tree, to_file,
                     path_encoding)

    @classmethod
    def make_from_diff_tree(klass, command_string, external_diff_options=None):
        """Build a factory producing instances of this class for a DiffTree.

        :param command_string: The command to run; used as one single
            command-line token.
        :param external_diff_options: Optional string of extra options.  It
            is split on whitespace and every option becomes its own token.
        :return: A callable that takes an object with old_tree, new_tree and
            to_file attributes and returns an instance of this class.
        """
        def from_diff_tree(diff_tree):
            full_command_string = [command_string]
            if external_diff_options is not None:
                # Append whole option tokens.  ``list += str`` would add the
                # string one character at a time and corrupt the command.
                full_command_string.extend(external_diff_options.split())
            return klass.from_string(full_command_string, diff_tree.old_tree,
                                     diff_tree.new_tree, diff_tree.to_file)
        return from_diff_tree

    def _get_command(self, old_path, new_path):
        """Expand the command template for one pair of files.

        :param old_path: On-disk path of the old version
        :param new_path: On-disk path of the new version
        :return: The list of command-line tokens to execute.  When expanding
            the template changed nothing (it holds no placeholders), the two
            paths are appended instead.  On win32 every str token is encoded
            as 'mbcs'.
        """
        my_map = {'old_path': old_path, 'new_path': new_path}
        command = [t.format(**my_map) for t in self.command_template]
        if command == self.command_template:
            command += [old_path, new_path]
        if sys.platform != 'win32':
            return command
        # NOTE(review): the encoding step was justified by "Popen doesn't
        # accept unicode on win32"; presumably that no longer holds on
        # current Python versions - confirm before removing it.
        return [c.encode('mbcs') if isinstance(c, str) else c
                for c in command]

    def _execute(self, old_path, new_path):
        """Run the tool on two on-disk files and copy its stdout to to_file.

        :return: The exit status of the command
        :raises errors.ExecutableMissing: if the command cannot be found
        """
        command = self._get_command(old_path, new_path)
        try:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                                    cwd=self._root)
        except OSError as e:
            if e.errno == errno.ENOENT:
                raise errors.ExecutableMissing(command[0])
            else:
                raise
        # Leaving the ``with`` block closes the pipe and waits for the child,
        # also when writing to to_file raises.
        with proc:
            self.to_file.write(proc.stdout.read())
        return proc.returncode

    def _try_symlink_root(self, tree, prefix):
        """Try to expose the root of `tree` as a symlink named `prefix`.

        :return: False when `tree` has no abspath attribute or
            osutils.host_os_dereferences_symlinks() is false; True once the
            symlink below the temporary root exists.
        """
        if (getattr(tree, 'abspath', None) is None or
                not osutils.host_os_dereferences_symlinks()):
            return False
        try:
            os.symlink(tree.abspath(''), osutils.pathjoin(self._root, prefix))
        except OSError as e:
            # A link left over from an earlier call is fine.
            if e.errno != errno.EEXIST:
                raise
        return True

    @staticmethod
    def _fenc():
        """Returns safe encoding for passing file path to diff tool"""
        if sys.platform == 'win32':
            return 'mbcs'
        else:
            # Don't fallback to 'utf-8' because subprocess may not be able to
            # handle utf-8 correctly when locale is not utf-8.
            return sys.getfilesystemencoding() or 'ascii'

    def _is_safepath(self, path):
        """Return true if `path` may be able to pass to subprocess.

        A path is considered safe when it survives an encode/decode round
        trip through the encoding returned by _fenc().
        """
        fenc = self._fenc()
        try:
            return path == path.encode(fenc).decode(fenc)
        except UnicodeError:
            return False

    def _safe_filename(self, prefix, relpath):
        """Replace unsafe character in `relpath` then join `self._root`,
        `prefix` and `relpath`."""
        fenc = self._fenc()
        # encoded_str.replace('?', '_') may break multibyte char.
        # So we should encode, decode, then replace(u'?', u'_')
        relpath_tmp = relpath.encode(fenc, 'replace').decode(fenc, 'replace')
        relpath_tmp = relpath_tmp.replace(u'?', u'_')
        return osutils.pathjoin(self._root, prefix, relpath_tmp)

    def _write_file(self, relpath, tree, prefix, force_temp=False,
                    allow_write=False):
        """Return an on-disk path holding the content of `relpath` in `tree`.

        :param relpath: Path of the file within `tree`
        :param tree: Tree to read the file from
        :param prefix: Sub-directory of the temporary root to use
        :param force_temp: If true, always write a temporary copy rather
            than reusing the working tree file or a symlinked tree root
        :param allow_write: If false, the temporary copy is made read-only
        :return: The path to hand to the external tool
        """
        if not force_temp and isinstance(tree, WorkingTree):
            # The file already exists on disk; use it when its path can be
            # passed to the subprocess unmodified.
            full_path = tree.abspath(relpath)
            if self._is_safepath(full_path):
                return full_path

        full_path = self._safe_filename(prefix, relpath)
        if not force_temp and self._try_symlink_root(tree, prefix):
            return full_path
        os.makedirs(osutils.dirname(full_path), exist_ok=True)
        with tree.get_file(relpath) as source, \
                open(full_path, 'wb') as target:
            osutils.pumpfile(source, target)
        try:
            mtime = tree.get_file_mtime(relpath)
        except FileTimestampUnavailable:
            pass
        else:
            # Give the copy the modification time recorded in the tree.
            os.utime(full_path, (mtime, mtime))
        if not allow_write:
            osutils.make_readonly(full_path)
        return full_path

    def _prepare_files(self, old_path, new_path, force_temp=False,
                       allow_write_new=False):
        """Make both versions of a file available on disk.

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param force_temp: Passed on to _write_file for both files
        :param allow_write_new: Passed as allow_write for the new file only
        :return: Tuple (old_disk_path, new_disk_path)
        """
        old_disk_path = self._write_file(
            old_path, self.old_tree, 'old', force_temp)
        new_disk_path = self._write_file(
            new_path, self.new_tree, 'new', force_temp,
            allow_write=allow_write_new)
        return old_disk_path, new_disk_path

    def finish(self):
        """Remove the temporary directory.

        A directory that is already gone is ignored; any other OSError is
        only recorded with mutter().
        """
        try:
            osutils.rmtree(self._root)
        except OSError as e:
            if e.errno != errno.ENOENT:
                mutter("The temporary directory \"%s\" was not "
                       "cleanly removed: %s." % (self._root, e))

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Run the external tool on one file.

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Kind of the old entry
        :param new_kind: Kind of the new entry
        :return: DiffPath.CANNOT_DIFF unless both kinds are 'file'.
            Otherwise the tool is run and None is returned.
        """
        if (old_kind, new_kind) != ('file', 'file'):
            return DiffPath.CANNOT_DIFF
        (old_disk_path, new_disk_path) = self._prepare_files(
            old_path, new_path)
        # NOTE(review): the tool's exit status is discarded and None is
        # returned; presumably callers only test for CANNOT_DIFF - confirm.
        self._execute(old_disk_path, new_disk_path)

    def edit_file(self, old_path, new_path):
        """Use this tool to edit a file.

        A temporary copy will be edited, and the new contents will be
        returned.

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :return: The new contents of the file.
        """
        old_abs_path, new_abs_path = self._prepare_files(
            old_path, new_path, allow_write_new=True, force_temp=True)
        command = self._get_command(old_abs_path, new_abs_path)
        subprocess.call(command, cwd=self._root)
        with open(new_abs_path, 'rb') as new_file:
            return new_file.read()
932
933
class DiffTree(object):
    """Writes a textual description of how two trees differ.

    Every altered entry is offered to a list of DiffPath objects in turn
    until one of them handles it.  Plugins can add DiffPath factories for
    all diffs by appending to DiffTree.diff_factories; a single diff
    operation can add its own through the extra_factories argument.
    """

    # list of factories that can provide instances of DiffPath objects
    # may be extended by plugins.
    diff_factories = [DiffSymlink.from_diff_tree,
                      DiffDirectory.from_diff_tree,
                      DiffTreeReference.from_diff_tree]

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
                 diff_text=None, extra_factories=None):
        """Constructor

        :param old_tree: Tree to show as old in the comparison
        :param new_tree: Tree to show as new in the comparison
        :param to_file: File to write comparison to
        :param path_encoding: Character encoding to write paths in
        :param diff_text: DiffPath-type object to use as a last resort for
            diffing text files.  Defaults to a DiffText using internal_diff
            with empty labels.
        :param extra_factories: Factories of DiffPaths to try before any other
            DiffPaths"""
        if diff_text is None:
            diff_text = DiffText(old_tree, new_tree, to_file, path_encoding,
                                 '', '', internal_diff)
        self.old_tree = old_tree
        self.new_tree = new_tree
        self.to_file = to_file
        self.path_encoding = path_encoding
        # Order matters: caller-supplied differs first, then the registered
        # ones, then the text differ, and the kind-change differ last.
        self.differs = []
        if extra_factories is not None:
            for factory in extra_factories:
                self.differs.append(factory(self))
        for factory in self.diff_factories:
            self.differs.append(factory(self))
        kind_change = DiffKindChange.from_diff_tree(self)
        self.differs.extend((diff_text, kind_change))

    @classmethod
    def from_trees_options(klass, old_tree, new_tree, to_file,
                           path_encoding, external_diff_options, old_label,
                           new_label, using, context_lines):
        """Factory for producing a DiffTree.

        Designed to accept options used by show_diff_trees.

        :param old_tree: The tree to show as old in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: File to write comparisons to
        :param path_encoding: Character encoding to use for writing paths
        :param external_diff_options: If non-empty, text files are compared
            with external_diff using these whitespace-separated options
            instead of internal_diff.
        :param old_label: Prefix to use for old file labels
        :param new_label: Prefix to use for new file labels
        :param using: Commandline to use to invoke an external diff tool
        :param context_lines: Passed on to the DiffText used for text files
        """
        extra_factories = []
        if using is not None:
            extra_factories.append(DiffFromTool.make_from_diff_tree(
                using, external_diff_options))

        if not external_diff_options:
            diff_file = internal_diff
        else:
            opts = external_diff_options.split()

            def diff_file(olab, olines, nlab, nlines, to_file, path_encoding=None, context_lines=None):
                """:param path_encoding: not used but required
                        to match the signature of internal_diff.
                """
                external_diff(olab, olines, nlab, nlines, to_file, opts)

        text_differ = DiffText(old_tree, new_tree, to_file, path_encoding,
                               old_label, new_label, diff_file,
                               context_lines=context_lines)
        return klass(old_tree, new_tree, to_file, path_encoding, text_differ,
                     extra_factories)

    def show_diff(self, specific_files, extra_trees=None):
        """Write tree diff to self.to_file

        Every differ's finish() is called afterwards, whether or not the
        diff succeeded.

        :param specific_files: the specific files to compare (recursive)
        :param extra_trees: extra trees to use for mapping paths to file_ids
        :return: the value returned by _show_diff
        """
        try:
            outcome = self._show_diff(specific_files, extra_trees)
        finally:
            for differ in self.differs:
                differ.finish()
        return outcome

    def _show_diff(self, specific_files, extra_trees):
        """Write a header, and a diff where content changed, per change.

        :return: 1 if any reported entry had changed content or was renamed,
            otherwise 0
        """
        # TODO: Generation of pseudo-diffs for added/deleted files could
        # be usefully made into a much faster special case.

        def sort_key(change):
            # Sort on the new path; entries without one use their old path.
            before, after = change.path
            return before if after is None else after

        def encode_path(path):
            if path is None:
                return None
            return path.encode(self.path_encoding, "replace")

        changes = self.new_tree.iter_changes(self.old_tree,
                                             specific_files=specific_files,
                                             extra_trees=extra_trees,
                                             require_versioned=True)
        has_changes = 0
        for change in sorted(changes, key=sort_key):
            # The root does not get diffed, and items with no known kind (that
            # is, missing) in both trees are skipped as well.
            if change.parent_id == (None, None) or change.kind == (None, None):
                continue
            if (change.kind[0] == 'symlink'
                    and not self.new_tree.supports_symlinks()):
                warning(
                    'Ignoring "%s" as symlinks are not '
                    'supported on this filesystem.' % (change.path[0],))
                continue

            oldpath, newpath = change.path
            oldpath_encoded = encode_path(oldpath)
            newpath_encoded = encode_path(newpath)
            presence = (
                change.kind[0] is not None and change.versioned[0],
                change.kind[1] is not None and change.versioned[1],
            )
            executable = change.executable
            kind = change.kind
            old_location = (change.parent_id[0], change.name[0])
            new_location = (change.parent_id[1], change.name[1])
            renamed = old_location != new_location

            properties_changed = list(
                get_executable_change(executable[0], executable[1]))
            prop_str = b""
            if properties_changed:
                prop_str = b" (properties changed: %s)" % (
                    b", ".join(properties_changed),)

            if presence == (True, False):
                header = b"=== removed %s '%s'\n" % (
                    kind[0].encode('ascii'), oldpath_encoded)
            elif presence == (False, True):
                header = b"=== added %s '%s'\n" % (
                    kind[1].encode('ascii'), newpath_encoded)
            elif renamed:
                header = b"=== renamed %s '%s' => '%s'%s\n" % (
                    kind[0].encode('ascii'), oldpath_encoded,
                    newpath_encoded, prop_str)
            else:
                # if it was produced by iter_changes, it must be
                # modified *somehow*, either content or execute bit.
                header = b"=== modified %s '%s'%s\n" % (
                    kind[0].encode('ascii'), newpath_encoded, prop_str)
            self.to_file.write(header)

            content_changed = change.changed_content
            if content_changed:
                self._diff(oldpath, newpath, kind[0], kind[1])
            # NOTE(review): a change that only touches properties does not
            # set has_changes - confirm that this is intended.
            if content_changed or renamed:
                has_changes = 1
        return has_changes

    def diff(self, old_path, new_path):
        """Perform a diff of a single file

        The kind of each side is looked up in its tree; a side whose path
        is None has kind None.

        :param old_path: The path of the file in the old tree
        :param new_path: The path of the file in the new tree
        """
        old_kind = None if old_path is None else self.old_tree.kind(old_path)
        new_kind = None if new_path is None else self.new_tree.kind(new_path)
        self._diff(old_path, new_path, old_kind, new_kind)

    def _diff(self, old_path, new_path, old_kind, new_kind):
        """Offer one entry to the differs.

        :raises errors.NoDiffFound: if no differ could handle the entry; the
            new path is reported, or the old path when there is no new one.
        """
        result = DiffPath._diff_many(
            self.differs, old_path, new_path, old_kind, new_kind)
        if result is not DiffPath.CANNOT_DIFF:
            return
        raise errors.NoDiffFound(old_path if new_path is None else new_path)
1120
1121
# Module-level registry; DiffTree is registered under the name 'default'.
format_registry = Registry()
format_registry.register('default', DiffTree)
1124