1# Copyright (C) 2005-2014 Canonical Ltd. 2# 3# This program is free software; you can redistribute it and/or modify 4# it under the terms of the GNU General Public License as published by 5# the Free Software Foundation; either version 2 of the License, or 6# (at your option) any later version. 7# 8# This program is distributed in the hope that it will be useful, 9# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11# GNU General Public License for more details. 12# 13# You should have received a copy of the GNU General Public License 14# along with this program; if not, write to the Free Software 15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 17import contextlib 18import difflib 19import os 20import re 21import sys 22 23from .lazy_import import lazy_import 24lazy_import(globals(), """ 25import errno 26import patiencediff 27import subprocess 28import tempfile 29 30from breezy import ( 31 controldir, 32 osutils, 33 textfile, 34 timestamp, 35 views, 36 ) 37 38from breezy.workingtree import WorkingTree 39from breezy.i18n import gettext 40""") 41 42from . import ( 43 errors, 44 ) 45from .registry import ( 46 Registry, 47 ) 48from .trace import mutter, note, warning 49from .tree import FileTimestampUnavailable 50 51 52DEFAULT_CONTEXT_AMOUNT = 3 53 54 55# TODO: Rather than building a changeset object, we should probably 56# invoke callbacks on an object. That object can either accumulate a 57# list, write them out directly, etc etc. 
class _PrematchedMatcher(difflib.SequenceMatcher):
    """Allow SequenceMatcher operations to use predetermined blocks"""

    def __init__(self, matching_blocks):
        """Create a matcher whose matching blocks are already known.

        :param matching_blocks: The value handed back by
            get_matching_blocks(); the base class returns the cached
            ``self.matching_blocks`` when it is not None.
        """
        # Initialise *this* instance. The previous code called
        # ``difflib.SequenceMatcher(self, None, None)``, which built and
        # discarded an unrelated matcher (with ``self`` as ``isjunk``) and
        # left the base-class attributes of this object unset.
        difflib.SequenceMatcher.__init__(self, isjunk=None, a=None, b=None)
        self.matching_blocks = matching_blocks
        self.opcodes = None


def internal_diff(old_label, oldlines, new_label, newlines, to_file,
                  allow_binary=False, sequence_matcher=None,
                  path_encoding='utf8', context_lines=DEFAULT_CONTEXT_AMOUNT):
    """Write a unified diff between two lists of byte lines to to_file.

    Nothing is written when the two inputs produce no diff. Otherwise the
    diff is followed by one extra blank line.

    :param old_label: Text label for the old side; encoded with
        path_encoding (errors replaced) for the '---' header.
    :param oldlines: Lines (bytes) of the old content.
    :param new_label: Text label for the new side ('+++' header).
    :param newlines: Lines (bytes) of the new content.
    :param to_file: Binary stream the diff is written to.
    :param allow_binary: When False, both inputs are first passed to
        textfile.check_text_lines().
    :param sequence_matcher: Matcher class to use; defaults to
        patiencediff.PatienceSequenceMatcher.
    :param path_encoding: Encoding used for the labels.
    :param context_lines: Number of context lines around each hunk.
    """
    # FIXME: difflib is wrong if there is no trailing newline.
    # The syntax used by patch seems to be "\ No newline at
    # end of file" following the last diff line from that
    # file.  This is not trivial to insert into the
    # unified_diff output and it might be better to just fix
    # or replace that function.

    # In the meantime we at least make sure the patch isn't
    # mangled.

    if allow_binary is False:
        textfile.check_text_lines(oldlines)
        textfile.check_text_lines(newlines)

    if sequence_matcher is None:
        sequence_matcher = patiencediff.PatienceSequenceMatcher
    ud = list(unified_diff_bytes(
        oldlines, newlines,
        fromfile=old_label.encode(path_encoding, 'replace'),
        tofile=new_label.encode(path_encoding, 'replace'),
        n=context_lines, sequencematcher=sequence_matcher))

    if not ud:  # Identical contents, nothing to do
        return
    # work-around for difflib being too smart for its own good
    # if /dev/null is "1,0", patch won't recognize it as /dev/null
    # (ud[0] and ud[1] are the ---/+++ headers, ud[2] the first @@ line)
    if not oldlines:
        ud[2] = ud[2].replace(b'-1,0', b'-0,0')
    elif not newlines:
        ud[2] = ud[2].replace(b'+1,0', b'+0,0')

    for line in ud:
        to_file.write(line)
        if not line.endswith(b'\n'):
            to_file.write(b"\n\\ No newline at end of file\n")
    to_file.write(b'\n')


def unified_diff_bytes(a, b, fromfile=b'', tofile=b'', fromfiledate=b'',
                       tofiledate=b'', n=3, lineterm=b'\n', sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to b"" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using byte strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().
    A non-empty date is separated from the file name by a tab.

    'sequencematcher' is the matcher class to use; it is called as
    sequencematcher(None, a, b) and defaults to difflib.SequenceMatcher.

    Example:

    >>> for line in unified_diff_bytes(b'one two three four'.split(),
    ...             b'zero one tree four'.split(), b'Original', b'Current',
    ...             b'Sat Jan 26 23:30:50 1991', b'Fri Jun 06 10:20:52 2003',
    ...             lineterm=b''):
    ...     print(line.decode('ascii').expandtabs(1))
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        sequencematcher = difflib.SequenceMatcher

    if fromfiledate:
        fromfiledate = b'\t' + bytes(fromfiledate)
    if tofiledate:
        tofiledate = b'\t' + bytes(tofiledate)

    started = False
    for group in sequencematcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted lazily so identical inputs
            # yield nothing at all.
            yield b'--- %s%s%s' % (fromfile, fromfiledate, lineterm)
            yield b'+++ %s%s%s' % (tofile, tofiledate, lineterm)
            started = True
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield b"@@ -%d,%d +%d,%d @@%s" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield b' ' + line
                continue
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield b'-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield b'+' + line


def _spawn_external_diff(diffcmd, capture_errors=True):
    """Spawn the external diff process, and return the child handle.

    :param diffcmd: The command list to spawn
    :param capture_errors: Capture stderr as well as setting LANG=C
        and LC_ALL=C. This lets us read and understand the output of diff,
        and respond to any errors.
    :return: A Popen object.
    :raises errors.NoDiff: if the executable cannot be found (ENOENT).
    """
    if capture_errors:
        # construct minimal environment
        env = {}
        path = os.environ.get('PATH')
        if path is not None:
            env['PATH'] = path
        env['LANGUAGE'] = 'C'   # on win32 only LANGUAGE has effect
        env['LANG'] = 'C'
        env['LC_ALL'] = 'C'
        stderr = subprocess.PIPE
    else:
        # Inherit the caller's environment and stderr.
        env = None
        stderr = None

    try:
        pipe = subprocess.Popen(diffcmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=stderr,
                                env=env)
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise errors.NoDiff(str(e))
        raise

    return pipe


# diff style options as of GNU diff v3.2
style_option_list = ['-c', '-C', '--context',
                     '-e', '--ed',
                     '-f', '--forward-ed',
                     '-q', '--brief',
                     '--normal',
                     '-n', '--rcs',
                     '-u', '-U', '--unified',
                     '-y', '--side-by-side',
                     '-D', '--ifdef']


def default_style_unified(diff_opts):
    """Default to unified diff style if alternative not specified in diff_opts.

    diff only allows one style to be specified; they don't override.
    Note that some of these take optargs, and the optargs can be
    directly appended to the options.
    This is only an approximate parser; it doesn't properly understand
    the grammar.

    The list passed in is modified in place (and also returned).

    :param diff_opts: List of options for external (GNU) diff.
    :return: List of options with default style=='unified'.
    """
    # Prefix matching is deliberate: optargs may be glued onto the option
    # (e.g. '-U5', '--context=3').
    style_given = any(opt.startswith(style)
                      for style in style_option_list
                      for opt in diff_opts)
    if not style_given:
        diff_opts.append('-u')
    return diff_opts


def external_diff(old_label, oldlines, new_label, newlines, to_file,
                  diff_opts):
    """Display a diff by calling out to the external diff program.

    Both inputs are written to temporary files, 'diff' is run on them and
    its output is written to to_file followed by an extra newline.

    :param old_label: Label passed to diff via --label for the old file.
    :param oldlines: Lines (bytes) of the old content.
    :param new_label: Label passed to diff via --label for the new file.
    :param newlines: Lines (bytes) of the new content.
    :param to_file: Binary stream receiving diff's output.
    :param diff_opts: List of extra options for diff (may be empty/None);
        '-u' is added when no style option is present.
    :raises errors.BzrError: if diff exits with an unexpected status or is
        killed by a signal.
    """
    # make sure our own output is properly ordered before the diff
    to_file.flush()

    oldtmp_fd, old_abspath = tempfile.mkstemp(prefix='brz-diff-old-')
    newtmp_fd, new_abspath = tempfile.mkstemp(prefix='brz-diff-new-')
    oldtmpf = os.fdopen(oldtmp_fd, 'wb')
    newtmpf = os.fdopen(newtmp_fd, 'wb')

    try:
        # TODO: perhaps a special case for comparing to or from the empty
        # sequence; can just use /dev/null on Unix

        # TODO: if either of the files being compared already exists as a
        # regular named file (e.g. in the working directory) then we can
        # compare directly to that, rather than copying it.

        oldtmpf.writelines(oldlines)
        newtmpf.writelines(newlines)

        # Close before spawning diff so the content is flushed to disk.
        oldtmpf.close()
        newtmpf.close()

        if not diff_opts:
            diff_opts = []
        if sys.platform == 'win32':
            # Popen doesn't do the proper encoding for external commands
            # Since we are dealing with an ANSI api, use mbcs encoding
            old_label = old_label.encode('mbcs')
            new_label = new_label.encode('mbcs')
        diffcmd = ['diff',
                   '--label', old_label,
                   old_abspath,
                   '--label', new_label,
                   new_abspath,
                   '--binary',
                   ]

        diff_opts = default_style_unified(diff_opts)

        if diff_opts:
            diffcmd.extend(diff_opts)

        pipe = _spawn_external_diff(diffcmd, capture_errors=True)
        out, err = pipe.communicate()
        rc = pipe.returncode

        # internal_diff() adds a trailing newline, add one here for consistency
        out += b'\n'
        if rc == 2:
            # 'diff' gives retcode == 2 for all sorts of errors
            # one of those is 'Binary files differ'.
            # Bad options could also be the problem.
            # 'Binary files' is not a real error, so we suppress that error.
            lang_c_out = out

            # Since we got here, we want to make sure to give an i18n error
            pipe = _spawn_external_diff(diffcmd, capture_errors=False)
            out, err = pipe.communicate()

            # Write out the new i18n diff response
            to_file.write(out + b'\n')
            if pipe.returncode != 2:
                raise errors.BzrError(
                    'external diff failed with exit code 2'
                    ' when run with LANG=C and LC_ALL=C,'
                    ' but not when run natively: %r' % (diffcmd,))

            # Classify using the LANG=C output, whose wording is known.
            first_line = lang_c_out.split(b'\n', 1)[0]
            # Starting with diffutils 2.8.4 the word "binary" was dropped.
            m = re.match(b'^(binary )?files.*differ$', first_line, re.I)
            if m is None:
                raise errors.BzrError('external diff failed with exit code 2;'
                                      ' command: %r' % (diffcmd,))
            else:
                # Binary files differ, just return
                return

        # If we got to here, we haven't written out the output of diff
        # do so now
        to_file.write(out)
        if rc not in (0, 1):
            # returns 1 if files differ; that's OK
            if rc < 0:
                msg = 'signal %d' % (-rc)
            else:
                msg = 'exit code %d' % rc

            raise errors.BzrError('external diff failed with %s; command: %r'
                                  % (msg, diffcmd))

    finally:
        oldtmpf.close()                 # and delete
        newtmpf.close()

        def cleanup(path):
            # Warn in case the file couldn't be deleted (in case windows still
            # holds the file open, but not if the files have already been
            # deleted)
            try:
                os.remove(path)
            except OSError as e:
                if e.errno not in (errno.ENOENT,):
                    warning('Failed to delete temporary file: %s %s', path, e)

        cleanup(old_abspath)
        cleanup(new_abspath)


def get_trees_and_branches_to_diff_locked(
        path_list, revision_specs, old_url, new_url, exit_stack, apply_view=True):
    """Get the trees and specific files to diff given a list of paths.

    This method works out the trees to be diff'ed and the files of
    interest within those trees.

    :param path_list:
        the list of arguments passed to the diff command
    :param revision_specs:
        Zero, one or two RevisionSpecs from the diff command line,
        saying what revisions to compare.
    :param old_url:
        The url of the old branch or tree. If None, the tree to use is
        taken from the first path, if any, or the current working tree.
    :param new_url:
        The url of the new branch or tree. If None, the tree to use is
        taken from the first path, if any, or the current working tree.
    :param exit_stack:
        an ExitStack object. get_trees_and_branches_to_diff
        will register cleanups that must be run to unlock the trees, etc.
    :param apply_view:
        if True and a view is set, apply the view or check that the paths
        are within it
    :returns:
        a tuple of (old_tree, new_tree, old_branch, new_branch,
        specific_files, extra_trees) where extra_trees is a sequence of
        additional trees to search in for file-ids.  The trees and branches
        will be read-locked until the cleanups registered via the exit_stack
        param are run.
    """
    # Get the old and new revision specs
    old_revision_spec = None
    new_revision_spec = None
    if revision_specs is not None:
        if len(revision_specs) > 0:
            old_revision_spec = revision_specs[0]
            if old_url is None:
                old_url = old_revision_spec.get_branch()
        if len(revision_specs) > 1:
            new_revision_spec = revision_specs[1]
            if new_url is None:
                new_url = new_revision_spec.get_branch()

    other_paths = []
    make_paths_wt_relative = True
    consider_relpath = True
    if path_list is None or len(path_list) == 0:
        # If no path is given, the current working tree is used
        default_location = u'.'
        consider_relpath = False
    elif old_url is not None and new_url is not None:
        # Both locations are explicit, so every path is a file filter.
        # default_location is intentionally not set: it is only read when
        # one of the urls is None.
        other_paths = path_list
        make_paths_wt_relative = False
    else:
        default_location = path_list[0]
        other_paths = path_list[1:]

    def lock_tree_or_branch(wt, br):
        # Read-lock the working tree if there is one, else the branch;
        # the unlock is registered on exit_stack.
        if wt is not None:
            exit_stack.enter_context(wt.lock_read())
        elif br is not None:
            exit_stack.enter_context(br.lock_read())

    # Get the old location
    specific_files = []
    if old_url is None:
        old_url = default_location
    working_tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch(old_url)
    lock_tree_or_branch(working_tree, branch)
    if consider_relpath and relpath != '':
        if working_tree is not None and apply_view:
            views.check_path_in_view(working_tree, relpath)
        specific_files.append(relpath)
    old_tree = _get_tree_to_diff(old_revision_spec, working_tree, branch)
    old_branch = branch

    # Get the new location
    if new_url is None:
        new_url = default_location
    if new_url != old_url:
        working_tree, branch, relpath = \
            controldir.ControlDir.open_containing_tree_or_branch(new_url)
        lock_tree_or_branch(working_tree, branch)
        if consider_relpath and relpath != '':
            if working_tree is not None and apply_view:
                views.check_path_in_view(working_tree, relpath)
            specific_files.append(relpath)
    new_tree = _get_tree_to_diff(new_revision_spec, working_tree, branch,
                                 basis_is_default=working_tree is None)
    new_branch = branch

    # Get the specific files (all files is None, no files is [])
    if make_paths_wt_relative and working_tree is not None:
        other_paths = working_tree.safe_relpath_files(
            other_paths,
            apply_view=apply_view)
    specific_files.extend(other_paths)
    if len(specific_files) == 0:
        specific_files = None
        if (working_tree is not None and working_tree.supports_views() and
                apply_view):
            view_files = working_tree.views.lookup_view()
            if view_files:
                specific_files = view_files
                view_str = views.view_display_str(view_files)
                note(gettext("*** Ignoring files outside view. View is %s") % view_str)

    # Get extra trees that ought to be searched for file-ids
    extra_trees = None
    if working_tree is not None and working_tree not in (old_tree, new_tree):
        extra_trees = (working_tree,)
    return (old_tree, new_tree, old_branch, new_branch,
            specific_files, extra_trees)


def _get_tree_to_diff(spec, tree=None, branch=None, basis_is_default=True):
    """Pick the tree one side of a diff should use.

    :param spec: Revision spec for this side, or None.
    :param tree: Working tree, if any.
    :param branch: Branch; taken from ``tree.branch`` when None and a tree
        is given.
    :param basis_is_default: When no revision is specified, return the
        basis tree (of the tree if present, else of the branch) if True,
        otherwise return ``tree`` itself.
    :return: ``spec.as_tree(branch)`` when a revision is specified,
        otherwise the default described above.
    """
    if branch is None and tree is not None:
        branch = tree.branch
    if spec is None or spec.spec is None:
        if basis_is_default:
            if tree is not None:
                return tree.basis_tree()
            else:
                return branch.basis_tree()
        else:
            return tree
    return spec.as_tree(branch)


def show_diff_trees(old_tree, new_tree, to_file, specific_files=None,
                    external_diff_options=None,
                    old_label='a/', new_label='b/',
                    extra_trees=None,
                    path_encoding='utf8',
                    using=None,
                    format_cls=None,
                    context=DEFAULT_CONTEXT_AMOUNT):
    """Show in text form the changes from one tree to another.

    :param to_file: The output stream.
    :param specific_files: Include only changes to these files - None for all
        changes.
    :param external_diff_options: If set, use an external GNU diff and pass
        these options.
    :param old_label: Prefix to use for old file labels
    :param new_label: Prefix to use for new file labels
    :param extra_trees: If set, more Trees to use for looking up file ids
    :param path_encoding: If set, the path will be encoded as specified,
        otherwise is supposed to be utf8
    :param using: Commandline to use to invoke an external diff tool
    :param format_cls: Formatter class (DiffTree subclass)
    :param context: Number of context lines; None means
        DEFAULT_CONTEXT_AMOUNT.
    :return: Whatever the formatter's show_diff() returns.
    """
    if context is None:
        context = DEFAULT_CONTEXT_AMOUNT
    if format_cls is None:
        format_cls = DiffTree
    # All trees stay read-locked for the duration of the diff.
    with contextlib.ExitStack() as exit_stack:
        exit_stack.enter_context(old_tree.lock_read())
        if extra_trees is not None:
            for tree in extra_trees:
                exit_stack.enter_context(tree.lock_read())
        exit_stack.enter_context(new_tree.lock_read())
        differ = format_cls.from_trees_options(old_tree, new_tree, to_file,
                                               path_encoding,
                                               external_diff_options,
                                               old_label, new_label, using,
                                               context_lines=context)
        return differ.show_diff(specific_files, extra_trees)


def _patch_header_date(tree, path):
    """Returns a timestamp suitable for use in a patch header."""
    try:
        mtime = tree.get_file_mtime(path)
    except FileTimestampUnavailable:
        # Fall back to the epoch when the tree cannot supply an mtime.
        mtime = 0
    return timestamp.format_patch_date(mtime)


def get_executable_change(old_is_x, new_is_x):
    """Describe a change of the executable flag.

    :param old_is_x: Old flag: True, False or None (rendered as ??).
    :param new_is_x: New flag: True, False or None.
    :return: ``[]`` when the flags are equal, otherwise a one-element list
        such as ``[b"-x to +x"]``.
    """
    descr = {True: b"+x", False: b"-x", None: b"??"}
    if old_is_x != new_is_x:
        return [b"%s to %s" % (descr[old_is_x], descr[new_is_x],)]
    else:
        return []


class DiffPath(object):
    """Base type for command object that compare files"""

    # The type or contents of the file were unsuitable for diffing
    CANNOT_DIFF = 'CANNOT_DIFF'
    # The file has changed in a semantic way
    CHANGED = 'CHANGED'
    # The file content may have changed, but there is no semantic change
    UNCHANGED = 'UNCHANGED'

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8'):
        """Constructor.

        :param old_tree: The tree to show as the old tree in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: The file to write comparison data to
        :param path_encoding: The character encoding to write paths in
        """
        self.old_tree = old_tree
        self.new_tree = new_tree
        self.to_file = to_file
        self.path_encoding = path_encoding

    def finish(self):
        """Release any resources held by the differ (nothing by default)."""
        pass

    @classmethod
    def from_diff_tree(klass, diff_tree):
        """Build an instance from a DiffTree's trees, stream and encoding."""
        return klass(diff_tree.old_tree, diff_tree.new_tree,
                     diff_tree.to_file, diff_tree.path_encoding)

    @staticmethod
    def _diff_many(differs, old_path, new_path, old_kind, new_kind):
        """Try each differ in order and return the first usable result.

        :return: The first result that is not CANNOT_DIFF, or CANNOT_DIFF
            if every differ declined.
        """
        for file_differ in differs:
            result = file_differ.diff(old_path, new_path, old_kind, new_kind)
            if result is not DiffPath.CANNOT_DIFF:
                return result
        else:
            return DiffPath.CANNOT_DIFF


class DiffKindChange(object):
    """Special differ for file kind changes.

    Represents kind change as deletion + creation.  Uses the other differs
    to do this.
    """

    def __init__(self, differs):
        """:param differs: The list of differs to delegate to."""
        self.differs = differs

    def finish(self):
        pass

    @classmethod
    def from_diff_tree(klass, diff_tree):
        return klass(diff_tree.differs)

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        """
        # A pure addition or removal is not a kind change.
        if None in (old_kind, new_kind):
            return DiffPath.CANNOT_DIFF
        # First show the old kind going away...
        result = DiffPath._diff_many(
            self.differs, old_path, new_path, old_kind, None)
        if result is DiffPath.CANNOT_DIFF:
            return result
        # ...then the new kind appearing.
        return DiffPath._diff_many(
            self.differs, old_path, new_path, None, new_kind)


class DiffTreeReference(DiffPath):

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two tree references.  (dummy)

        Writes nothing; returns CHANGED when at least one side is a
        'tree-reference' and the other is a 'tree-reference' or None,
        CANNOT_DIFF otherwise.
        """
        if 'tree-reference' not in (old_kind, new_kind):
            return self.CANNOT_DIFF
        if old_kind not in ('tree-reference', None):
            return self.CANNOT_DIFF
        if new_kind not in ('tree-reference', None):
            return self.CANNOT_DIFF
        return self.CHANGED


class DiffDirectory(DiffPath):

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two directories.  (dummy)

        Writes nothing; returns CHANGED when at least one side is a
        'directory' and the other is a 'directory' or None, CANNOT_DIFF
        otherwise.
        """
        if 'directory' not in (old_kind, new_kind):
            return self.CANNOT_DIFF
        if old_kind not in ('directory', None):
            return self.CANNOT_DIFF
        if new_kind not in ('directory', None):
            return self.CANNOT_DIFF
        return self.CHANGED


class DiffSymlink(DiffPath):

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two symlinks

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        """
        if 'symlink' not in (old_kind, new_kind):
            return self.CANNOT_DIFF
        if old_kind == 'symlink':
            old_target = self.old_tree.get_symlink_target(old_path)
        elif old_kind is None:
            old_target = None
        else:
            return self.CANNOT_DIFF
        if new_kind == 'symlink':
            new_target = self.new_tree.get_symlink_target(new_path)
        elif new_kind is None:
            new_target = None
        else:
            return self.CANNOT_DIFF
        return self.diff_symlink(old_target, new_target)

    def diff_symlink(self, old_target, new_target):
        """Write a one-line description of a symlink target change.

        :param old_target: Old link target, or None if the link is new.
        :param new_target: New link target, or None if the link was removed.
        :return: CHANGED
        """
        if old_target is None:
            self.to_file.write(b'=== target is \'%s\'\n' %
                               new_target.encode(self.path_encoding, 'replace'))
        elif new_target is None:
            self.to_file.write(b'=== target was \'%s\'\n' %
                               old_target.encode(self.path_encoding, 'replace'))
        else:
            self.to_file.write(b'=== target changed \'%s\' => \'%s\'\n' %
                               (old_target.encode(self.path_encoding, 'replace'),
                                new_target.encode(self.path_encoding, 'replace')))
        return self.CHANGED


class DiffText(DiffPath):

    # GNU Patch uses the epoch date to detect files that are being added
    # or removed in a diff.
    EPOCH_DATE = '1970-01-01 00:00:00 +0000'

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
                 old_label='', new_label='', text_differ=internal_diff,
                 context_lines=DEFAULT_CONTEXT_AMOUNT):
        """Constructor.

        :param old_tree: The tree to show as the old tree in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: The file to write comparison data to
        :param path_encoding: The character encoding to write paths in
        :param old_label: Prefix to use for old file labels
        :param new_label: Prefix to use for new file labels
        :param text_differ: Callable with the signature of internal_diff
            that performs the line comparison.
        :param context_lines: Number of context lines passed to
            text_differ.
        """
        # DiffPath.__init__ already stores path_encoding.
        DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
        self.text_differ = text_differ
        self.old_label = old_label
        self.new_label = new_label
        self.context_lines = context_lines

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Compare two files in unified diff format

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        """
        if 'file' not in (old_kind, new_kind):
            return self.CANNOT_DIFF
        if old_kind == 'file':
            old_date = _patch_header_date(self.old_tree, old_path)
        elif old_kind is None:
            old_date = self.EPOCH_DATE
        else:
            return self.CANNOT_DIFF
        if new_kind == 'file':
            new_date = _patch_header_date(self.new_tree, new_path)
        elif new_kind is None:
            new_date = self.EPOCH_DATE
        else:
            return self.CANNOT_DIFF
        # When one side is missing, label it with the other side's path.
        from_label = '%s%s\t%s' % (
            self.old_label, old_path or new_path, old_date)
        to_label = '%s%s\t%s' % (
            self.new_label, new_path or old_path, new_date)
        return self.diff_text(old_path, new_path, from_label, to_label)

    def diff_text(self, from_path, to_path, from_label, to_label):
        """Diff the content of given files in two trees

        :param from_path: The path in the from tree. If None,
            the file is not present in the from tree.
        :param to_path: The path in the to tree. This may refer
            to a different file from from_path.  If None,
            the file is not present in the to tree.
        :param from_label: Label for the old side of the diff.
        :param to_label: Label for the new side of the diff.
        :return: CHANGED
        """
        def _get_text(tree, path):
            # A missing path or file is treated as empty content.
            if path is None:
                return []
            try:
                return tree.get_file_lines(path)
            except errors.NoSuchFile:
                return []
        try:
            from_text = _get_text(self.old_tree, from_path)
            to_text = _get_text(self.new_tree, to_path)
            self.text_differ(from_label, from_text, to_label, to_text,
                             self.to_file, path_encoding=self.path_encoding,
                             context_lines=self.context_lines)
        except errors.BinaryFile:
            self.to_file.write(
                ("Binary files %s%s and %s%s differ\n" %
                 (self.old_label, from_path or to_path,
                  self.new_label, to_path or from_path)
                 ).encode(self.path_encoding, 'replace'))
        return self.CHANGED


class DiffFromTool(DiffPath):
    """Differ that runs an external tool on on-disk copies of the files.

    A temporary directory is created on construction and removed by
    finish().
    """

    def __init__(self, command_template, old_tree, new_tree, to_file,
                 path_encoding='utf-8'):
        """Constructor.

        :param command_template: List of command words; each word may
            contain ``{old_path}`` / ``{new_path}`` placeholders.
        :param old_tree: The tree to show as the old tree in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: The file to write the tool's output to
        :param path_encoding: The character encoding to write paths in
        """
        DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
        self.command_template = command_template
        self._root = osutils.mkdtemp(prefix='brz-diff-')

    @classmethod
    def from_string(klass, command_template, old_tree, new_tree, to_file,
                    path_encoding='utf-8'):
        """Alternate constructor; passes its arguments straight through."""
        return klass(command_template, old_tree, new_tree, to_file,
                     path_encoding)

    @classmethod
    def make_from_diff_tree(klass, command_string, external_diff_options=None):
        """Return a factory that builds this differ from a DiffTree.

        :param command_string: The tool's command; used as the first word
            of the command template.
        :param external_diff_options: Extra options for the tool, either a
            whitespace-separated string or a sequence of words.
        """
        def from_diff_tree(diff_tree):
            full_command_string = [command_string]
            if external_diff_options is not None:
                # Append whole option words. The previous
                # ``list += ' ' + options`` extended the list with one
                # element per *character* of the option string.
                if isinstance(external_diff_options, str):
                    extra_words = external_diff_options.split()
                else:
                    extra_words = list(external_diff_options)
                full_command_string.extend(extra_words)
            return klass.from_string(full_command_string, diff_tree.old_tree,
                                     diff_tree.new_tree, diff_tree.to_file)
        return from_diff_tree

    def _get_command(self, old_path, new_path):
        """Fill the command template with the two on-disk paths.

        If the template contains no placeholders, the two paths are
        appended as trailing arguments instead.
        """
        my_map = {'old_path': old_path, 'new_path': new_path}
        command = [t.format(**my_map) for t in
                   self.command_template]
        if command == self.command_template:
            command += [old_path, new_path]
        if sys.platform == 'win32':  # Popen doesn't accept unicode on win32
            command_encoded = []
            for c in command:
                if isinstance(c, str):
                    command_encoded.append(c.encode('mbcs'))
                else:
                    command_encoded.append(c)
            return command_encoded
        else:
            return command

    def _execute(self, old_path, new_path):
        """Run the tool, copy its stdout to to_file, return its exit code.

        :raises errors.ExecutableMissing: if the tool cannot be found.
        """
        command = self._get_command(old_path, new_path)
        try:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                                    cwd=self._root)
        except OSError as e:
            if e.errno == errno.ENOENT:
                raise errors.ExecutableMissing(command[0])
            else:
                raise
        self.to_file.write(proc.stdout.read())
        proc.stdout.close()
        return proc.wait()

    def _try_symlink_root(self, tree, prefix):
        """Try to expose the tree under the temp root via a symlink.

        :return: False if the tree has no abspath or the host OS does not
            dereference symlinks; True once the link exists.
        """
        if (getattr(tree, 'abspath', None) is None or
                not osutils.host_os_dereferences_symlinks()):
            return False
        try:
            os.symlink(tree.abspath(''), osutils.pathjoin(self._root, prefix))
        except OSError as e:
            # An already existing link is fine.
            if e.errno != errno.EEXIST:
                raise
        return True

    @staticmethod
    def _fenc():
        """Returns safe encoding for passing file path to diff tool"""
        if sys.platform == 'win32':
            return 'mbcs'
        else:
            # Don't fallback to 'utf-8' because subprocess may not be able to
            # handle utf-8 correctly when locale is not utf-8.
            return sys.getfilesystemencoding() or 'ascii'

    def _is_safepath(self, path):
        """Return true if `path` may be able to pass to subprocess."""
        fenc = self._fenc()
        try:
            # Safe means the path round-trips through the encoding.
            return path == path.encode(fenc).decode(fenc)
        except UnicodeError:
            return False

    def _safe_filename(self, prefix, relpath):
        """Replace unsafe character in `relpath` then join `self._root`,
        `prefix` and `relpath`."""
        fenc = self._fenc()
        # encoded_str.replace('?', '_') may break multibyte char.
        # So we should encode, decode, then replace(u'?', u'_')
        relpath_tmp = relpath.encode(fenc, 'replace').decode(fenc, 'replace')
        relpath_tmp = relpath_tmp.replace(u'?', u'_')
        return osutils.pathjoin(self._root, prefix, relpath_tmp)

    def _write_file(self, relpath, tree, prefix, force_temp=False,
                    allow_write=False):
        """Make `relpath` from `tree` available on disk and return its path.

        :param relpath: Path of the file inside the tree.
        :param tree: Tree to read the file from.
        :param prefix: Sub-directory of the temp root ('old' / 'new').
        :param force_temp: Always write a temporary copy, even when the
            working-tree file or a symlinked root could be used.
        :param allow_write: Leave the temporary copy writable.
        :return: Path of the file the tool should read.
        """
        if not force_temp and isinstance(tree, WorkingTree):
            # Use the real file directly when its path can be passed on.
            full_path = tree.abspath(relpath)
            if self._is_safepath(full_path):
                return full_path

        full_path = self._safe_filename(prefix, relpath)
        if not force_temp and self._try_symlink_root(tree, prefix):
            return full_path
        parent_dir = osutils.dirname(full_path)
        try:
            os.makedirs(parent_dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        with tree.get_file(relpath) as source, \
                open(full_path, 'wb') as target:
            osutils.pumpfile(source, target)
        try:
            mtime = tree.get_file_mtime(relpath)
        except FileTimestampUnavailable:
            pass
        else:
            os.utime(full_path, (mtime, mtime))
        if not allow_write:
            osutils.make_readonly(full_path)
        return full_path

    def _prepare_files(self, old_path, new_path, force_temp=False,
                       allow_write_new=False):
        """Write both files to disk; return (old_disk_path, new_disk_path)."""
        old_disk_path = self._write_file(
            old_path, self.old_tree, 'old', force_temp)
        new_disk_path = self._write_file(
            new_path, self.new_tree, 'new', force_temp,
            allow_write=allow_write_new)
        return old_disk_path, new_disk_path

    def finish(self):
        """Remove the temporary directory; failures are only logged."""
        try:
            osutils.rmtree(self._root)
        except OSError as e:
            if e.errno != errno.ENOENT:
                mutter("The temporary directory \"%s\" was not "
                       "cleanly removed: %s." % (self._root, e))

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Run the external tool on a pair of files.

        Only file-to-file comparisons are handled; anything else returns
        CANNOT_DIFF.
        """
        if (old_kind, new_kind) != ('file', 'file'):
            return DiffPath.CANNOT_DIFF
        (old_disk_path, new_disk_path) = self._prepare_files(
            old_path, new_path)
        # NOTE(review): this falls through and returns None rather than
        # DiffPath.CHANGED. DiffPath._diff_many only tests for CANNOT_DIFF,
        # so None counts as "handled" there - confirm no caller needs CHANGED.
        self._execute(old_disk_path, new_disk_path)

    def edit_file(self, old_path, new_path):
        """Use this tool to edit a file.

        A temporary copy will be edited, and the new contents will be
        returned.

        :return: The new contents of the file.
        """
        old_abs_path, new_abs_path = self._prepare_files(
            old_path, new_path, allow_write_new=True, force_temp=True)
        command = self._get_command(old_abs_path, new_abs_path)
        subprocess.call(command, cwd=self._root)
        with open(new_abs_path, 'rb') as new_file:
            return new_file.read()


class DiffTree(object):
    """Provides textual representations of the difference between two trees.

    A DiffTree examines two trees and where a file-id has altered
    between them, generates a textual representation of the difference.
    DiffTree uses a sequence of DiffPath objects which are each
    given the opportunity to handle a given altered fileid. The list
    of DiffPath objects can be extended globally by appending to
    DiffTree.diff_factories, or for a specific diff operation by
    supplying the extra_factories option to the appropriate method.
    """

    # list of factories that can provide instances of DiffPath objects
    # may be extended by plugins.
948 diff_factories = [DiffSymlink.from_diff_tree, 949 DiffDirectory.from_diff_tree, 950 DiffTreeReference.from_diff_tree] 951 952 def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8', 953 diff_text=None, extra_factories=None): 954 """Constructor 955 956 :param old_tree: Tree to show as old in the comparison 957 :param new_tree: Tree to show as new in the comparison 958 :param to_file: File to write comparision to 959 :param path_encoding: Character encoding to write paths in 960 :param diff_text: DiffPath-type object to use as a last resort for 961 diffing text files. 962 :param extra_factories: Factories of DiffPaths to try before any other 963 DiffPaths""" 964 if diff_text is None: 965 diff_text = DiffText(old_tree, new_tree, to_file, path_encoding, 966 '', '', internal_diff) 967 self.old_tree = old_tree 968 self.new_tree = new_tree 969 self.to_file = to_file 970 self.path_encoding = path_encoding 971 self.differs = [] 972 if extra_factories is not None: 973 self.differs.extend(f(self) for f in extra_factories) 974 self.differs.extend(f(self) for f in self.diff_factories) 975 self.differs.extend([diff_text, DiffKindChange.from_diff_tree(self)]) 976 977 @classmethod 978 def from_trees_options(klass, old_tree, new_tree, to_file, 979 path_encoding, external_diff_options, old_label, 980 new_label, using, context_lines): 981 """Factory for producing a DiffTree. 982 983 Designed to accept options used by show_diff_trees. 984 985 :param old_tree: The tree to show as old in the comparison 986 :param new_tree: The tree to show as new in the comparison 987 :param to_file: File to write comparisons to 988 :param path_encoding: Character encoding to use for writing paths 989 :param external_diff_options: If supplied, use the installed diff 990 binary to perform file comparison, using supplied options. 
991 :param old_label: Prefix to use for old file labels 992 :param new_label: Prefix to use for new file labels 993 :param using: Commandline to use to invoke an external diff tool 994 """ 995 if using is not None: 996 extra_factories = [DiffFromTool.make_from_diff_tree( 997 using, external_diff_options)] 998 else: 999 extra_factories = [] 1000 if external_diff_options: 1001 opts = external_diff_options.split() 1002 1003 def diff_file(olab, olines, nlab, nlines, to_file, path_encoding=None, context_lines=None): 1004 """:param path_encoding: not used but required 1005 to match the signature of internal_diff. 1006 """ 1007 external_diff(olab, olines, nlab, nlines, to_file, opts) 1008 else: 1009 diff_file = internal_diff 1010 diff_text = DiffText(old_tree, new_tree, to_file, path_encoding, 1011 old_label, new_label, diff_file, context_lines=context_lines) 1012 return klass(old_tree, new_tree, to_file, path_encoding, diff_text, 1013 extra_factories) 1014 1015 def show_diff(self, specific_files, extra_trees=None): 1016 """Write tree diff to self.to_file 1017 1018 :param specific_files: the specific files to compare (recursive) 1019 :param extra_trees: extra trees to use for mapping paths to file_ids 1020 """ 1021 try: 1022 return self._show_diff(specific_files, extra_trees) 1023 finally: 1024 for differ in self.differs: 1025 differ.finish() 1026 1027 def _show_diff(self, specific_files, extra_trees): 1028 # TODO: Generation of pseudo-diffs for added/deleted files could 1029 # be usefully made into a much faster special case. 
1030 iterator = self.new_tree.iter_changes(self.old_tree, 1031 specific_files=specific_files, 1032 extra_trees=extra_trees, 1033 require_versioned=True) 1034 has_changes = 0 1035 1036 def changes_key(change): 1037 old_path, new_path = change.path 1038 path = new_path 1039 if path is None: 1040 path = old_path 1041 return path 1042 1043 def get_encoded_path(path): 1044 if path is not None: 1045 return path.encode(self.path_encoding, "replace") 1046 for change in sorted(iterator, key=changes_key): 1047 # The root does not get diffed, and items with no known kind (that 1048 # is, missing) in both trees are skipped as well. 1049 if change.parent_id == (None, None) or change.kind == (None, None): 1050 continue 1051 if change.kind[0] == 'symlink' and not self.new_tree.supports_symlinks(): 1052 warning( 1053 'Ignoring "%s" as symlinks are not ' 1054 'supported on this filesystem.' % (change.path[0],)) 1055 continue 1056 oldpath, newpath = change.path 1057 oldpath_encoded = get_encoded_path(oldpath) 1058 newpath_encoded = get_encoded_path(newpath) 1059 old_present = (change.kind[0] is not None and change.versioned[0]) 1060 new_present = (change.kind[1] is not None and change.versioned[1]) 1061 executable = change.executable 1062 kind = change.kind 1063 renamed = (change.parent_id[0], change.name[0]) != (change.parent_id[1], change.name[1]) 1064 1065 properties_changed = [] 1066 properties_changed.extend( 1067 get_executable_change(executable[0], executable[1])) 1068 1069 if properties_changed: 1070 prop_str = b" (properties changed: %s)" % ( 1071 b", ".join(properties_changed),) 1072 else: 1073 prop_str = b"" 1074 1075 if (old_present, new_present) == (True, False): 1076 self.to_file.write(b"=== removed %s '%s'\n" % 1077 (kind[0].encode('ascii'), oldpath_encoded)) 1078 elif (old_present, new_present) == (False, True): 1079 self.to_file.write(b"=== added %s '%s'\n" % 1080 (kind[1].encode('ascii'), newpath_encoded)) 1081 elif renamed: 1082 self.to_file.write(b"=== renamed %s 
'%s' => '%s'%s\n" % 1083 (kind[0].encode('ascii'), oldpath_encoded, newpath_encoded, prop_str)) 1084 else: 1085 # if it was produced by iter_changes, it must be 1086 # modified *somehow*, either content or execute bit. 1087 self.to_file.write(b"=== modified %s '%s'%s\n" % (kind[0].encode('ascii'), 1088 newpath_encoded, prop_str)) 1089 if change.changed_content: 1090 self._diff(oldpath, newpath, kind[0], kind[1]) 1091 has_changes = 1 1092 if renamed: 1093 has_changes = 1 1094 return has_changes 1095 1096 def diff(self, old_path, new_path): 1097 """Perform a diff of a single file 1098 1099 :param old_path: The path of the file in the old tree 1100 :param new_path: The path of the file in the new tree 1101 """ 1102 if old_path is None: 1103 old_kind = None 1104 else: 1105 old_kind = self.old_tree.kind(old_path) 1106 if new_path is None: 1107 new_kind = None 1108 else: 1109 new_kind = self.new_tree.kind(new_path) 1110 self._diff(old_path, new_path, old_kind, new_kind) 1111 1112 def _diff(self, old_path, new_path, old_kind, new_kind): 1113 result = DiffPath._diff_many( 1114 self.differs, old_path, new_path, old_kind, new_kind) 1115 if result is DiffPath.CANNOT_DIFF: 1116 error_path = new_path 1117 if error_path is None: 1118 error_path = old_path 1119 raise errors.NoDiffFound(error_path) 1120 1121 1122format_registry = Registry() 1123format_registry.register('default', DiffTree) 1124