1# Copyright (C) 2005-2011 Canonical Ltd
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation; either version 2 of the License, or
6# (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17import contextlib
18
19from .lazy_import import lazy_import
20lazy_import(globals(), """
21import patiencediff
22
23from breezy import (
24    branch as _mod_branch,
25    conflicts as _mod_conflicts,
26    debug,
27    graph as _mod_graph,
28    merge3,
29    osutils,
30    revision as _mod_revision,
31    textfile,
32    trace,
33    tree as _mod_tree,
34    tsort,
35    ui,
36    workingtree,
37    )
38from breezy.bzr import (
39    conflicts as _mod_bzr_conflicts,
40    generate_ids,
41    versionedfile,
42    )
43from breezy.i18n import gettext
44""")
45from breezy.bzr.conflicts import Conflict as BzrConflict
46from . import (
47    decorators,
48    errors,
49    hooks,
50    registry,
51    transform,
52    )
53# TODO: Report back as changes are merged in
54
55
56def transform_tree(from_tree, to_tree, interesting_files=None):
57    with from_tree.lock_tree_write():
58        merge_inner(from_tree.branch, to_tree, from_tree,
59                    ignore_zero=True, this_tree=from_tree,
60                    interesting_files=interesting_files)
61
62
63class MergeHooks(hooks.Hooks):
64
65    def __init__(self):
66        hooks.Hooks.__init__(self, "breezy.merge", "Merger.hooks")
67        self.add_hook('merge_file_content',
68                      "Called with a breezy.merge.Merger object to create a per file "
69                      "merge object when starting a merge. "
70                      "Should return either None or a subclass of "
71                      "``breezy.merge.AbstractPerFileMerger``. "
72                      "Such objects will then be called per file "
73                      "that needs to be merged (including when one "
74                      "side has deleted the file and the other has changed it). "
75                      "See the AbstractPerFileMerger API docs for details on how it is "
76                      "used by merge.",
77                      (2, 1))
78        self.add_hook('pre_merge',
79                      'Called before a merge. '
80                      'Receives a Merger object as the single argument.',
81                      (2, 5))
82        self.add_hook('post_merge',
83                      'Called after a merge. '
84                      'Receives a Merger object as the single argument. '
85                      'The return value is ignored.',
86                      (2, 5))
87
88
89class AbstractPerFileMerger(object):
90    """PerFileMerger objects are used by plugins extending merge for breezy.
91
92    See ``breezy.plugins.news_merge.news_merge`` for an example concrete class.
93
94    :ivar merger: The Merge3Merger performing the merge.
95    """
96
97    def __init__(self, merger):
98        """Create a PerFileMerger for use with merger."""
99        self.merger = merger
100
101    def merge_contents(self, merge_params):
102        """Attempt to merge the contents of a single file.
103
104        :param merge_params: A breezy.merge.MergeFileHookParams
105        :return: A tuple of (status, chunks), where status is one of
106            'not_applicable', 'success', 'conflicted', or 'delete'.  If status
107            is 'success' or 'conflicted', then chunks should be an iterable of
108            strings for the new file contents.
109        """
110        return ('not applicable', None)
111
112
113class PerFileMerger(AbstractPerFileMerger):
114    """Merge individual files when self.file_matches returns True.
115
116    This class is intended to be subclassed.  The file_matches and
117    merge_matching methods should be overridden with concrete implementations.
118    """
119
120    def file_matches(self, params):
121        """Return True if merge_matching should be called on this file.
122
123        Only called with merges of plain files with no clear winner.
124
125        Subclasses must override this.
126        """
127        raise NotImplementedError(self.file_matches)
128
129    def merge_contents(self, params):
130        """Merge the contents of a single file."""
131        # Check whether this custom merge logic should be used.
132        if (
133            # OTHER is a straight winner, rely on default merge.
134            params.winner == 'other' or
135            # THIS and OTHER aren't both files.
136            not params.is_file_merge() or
137            # The filename doesn't match
138                not self.file_matches(params)):
139            return 'not_applicable', None
140        return self.merge_matching(params)
141
142    def merge_matching(self, params):
143        """Merge the contents of a single file that has matched the criteria
144        in PerFileMerger.merge_contents (is a conflict, is a file,
145        self.file_matches is True).
146
147        Subclasses must override this.
148        """
149        raise NotImplementedError(self.merge_matching)
150
151
152class ConfigurableFileMerger(PerFileMerger):
153    """Merge individual files when configured via a .conf file.
154
155    This is a base class for concrete custom file merging logic. Concrete
156    classes should implement ``merge_text``.
157
158    See ``breezy.plugins.news_merge.news_merge`` for an example concrete class.
159
160    :ivar affected_files: The configured file paths to merge.
161
162    :cvar name_prefix: The prefix to use when looking up configuration
163        details. <name_prefix>_merge_files describes the files targeted by the
164        hook for example.
165
166    :cvar default_files: The default file paths to merge when no configuration
167        is present.
168    """
169
170    name_prefix = None
171    default_files = None
172
173    def __init__(self, merger):
174        super(ConfigurableFileMerger, self).__init__(merger)
175        self.affected_files = None
176        self.default_files = self.__class__.default_files or []
177        self.name_prefix = self.__class__.name_prefix
178        if self.name_prefix is None:
179            raise ValueError("name_prefix must be set.")
180
181    def file_matches(self, params):
182        """Check whether the file should call the merge hook.
183
184        <name_prefix>_merge_files configuration variable is a list of files
185        that should use the hook.
186        """
187        affected_files = self.affected_files
188        if affected_files is None:
189            config = self.merger.this_branch.get_config()
190            # Until bzr provides a better policy for caching the config, we
191            # just add the part we're interested in to the params to avoid
192            # reading the config files repeatedly (breezy.conf, location.conf,
193            # branch.conf).
194            config_key = self.name_prefix + '_merge_files'
195            affected_files = config.get_user_option_as_list(config_key)
196            if affected_files is None:
197                # If nothing was specified in the config, use the default.
198                affected_files = self.default_files
199            self.affected_files = affected_files
200        if affected_files:
201            filepath = params.this_path
202            if filepath in affected_files:
203                return True
204        return False
205
206    def merge_matching(self, params):
207        return self.merge_text(params)
208
209    def merge_text(self, params):
210        """Merge the byte contents of a single file.
211
212        This is called after checking that the merge should be performed in
213        merge_contents, and it should behave as per
214        ``breezy.merge.AbstractPerFileMerger.merge_contents``.
215        """
216        raise NotImplementedError(self.merge_text)
217
218
219class MergeFileHookParams(object):
220    """Object holding parameters passed to merge_file_content hooks.
221
222    There are some fields hooks can access:
223
224    :ivar base_path: Path in base tree
225    :ivar other_path: Path in other tree
226    :ivar this_path: Path in this tree
227    :ivar trans_id: the transform ID for the merge of this file
228    :ivar this_kind: kind of file in 'this' tree
229    :ivar other_kind: kind of file in 'other' tree
230    :ivar winner: one of 'this', 'other', 'conflict'
231    """
232
233    def __init__(self, merger, paths, trans_id, this_kind, other_kind,
234                 winner):
235        self._merger = merger
236        self.paths = paths
237        self.base_path, self.other_path, self.this_path = paths
238        self.trans_id = trans_id
239        self.this_kind = this_kind
240        self.other_kind = other_kind
241        self.winner = winner
242
243    def is_file_merge(self):
244        """True if this_kind and other_kind are both 'file'."""
245        return self.this_kind == 'file' and self.other_kind == 'file'
246
247    @decorators.cachedproperty
248    def base_lines(self):
249        """The lines of the 'base' version of the file."""
250        return self._merger.get_lines(self._merger.base_tree, self.base_path)
251
252    @decorators.cachedproperty
253    def this_lines(self):
254        """The lines of the 'this' version of the file."""
255        return self._merger.get_lines(self._merger.this_tree, self.this_path)
256
257    @decorators.cachedproperty
258    def other_lines(self):
259        """The lines of the 'other' version of the file."""
260        return self._merger.get_lines(self._merger.other_tree, self.other_path)
261
262
263class Merger(object):
264
265    hooks = MergeHooks()
266
267    def __init__(self, this_branch, other_tree=None, base_tree=None,
268                 this_tree=None, change_reporter=None,
269                 recurse='down', revision_graph=None):
270        object.__init__(self)
271        self.this_branch = this_branch
272        self.this_basis = _mod_revision.ensure_null(
273            this_branch.last_revision())
274        self.this_rev_id = None
275        self.this_tree = this_tree
276        self.this_revision_tree = None
277        self.this_basis_tree = None
278        self.other_tree = other_tree
279        self.other_branch = None
280        self.base_tree = base_tree
281        self.ignore_zero = False
282        self.backup_files = False
283        self.interesting_files = None
284        self.show_base = False
285        self.reprocess = False
286        self.pp = None
287        self.recurse = recurse
288        self.change_reporter = change_reporter
289        self._cached_trees = {}
290        self._revision_graph = revision_graph
291        self._base_is_ancestor = None
292        self._base_is_other_ancestor = None
293        self._is_criss_cross = None
294        self._lca_trees = None
295
296    def cache_trees_with_revision_ids(self, trees):
297        """Cache any tree in trees if it has a revision_id."""
298        for maybe_tree in trees:
299            if maybe_tree is None:
300                continue
301            try:
302                rev_id = maybe_tree.get_revision_id()
303            except AttributeError:
304                continue
305            self._cached_trees[rev_id] = maybe_tree
306
307    @property
308    def revision_graph(self):
309        if self._revision_graph is None:
310            self._revision_graph = self.this_branch.repository.get_graph()
311        return self._revision_graph
312
313    def _set_base_is_ancestor(self, value):
314        self._base_is_ancestor = value
315
316    def _get_base_is_ancestor(self):
317        if self._base_is_ancestor is None:
318            self._base_is_ancestor = self.revision_graph.is_ancestor(
319                self.base_rev_id, self.this_basis)
320        return self._base_is_ancestor
321
322    base_is_ancestor = property(_get_base_is_ancestor, _set_base_is_ancestor)
323
324    def _set_base_is_other_ancestor(self, value):
325        self._base_is_other_ancestor = value
326
327    def _get_base_is_other_ancestor(self):
328        if self._base_is_other_ancestor is None:
329            if self.other_basis is None:
330                return True
331            self._base_is_other_ancestor = self.revision_graph.is_ancestor(
332                self.base_rev_id, self.other_basis)
333        return self._base_is_other_ancestor
334
335    base_is_other_ancestor = property(_get_base_is_other_ancestor,
336                                      _set_base_is_other_ancestor)
337
338    @staticmethod
339    def from_uncommitted(tree, other_tree, base_tree=None):
340        """Return a Merger for uncommitted changes in other_tree.
341
342        :param tree: The tree to merge into
343        :param other_tree: The tree to get uncommitted changes from
344        :param base_tree: The basis to use for the merge.  If unspecified,
345            other_tree.basis_tree() will be used.
346        """
347        if base_tree is None:
348            base_tree = other_tree.basis_tree()
349        merger = Merger(tree.branch, other_tree, base_tree, tree)
350        merger.base_rev_id = merger.base_tree.get_revision_id()
351        merger.other_rev_id = None
352        merger.other_basis = merger.base_rev_id
353        return merger
354
355    @classmethod
356    def from_mergeable(klass, tree, mergeable):
357        """Return a Merger for a bundle or merge directive.
358
359        :param tree: The tree to merge changes into
360        :param mergeable: A merge directive or bundle
361        """
362        mergeable.install_revisions(tree.branch.repository)
363        base_revision_id, other_revision_id, verified =\
364            mergeable.get_merge_request(tree.branch.repository)
365        revision_graph = tree.branch.repository.get_graph()
366        if base_revision_id is not None:
367            if (base_revision_id != _mod_revision.NULL_REVISION and
368                revision_graph.is_ancestor(
369                    base_revision_id, tree.branch.last_revision())):
370                base_revision_id = None
371            else:
372                trace.warning('Performing cherrypick')
373        merger = klass.from_revision_ids(tree, other_revision_id,
374                                         base_revision_id, revision_graph=revision_graph)
375        return merger, verified
376
377    @staticmethod
378    def from_revision_ids(tree, other, base=None, other_branch=None,
379                          base_branch=None, revision_graph=None,
380                          tree_branch=None):
381        """Return a Merger for revision-ids.
382
383        :param tree: The tree to merge changes into
384        :param other: The revision-id to use as OTHER
385        :param base: The revision-id to use as BASE.  If not specified, will
386            be auto-selected.
387        :param other_branch: A branch containing the other revision-id.  If
388            not supplied, tree.branch is used.
389        :param base_branch: A branch containing the base revision-id.  If
390            not supplied, other_branch or tree.branch will be used.
391        :param revision_graph: If you have a revision_graph precomputed, pass
392            it in, otherwise it will be created for you.
393        :param tree_branch: The branch associated with tree.  If not supplied,
394            tree.branch will be used.
395        """
396        if tree_branch is None:
397            tree_branch = tree.branch
398        merger = Merger(tree_branch, this_tree=tree,
399                        revision_graph=revision_graph)
400        if other_branch is None:
401            other_branch = tree.branch
402        merger.set_other_revision(other, other_branch)
403        if base is None:
404            merger.find_base()
405        else:
406            if base_branch is None:
407                base_branch = other_branch
408            merger.set_base_revision(base, base_branch)
409        return merger
410
411    def revision_tree(self, revision_id, branch=None):
412        if revision_id not in self._cached_trees:
413            if branch is None:
414                branch = self.this_branch
415            try:
416                tree = self.this_tree.revision_tree(revision_id)
417            except errors.NoSuchRevisionInTree:
418                tree = branch.repository.revision_tree(revision_id)
419            self._cached_trees[revision_id] = tree
420        return self._cached_trees[revision_id]
421
422    def _get_tree(self, treespec, possible_transports=None):
423        location, revno = treespec
424        if revno is None:
425            tree = workingtree.WorkingTree.open_containing(location)[0]
426            return tree.branch, tree
427        branch = _mod_branch.Branch.open_containing(
428            location, possible_transports)[0]
429        if revno == -1:
430            revision_id = branch.last_revision()
431        else:
432            revision_id = branch.get_rev_id(revno)
433        revision_id = _mod_revision.ensure_null(revision_id)
434        return branch, self.revision_tree(revision_id, branch)
435
436    def set_interesting_files(self, file_list):
437        self.interesting_files = file_list
438
439    def set_pending(self):
440        if (not self.base_is_ancestor or not self.base_is_other_ancestor
441                or self.other_rev_id is None):
442            return
443        self._add_parent()
444
445    def _add_parent(self):
446        new_parents = self.this_tree.get_parent_ids() + [self.other_rev_id]
447        new_parent_trees = []
448        with contextlib.ExitStack() as stack:
449            for revision_id in new_parents:
450                try:
451                    tree = self.revision_tree(revision_id)
452                except errors.NoSuchRevision:
453                    tree = None
454                else:
455                    stack.enter_context(tree.lock_read())
456                new_parent_trees.append((revision_id, tree))
457            self.this_tree.set_parent_trees(new_parent_trees, allow_leftmost_as_ghost=True)
458
459    def set_other(self, other_revision, possible_transports=None):
460        """Set the revision and tree to merge from.
461
462        This sets the other_tree, other_rev_id, other_basis attributes.
463
464        :param other_revision: The [path, revision] list to merge from.
465        """
466        self.other_branch, self.other_tree = self._get_tree(other_revision,
467                                                            possible_transports)
468        if other_revision[1] == -1:
469            self.other_rev_id = _mod_revision.ensure_null(
470                self.other_branch.last_revision())
471            if _mod_revision.is_null(self.other_rev_id):
472                raise errors.NoCommits(self.other_branch)
473            self.other_basis = self.other_rev_id
474        elif other_revision[1] is not None:
475            self.other_rev_id = self.other_branch.get_rev_id(other_revision[1])
476            self.other_basis = self.other_rev_id
477        else:
478            self.other_rev_id = None
479            self.other_basis = self.other_branch.last_revision()
480            if self.other_basis is None:
481                raise errors.NoCommits(self.other_branch)
482        if self.other_rev_id is not None:
483            self._cached_trees[self.other_rev_id] = self.other_tree
484        self._maybe_fetch(self.other_branch,
485                          self.this_branch, self.other_basis)
486
487    def set_other_revision(self, revision_id, other_branch):
488        """Set 'other' based on a branch and revision id
489
490        :param revision_id: The revision to use for a tree
491        :param other_branch: The branch containing this tree
492        """
493        self.other_rev_id = revision_id
494        self.other_branch = other_branch
495        self._maybe_fetch(other_branch, self.this_branch, self.other_rev_id)
496        self.other_tree = self.revision_tree(revision_id)
497        self.other_basis = revision_id
498
499    def set_base_revision(self, revision_id, branch):
500        """Set 'base' based on a branch and revision id
501
502        :param revision_id: The revision to use for a tree
503        :param branch: The branch containing this tree
504        """
505        self.base_rev_id = revision_id
506        self.base_branch = branch
507        self._maybe_fetch(branch, self.this_branch, revision_id)
508        self.base_tree = self.revision_tree(revision_id)
509
510    def _maybe_fetch(self, source, target, revision_id):
511        if not source.repository.has_same_location(target.repository):
512            target.fetch(source, revision_id)
513
514    def find_base(self):
515        revisions = [_mod_revision.ensure_null(self.this_basis),
516                     _mod_revision.ensure_null(self.other_basis)]
517        if _mod_revision.NULL_REVISION in revisions:
518            self.base_rev_id = _mod_revision.NULL_REVISION
519            self.base_tree = self.revision_tree(self.base_rev_id)
520            self._is_criss_cross = False
521        else:
522            lcas = self.revision_graph.find_lca(revisions[0], revisions[1])
523            self._is_criss_cross = False
524            if len(lcas) == 0:
525                self.base_rev_id = _mod_revision.NULL_REVISION
526            elif len(lcas) == 1:
527                self.base_rev_id = list(lcas)[0]
528            else:  # len(lcas) > 1
529                self._is_criss_cross = True
530                if len(lcas) > 2:
531                    # find_unique_lca can only handle 2 nodes, so we have to
532                    # start back at the beginning. It is a shame to traverse
533                    # the graph again, but better than re-implementing
534                    # find_unique_lca.
535                    self.base_rev_id = self.revision_graph.find_unique_lca(
536                        revisions[0], revisions[1])
537                else:
538                    self.base_rev_id = self.revision_graph.find_unique_lca(
539                        *lcas)
540                sorted_lca_keys = self.revision_graph.find_merge_order(
541                    revisions[0], lcas)
542                if self.base_rev_id == _mod_revision.NULL_REVISION:
543                    self.base_rev_id = sorted_lca_keys[0]
544
545            if self.base_rev_id == _mod_revision.NULL_REVISION:
546                raise errors.UnrelatedBranches()
547            if self._is_criss_cross:
548                trace.warning('Warning: criss-cross merge encountered.  See bzr'
549                              ' help criss-cross.')
550                trace.mutter('Criss-cross lcas: %r' % lcas)
551                if self.base_rev_id in lcas:
552                    trace.mutter('Unable to find unique lca. '
553                                 'Fallback %r as best option.'
554                                 % self.base_rev_id)
555                interesting_revision_ids = set(lcas)
556                interesting_revision_ids.add(self.base_rev_id)
557                interesting_trees = dict((t.get_revision_id(), t)
558                                         for t in self.this_branch.repository.revision_trees(
559                    interesting_revision_ids))
560                self._cached_trees.update(interesting_trees)
561                if self.base_rev_id in lcas:
562                    self.base_tree = interesting_trees[self.base_rev_id]
563                else:
564                    self.base_tree = interesting_trees.pop(self.base_rev_id)
565                self._lca_trees = [interesting_trees[key]
566                                   for key in sorted_lca_keys]
567            else:
568                self.base_tree = self.revision_tree(self.base_rev_id)
569        self.base_is_ancestor = True
570        self.base_is_other_ancestor = True
571        trace.mutter('Base revid: %r' % self.base_rev_id)
572
573    def set_base(self, base_revision):
574        """Set the base revision to use for the merge.
575
576        :param base_revision: A 2-list containing a path and revision number.
577        """
578        trace.mutter("doing merge() with no base_revision specified")
579        if base_revision == [None, None]:
580            self.find_base()
581        else:
582            base_branch, self.base_tree = self._get_tree(base_revision)
583            if base_revision[1] == -1:
584                self.base_rev_id = base_branch.last_revision()
585            elif base_revision[1] is None:
586                self.base_rev_id = _mod_revision.NULL_REVISION
587            else:
588                self.base_rev_id = _mod_revision.ensure_null(
589                    base_branch.get_rev_id(base_revision[1]))
590            self._maybe_fetch(base_branch, self.this_branch, self.base_rev_id)
591
592    def make_merger(self):
593        kwargs = {'working_tree': self.this_tree, 'this_tree': self.this_tree,
594                  'other_tree': self.other_tree,
595                  'interesting_files': self.interesting_files,
596                  'this_branch': self.this_branch,
597                  'other_branch': self.other_branch,
598                  'do_merge': False}
599        if self.merge_type.requires_base:
600            kwargs['base_tree'] = self.base_tree
601        if self.merge_type.supports_reprocess:
602            kwargs['reprocess'] = self.reprocess
603        elif self.reprocess:
604            raise errors.BzrError(
605                "Conflict reduction is not supported for merge"
606                " type %s." % self.merge_type)
607        if self.merge_type.supports_show_base:
608            kwargs['show_base'] = self.show_base
609        elif self.show_base:
610            raise errors.BzrError("Showing base is not supported for this"
611                                  " merge type. %s" % self.merge_type)
612        if (not getattr(self.merge_type, 'supports_reverse_cherrypick', True)
613                and not self.base_is_other_ancestor):
614            raise errors.CannotReverseCherrypick()
615        if self.merge_type.supports_cherrypick:
616            kwargs['cherrypick'] = (not self.base_is_ancestor or
617                                    not self.base_is_other_ancestor)
618        if self._is_criss_cross and getattr(self.merge_type,
619                                            'supports_lca_trees', False):
620            kwargs['lca_trees'] = self._lca_trees
621        return self.merge_type(change_reporter=self.change_reporter,
622                               **kwargs)
623
624    def _do_merge_to(self):
625        merge = self.make_merger()
626        if self.other_branch is not None:
627            self.other_branch.update_references(self.this_branch)
628        for hook in Merger.hooks['pre_merge']:
629            hook(merge)
630        merge.do_merge()
631        for hook in Merger.hooks['post_merge']:
632            hook(merge)
633        if self.recurse == 'down':
634            for relpath in self.this_tree.iter_references():
635                sub_tree = self.this_tree.get_nested_tree(relpath)
636                other_revision = self.other_tree.get_reference_revision(
637                    relpath)
638                if other_revision == sub_tree.last_revision():
639                    continue
640                sub_merge = Merger(sub_tree.branch, this_tree=sub_tree)
641                sub_merge.merge_type = self.merge_type
642                other_branch = self.other_tree.reference_parent(relpath)
643                sub_merge.set_other_revision(other_revision, other_branch)
644                base_tree_path = _mod_tree.find_previous_path(
645                    self.this_tree, self.base_tree, relpath)
646                base_revision = self.base_tree.get_reference_revision(
647                    base_tree_path)
648                sub_merge.base_tree = \
649                    sub_tree.branch.repository.revision_tree(base_revision)
650                sub_merge.base_rev_id = base_revision
651                sub_merge.do_merge()
652        return merge
653
654    def do_merge(self):
655        with contextlib.ExitStack() as stack:
656            stack.enter_context(self.this_tree.lock_tree_write())
657            if self.base_tree is not None:
658                stack.enter_context(self.base_tree.lock_read())
659            if self.other_tree is not None:
660                stack.enter_context(self.other_tree.lock_read())
661            merge = self._do_merge_to()
662        if len(merge.cooked_conflicts) == 0:
663            if not self.ignore_zero and not trace.is_quiet():
664                trace.note(gettext("All changes applied successfully."))
665        else:
666            trace.note(gettext("%d conflicts encountered.")
667                       % len(merge.cooked_conflicts))
668
669        return merge.cooked_conflicts
670
671
672class _InventoryNoneEntry(object):
673    """This represents an inventory entry which *isn't there*.
674
675    It simplifies the merging logic if we always have an InventoryEntry, even
676    if it isn't actually present
677    """
678    executable = None
679    kind = None
680    name = None
681    parent_id = None
682    revision = None
683    symlink_target = None
684    text_sha1 = None
685
686    def is_unmodified(self, other):
687        return other is self
688
689
690_none_entry = _InventoryNoneEntry()
691
692
693class Merge3Merger(object):
694    """Three-way merger that uses the merge3 text merger"""
695    requires_base = True
696    supports_reprocess = True
697    supports_show_base = True
698    history_based = False
699    supports_cherrypick = True
700    supports_reverse_cherrypick = True
701    winner_idx = {"this": 2, "other": 1, "conflict": 1}
702    supports_lca_trees = True
703    requires_file_merge_plan = False
704
705    def __init__(self, working_tree, this_tree, base_tree, other_tree,
706                 reprocess=False, show_base=False,
707                 change_reporter=None, interesting_files=None, do_merge=True,
708                 cherrypick=False, lca_trees=None, this_branch=None,
709                 other_branch=None):
710        """Initialize the merger object and perform the merge.
711
712        :param working_tree: The working tree to apply the merge to
713        :param this_tree: The local tree in the merge operation
714        :param base_tree: The common tree in the merge operation
715        :param other_tree: The other tree to merge changes from
716        :param this_branch: The branch associated with this_tree.  Defaults to
717            this_tree.branch if not supplied.
718        :param other_branch: The branch associated with other_tree, if any.
719        :param: reprocess If True, perform conflict-reduction processing.
720        :param show_base: If True, show the base revision in text conflicts.
721            (incompatible with reprocess)
722        :param change_reporter: An object that should report changes made
723        :param interesting_files: The tree-relative paths of files that should
724            participate in the merge.  If these paths refer to directories,
725            the contents of those directories will also be included.  If not
726            specified, all files may participate in the
727            merge.
728        :param lca_trees: Can be set to a dictionary of {revision_id:rev_tree}
729            if the ancestry was found to include a criss-cross merge.
730            Otherwise should be None.
731        """
732        object.__init__(self)
733        if this_branch is None:
734            this_branch = this_tree.branch
735        self.interesting_files = interesting_files
736        self.working_tree = working_tree
737        self.this_tree = this_tree
738        self.base_tree = base_tree
739        self.other_tree = other_tree
740        self.this_branch = this_branch
741        self.other_branch = other_branch
742        self._raw_conflicts = []
743        self.cooked_conflicts = []
744        self.reprocess = reprocess
745        self.show_base = show_base
746        self._lca_trees = lca_trees
747        # Uncommenting this will change the default algorithm to always use
748        # _entries_lca. This can be useful for running the test suite and
749        # making sure we haven't missed any corner cases.
750        # if lca_trees is None:
751        #     self._lca_trees = [self.base_tree]
752        self.change_reporter = change_reporter
753        self.cherrypick = cherrypick
754        if do_merge:
755            self.do_merge()
756
757    def do_merge(self):
758        with contextlib.ExitStack() as stack:
759            stack.enter_context(self.working_tree.lock_tree_write())
760            stack.enter_context(self.this_tree.lock_read())
761            stack.enter_context(self.base_tree.lock_read())
762            stack.enter_context(self.other_tree.lock_read())
763            self.tt = self.working_tree.transform()
764            stack.enter_context(self.tt)
765            self._compute_transform()
766            results = self.tt.apply(no_conflicts=True)
767            self.write_modified(results)
768            try:
769                self.working_tree.add_conflicts(self.cooked_conflicts)
770            except errors.UnsupportedOperation:
771                pass
772
773    def make_preview_transform(self):
774        with self.base_tree.lock_read(), self.other_tree.lock_read():
775            self.tt = self.working_tree.preview_transform()
776            self._compute_transform()
777            return self.tt
778
779    def _compute_transform(self):
780        if self._lca_trees is None:
781            entries = list(self._entries3())
782            resolver = self._three_way
783        else:
784            entries = list(self._entries_lca())
785            resolver = self._lca_multi_way
786        # Prepare merge hooks
787        factories = Merger.hooks['merge_file_content']
788        # One hook for each registered one plus our default merger
789        hooks = [factory(self) for factory in factories] + [self]
790        self.active_hooks = [hook for hook in hooks if hook is not None]
791        with ui.ui_factory.nested_progress_bar() as child_pb:
792            for num, (file_id, changed, paths3, parents3, names3,
793                      executable3, copied) in enumerate(entries):
794                if copied:
795                    # Treat copies as simple adds for now
796                    paths3 = (None, paths3[1], None)
797                    parents3 = (None, parents3[1], None)
798                    names3 = (None, names3[1], None)
799                    executable3 = (None, executable3[1], None)
800                    changed = True
801                    copied = False
802                trans_id = self.tt.trans_id_file_id(file_id)
803                # Try merging each entry
804                child_pb.update(gettext('Preparing file merge'),
805                                num, len(entries))
806                self._merge_names(trans_id, file_id, paths3, parents3,
807                                  names3, resolver=resolver)
808                if changed:
809                    file_status = self._do_merge_contents(paths3, trans_id, file_id)
810                else:
811                    file_status = 'unmodified'
812                self._merge_executable(paths3, trans_id, executable3,
813                                       file_status, resolver=resolver)
814        self.tt.fixup_new_roots()
815        self._finish_computing_transform()
816
817    def _finish_computing_transform(self):
818        """Finalize the transform and report the changes.
819
820        This is the second half of _compute_transform.
821        """
822        with ui.ui_factory.nested_progress_bar() as child_pb:
823            fs_conflicts = transform.resolve_conflicts(
824                self.tt, child_pb,
825                lambda t, c: transform.conflict_pass(t, c, self.other_tree))
826        if self.change_reporter is not None:
827            from breezy import delta
828            delta.report_changes(
829                self.tt.iter_changes(), self.change_reporter)
830        self.cook_conflicts(fs_conflicts)
831        for conflict in self.cooked_conflicts:
832            trace.warning('%s', conflict.describe())
833
834    def _entries3(self):
835        """Gather data about files modified between three trees.
836
837        Return a list of tuples of file_id, changed, parents3, names3,
838        executable3.  changed is a boolean indicating whether the file contents
839        or kind were changed.  parents3 is a tuple of parent ids for base,
840        other and this.  names3 is a tuple of names for base, other and this.
841        executable3 is a tuple of execute-bit values for base, other and this.
842        """
843        iterator = self.other_tree.iter_changes(self.base_tree,
844                                                specific_files=self.interesting_files,
845                                                extra_trees=[self.this_tree])
846        this_interesting_files = self.this_tree.find_related_paths_across_trees(
847            self.interesting_files, trees=[self.other_tree])
848        this_entries = dict(self.this_tree.iter_entries_by_dir(
849                            specific_files=this_interesting_files))
850        for change in iterator:
851            if change.path[0] is not None:
852                this_path = _mod_tree.find_previous_path(
853                    self.base_tree, self.this_tree, change.path[0])
854            else:
855                this_path = _mod_tree.find_previous_path(
856                    self.other_tree, self.this_tree, change.path[1])
857            this_entry = this_entries.get(this_path)
858            if this_entry is not None:
859                this_name = this_entry.name
860                this_parent = this_entry.parent_id
861                this_executable = this_entry.executable
862            else:
863                this_name = None
864                this_parent = None
865                this_executable = None
866            parents3 = change.parent_id + (this_parent,)
867            names3 = change.name + (this_name,)
868            paths3 = change.path + (this_path, )
869            executable3 = change.executable + (this_executable,)
870            yield (
871                (change.file_id, change.changed_content, paths3,
872                 parents3, names3, executable3, change.copied))
873
874    def _entries_lca(self):
875        """Gather data about files modified between multiple trees.
876
877        This compares OTHER versus all LCA trees, and for interesting entries,
878        it then compares with THIS and BASE.
879
880        For the multi-valued entries, the format will be (BASE, [lca1, lca2])
881
882        :return: [(file_id, changed, paths, parents, names, executable, copied)], where:
883
884            * file_id: Simple file_id of the entry
885            * changed: Boolean, True if the kind or contents changed else False
886            * paths: ((base, [path, in, lcas]), path_other, path_this)
887            * parents: ((base, [parent_id, in, lcas]), parent_id_other,
888                        parent_id_this)
889            * names:   ((base, [name, in, lcas]), name_in_other, name_in_this)
890            * executable: ((base, [exec, in, lcas]), exec_in_other,
891                        exec_in_this)
892        """
893        if self.interesting_files is not None:
894            lookup_trees = [self.this_tree, self.base_tree]
895            lookup_trees.extend(self._lca_trees)
896            # I think we should include the lca trees as well
897            interesting_files = self.other_tree.find_related_paths_across_trees(
898                self.interesting_files, lookup_trees)
899        else:
900            interesting_files = None
901        from .multiwalker import MultiWalker
902        walker = MultiWalker(self.other_tree, self._lca_trees)
903
904        for other_path, file_id, other_ie, lca_values in walker.iter_all():
905            # Is this modified at all from any of the other trees?
906            if other_ie is None:
907                other_ie = _none_entry
908                other_path = None
909            if interesting_files is not None and other_path not in interesting_files:
910                continue
911
912            # If other_revision is found in any of the lcas, that means this
913            # node is uninteresting. This is because when merging, if there are
914            # multiple heads(), we have to create a new node. So if we didn't,
915            # we know that the ancestry is linear, and that OTHER did not
916            # modify anything
917            # See doc/developers/lca_merge_resolution.txt for details
918            # We can't use this shortcut when other_revision is None,
919            # because it may be None because things are WorkingTrees, and
920            # not because it is *actually* None.
921            is_unmodified = False
922            for lca_path, ie in lca_values:
923                if ie is not None and other_ie.is_unmodified(ie):
924                    is_unmodified = True
925                    break
926            if is_unmodified:
927                continue
928
929            lca_entries = []
930            lca_paths = []
931            for lca_path, lca_ie in lca_values:
932                if lca_ie is None:
933                    lca_entries.append(_none_entry)
934                    lca_paths.append(None)
935                else:
936                    lca_entries.append(lca_ie)
937                    lca_paths.append(lca_path)
938
939            try:
940                base_path = self.base_tree.id2path(file_id)
941            except errors.NoSuchId:
942                base_path = None
943                base_ie = _none_entry
944            else:
945                base_ie = next(self.base_tree.iter_entries_by_dir(specific_files=[base_path]))[1]
946
947            try:
948                this_path = self.this_tree.id2path(file_id)
949            except errors.NoSuchId:
950                this_ie = _none_entry
951                this_path = None
952            else:
953                this_ie = next(self.this_tree.iter_entries_by_dir(specific_files=[this_path]))[1]
954
955            lca_kinds = []
956            lca_parent_ids = []
957            lca_names = []
958            lca_executable = []
959            for lca_ie in lca_entries:
960                lca_kinds.append(lca_ie.kind)
961                lca_parent_ids.append(lca_ie.parent_id)
962                lca_names.append(lca_ie.name)
963                lca_executable.append(lca_ie.executable)
964
965            kind_winner = self._lca_multi_way(
966                (base_ie.kind, lca_kinds),
967                other_ie.kind, this_ie.kind)
968            parent_id_winner = self._lca_multi_way(
969                (base_ie.parent_id, lca_parent_ids),
970                other_ie.parent_id, this_ie.parent_id)
971            name_winner = self._lca_multi_way(
972                (base_ie.name, lca_names),
973                other_ie.name, this_ie.name)
974
975            content_changed = True
976            if kind_winner == 'this':
977                # No kind change in OTHER, see if there are *any* changes
978                if other_ie.kind == 'directory':
979                    if parent_id_winner == 'this' and name_winner == 'this':
980                        # No change for this directory in OTHER, skip
981                        continue
982                    content_changed = False
983                elif other_ie.kind is None or other_ie.kind == 'file':
984                    def get_sha1(tree, path):
985                        if path is None:
986                            return None
987                        try:
988                            return tree.get_file_sha1(path)
989                        except errors.NoSuchFile:
990                            return None
991                    base_sha1 = get_sha1(self.base_tree, base_path)
992                    lca_sha1s = [get_sha1(tree, lca_path)
993                                 for tree, lca_path
994                                 in zip(self._lca_trees, lca_paths)]
995                    this_sha1 = get_sha1(self.this_tree, this_path)
996                    other_sha1 = get_sha1(self.other_tree, other_path)
997                    sha1_winner = self._lca_multi_way(
998                        (base_sha1, lca_sha1s), other_sha1, this_sha1,
999                        allow_overriding_lca=False)
1000                    exec_winner = self._lca_multi_way(
1001                        (base_ie.executable, lca_executable),
1002                        other_ie.executable, this_ie.executable)
1003                    if (parent_id_winner == 'this' and name_winner == 'this'
1004                            and sha1_winner == 'this' and exec_winner == 'this'):
1005                        # No kind, parent, name, exec, or content change for
1006                        # OTHER, so this node is not considered interesting
1007                        continue
1008                    if sha1_winner == 'this':
1009                        content_changed = False
1010                elif other_ie.kind == 'symlink':
1011                    def get_target(ie, tree, path):
1012                        if ie.kind != 'symlink':
1013                            return None
1014                        return tree.get_symlink_target(path)
1015                    base_target = get_target(base_ie, self.base_tree, base_path)
1016                    lca_targets = [get_target(ie, tree, lca_path) for ie, tree, lca_path
1017                                   in zip(lca_entries, self._lca_trees, lca_paths)]
1018                    this_target = get_target(
1019                        this_ie, self.this_tree, this_path)
1020                    other_target = get_target(
1021                        other_ie, self.other_tree, other_path)
1022                    target_winner = self._lca_multi_way(
1023                        (base_target, lca_targets),
1024                        other_target, this_target)
1025                    if (parent_id_winner == 'this' and name_winner == 'this'
1026                            and target_winner == 'this'):
1027                        # No kind, parent, name, or symlink target change
1028                        # not interesting
1029                        continue
1030                    if target_winner == 'this':
1031                        content_changed = False
1032                elif other_ie.kind == 'tree-reference':
1033                    # The 'changed' information seems to be handled at a higher
1034                    # level. At least, _entries3 returns False for content
1035                    # changed, even when at a new revision_id.
1036                    content_changed = False
1037                    if (parent_id_winner == 'this' and name_winner == 'this'):
1038                        # Nothing interesting
1039                        continue
1040                else:
1041                    raise AssertionError('unhandled kind: %s' % other_ie.kind)
1042
1043            # If we have gotten this far, that means something has changed
1044            yield (file_id, content_changed,
1045                           ((base_path, lca_paths),
1046                            other_path, this_path),
1047                           ((base_ie.parent_id, lca_parent_ids),
1048                            other_ie.parent_id, this_ie.parent_id),
1049                           ((base_ie.name, lca_names),
1050                            other_ie.name, this_ie.name),
1051                           ((base_ie.executable, lca_executable),
1052                            other_ie.executable, this_ie.executable),
1053                           # Copy detection is not yet supported, so nothing is
1054                           # a copy:
1055                           False
1056                           )
1057
1058    def write_modified(self, results):
1059        if not self.working_tree.supports_merge_modified():
1060            return
1061        modified_hashes = {}
1062        for path in results.modified_paths:
1063            wt_relpath = self.working_tree.relpath(path)
1064            if not self.working_tree.is_versioned(wt_relpath):
1065                continue
1066            hash = self.working_tree.get_file_sha1(wt_relpath)
1067            if hash is None:
1068                continue
1069            modified_hashes[wt_relpath] = hash
1070        self.working_tree.set_merge_modified(modified_hashes)
1071
1072    @staticmethod
1073    def parent(entry):
1074        """Determine the parent for a file_id (used as a key method)"""
1075        if entry is None:
1076            return None
1077        return entry.parent_id
1078
1079    @staticmethod
1080    def name(entry):
1081        """Determine the name for a file_id (used as a key method)"""
1082        if entry is None:
1083            return None
1084        return entry.name
1085
1086    @staticmethod
1087    def contents_sha1(tree, path):
1088        """Determine the sha1 of the file contents (used as a key method)."""
1089        try:
1090            return tree.get_file_sha1(path)
1091        except errors.NoSuchFile:
1092            return None
1093
1094    @staticmethod
1095    def executable(tree, path):
1096        """Determine the executability of a file-id (used as a key method)."""
1097        try:
1098            if tree.kind(path) != "file":
1099                return False
1100        except errors.NoSuchFile:
1101            return None
1102        return tree.is_executable(path)
1103
1104    @staticmethod
1105    def kind(tree, path):
1106        """Determine the kind of a file-id (used as a key method)."""
1107        try:
1108            return tree.kind(path)
1109        except errors.NoSuchFile:
1110            return None
1111
1112    @staticmethod
1113    def _three_way(base, other, this):
1114        if base == other:
1115            # if 'base == other', either they all agree, or only 'this' has
1116            # changed.
1117            return 'this'
1118        elif this not in (base, other):
1119            # 'this' is neither 'base' nor 'other', so both sides changed
1120            return 'conflict'
1121        elif this == other:
1122            # "Ambiguous clean merge" -- both sides have made the same change.
1123            return "this"
1124        else:
1125            # this == base: only other has changed.
1126            return "other"
1127
1128    @staticmethod
1129    def _lca_multi_way(bases, other, this, allow_overriding_lca=True):
1130        """Consider LCAs when determining whether a change has occurred.
1131
1132        If LCAS are all identical, this is the same as a _three_way comparison.
1133
1134        :param bases: value in (BASE, [LCAS])
1135        :param other: value in OTHER
1136        :param this: value in THIS
1137        :param allow_overriding_lca: If there is more than one unique lca
1138            value, allow OTHER to override THIS if it has a new value, and
1139            THIS only has an lca value, or vice versa. This is appropriate for
1140            truly scalar values, not as much for non-scalars.
1141        :return: 'this', 'other', or 'conflict' depending on whether an entry
1142            changed or not.
1143        """
1144        # See doc/developers/lca_tree_merging.txt for details about this
1145        # algorithm.
1146        if other == this:
1147            # Either Ambiguously clean, or nothing was actually changed. We
1148            # don't really care
1149            return 'this'
1150        base_val, lca_vals = bases
1151        # Remove 'base_val' from the lca_vals, because it is not interesting
1152        filtered_lca_vals = [lca_val for lca_val in lca_vals
1153                             if lca_val != base_val]
1154        if len(filtered_lca_vals) == 0:
1155            return Merge3Merger._three_way(base_val, other, this)
1156
1157        unique_lca_vals = set(filtered_lca_vals)
1158        if len(unique_lca_vals) == 1:
1159            return Merge3Merger._three_way(unique_lca_vals.pop(), other, this)
1160
1161        if allow_overriding_lca:
1162            if other in unique_lca_vals:
1163                if this in unique_lca_vals:
1164                    # Each side picked a different lca, conflict
1165                    return 'conflict'
1166                else:
1167                    # This has a value which supersedes both lca values, and
1168                    # other only has an lca value
1169                    return 'this'
1170            elif this in unique_lca_vals:
1171                # OTHER has a value which supersedes both lca values, and this
1172                # only has an lca value
1173                return 'other'
1174
1175        # At this point, the lcas disagree, and the tip disagree
1176        return 'conflict'
1177
1178    def _merge_names(self, trans_id, file_id, paths, parents, names, resolver):
1179        """Perform a merge on file names and parents"""
1180        base_name, other_name, this_name = names
1181        base_parent, other_parent, this_parent = parents
1182        unused_base_path, other_path, this_path = paths
1183
1184        name_winner = resolver(*names)
1185
1186        parent_id_winner = resolver(*parents)
1187        if this_name is None:
1188            if name_winner == "this":
1189                name_winner = "other"
1190            if parent_id_winner == "this":
1191                parent_id_winner = "other"
1192        if name_winner == "this" and parent_id_winner == "this":
1193            return
1194        if name_winner == 'conflict' or parent_id_winner == 'conflict':
1195            # Creating helpers (.OTHER or .THIS) here cause problems down the
1196            # road if a ContentConflict needs to be created so we should not do
1197            # that
1198            self._raw_conflicts.append(('path conflict', trans_id, file_id,
1199                                        this_parent, this_name,
1200                                        other_parent, other_name))
1201        if other_path is None:
1202            # it doesn't matter whether the result was 'other' or
1203            # 'conflict'-- if it has no file id, we leave it alone.
1204            return
1205        parent_id = parents[self.winner_idx[parent_id_winner]]
1206        name = names[self.winner_idx[name_winner]]
1207        if parent_id is not None or name is not None:
1208            # if we get here, name_winner and parent_winner are set to safe
1209            # values.
1210            if parent_id is None and name is not None:
1211                # if parent_id is None and name is non-None, current file is
1212                # the tree root.
1213                if names[self.winner_idx[parent_id_winner]] != '':
1214                    raise AssertionError(
1215                        'File looks like a root, but named %s' %
1216                        names[self.winner_idx[parent_id_winner]])
1217                parent_trans_id = transform.ROOT_PARENT
1218            else:
1219                parent_trans_id = self.tt.trans_id_file_id(parent_id)
1220            self.tt.adjust_path(name, parent_trans_id, trans_id)
1221
1222    def _do_merge_contents(self, paths, trans_id, file_id):
1223        """Performs a merge on file_id contents."""
1224        def contents_pair(tree, path):
1225            if path is None:
1226                return (None, None)
1227            try:
1228                kind = tree.kind(path)
1229            except errors.NoSuchFile:
1230                return (None, None)
1231            if kind == "file":
1232                contents = tree.get_file_sha1(path)
1233            elif kind == "symlink":
1234                contents = tree.get_symlink_target(path)
1235            else:
1236                contents = None
1237            return kind, contents
1238
1239        base_path, other_path, this_path = paths
1240        # See SPOT run.  run, SPOT, run.
1241        # So we're not QUITE repeating ourselves; we do tricky things with
1242        # file kind...
1243        other_pair = contents_pair(self.other_tree, other_path)
1244        this_pair = contents_pair(self.this_tree, this_path)
1245        if self._lca_trees:
1246            (base_path, lca_paths) = base_path
1247            base_pair = contents_pair(self.base_tree, base_path)
1248            lca_pairs = [contents_pair(tree, path)
1249                         for tree, path in zip(self._lca_trees, lca_paths)]
1250            winner = self._lca_multi_way((base_pair, lca_pairs), other_pair,
1251                                         this_pair, allow_overriding_lca=False)
1252        else:
1253            base_pair = contents_pair(self.base_tree, base_path)
1254            if base_pair == other_pair:
1255                winner = 'this'
1256            else:
1257                # We delayed evaluating this_pair as long as we can to avoid
1258                # unnecessary sha1 calculation
1259                this_pair = contents_pair(self.this_tree, this_path)
1260                winner = self._three_way(base_pair, other_pair, this_pair)
1261        if winner == 'this':
1262            # No interesting changes introduced by OTHER
1263            return "unmodified"
1264        # We have a hypothetical conflict, but if we have files, then we
1265        # can try to merge the content
1266        params = MergeFileHookParams(
1267            self, (base_path, other_path, this_path), trans_id, this_pair[0],
1268            other_pair[0], winner)
1269        hooks = self.active_hooks
1270        hook_status = 'not_applicable'
1271        for hook in hooks:
1272            hook_status, lines = hook.merge_contents(params)
1273            if hook_status != 'not_applicable':
1274                # Don't try any more hooks, this one applies.
1275                break
1276        # If the merge ends up replacing the content of the file, we get rid of
1277        # it at the end of this method (this variable is used to track the
1278        # exceptions to this rule).
1279        keep_this = False
1280        result = "modified"
1281        if hook_status == 'not_applicable':
1282            # No merge hook was able to resolve the situation. Two cases exist:
1283            # a content conflict or a duplicate one.
1284            result = None
1285            name = self.tt.final_name(trans_id)
1286            parent_id = self.tt.final_parent(trans_id)
1287            inhibit_content_conflict = False
1288            if params.this_kind is None:  # file_id is not in THIS
1289                # Is the name used for a different file_id ?
1290                if self.this_tree.is_versioned(other_path):
1291                    # Two entries for the same path
1292                    keep_this = True
1293                    # versioning the merged file will trigger a duplicate
1294                    # conflict
1295                    self.tt.version_file(trans_id, file_id=file_id)
1296                    transform.create_from_tree(
1297                        self.tt, trans_id, self.other_tree,
1298                        other_path,
1299                        filter_tree_path=self._get_filter_tree_path(other_path))
1300                    inhibit_content_conflict = True
1301            elif params.other_kind is None:  # file_id is not in OTHER
1302                # Is the name used for a different file_id ?
1303                if self.other_tree.is_versioned(this_path):
1304                    # Two entries for the same path again, but here, the other
1305                    # entry will also be merged.  We simply inhibit the
1306                    # 'content' conflict creation because we know OTHER will
1307                    # create (or has already created depending on ordering) an
1308                    # entry at the same path. This will trigger a 'duplicate'
1309                    # conflict later.
1310                    keep_this = True
1311                    inhibit_content_conflict = True
1312            if not inhibit_content_conflict:
1313                if params.this_kind is not None:
1314                    self.tt.unversion_file(trans_id)
1315                # This is a contents conflict, because none of the available
1316                # functions could merge it.
1317                file_group = self._dump_conflicts(
1318                    name, (base_path, other_path, this_path), parent_id)
1319                for tid in file_group:
1320                    self.tt.version_file(tid, file_id=file_id)
1321                    break
1322                self._raw_conflicts.append(('contents conflict', file_group))
1323        elif hook_status == 'success':
1324            self.tt.create_file(lines, trans_id)
1325        elif hook_status == 'conflicted':
1326            # XXX: perhaps the hook should be able to provide
1327            # the BASE/THIS/OTHER files?
1328            self.tt.create_file(lines, trans_id)
1329            self._raw_conflicts.append(('text conflict', trans_id))
1330            name = self.tt.final_name(trans_id)
1331            parent_id = self.tt.final_parent(trans_id)
1332            self._dump_conflicts(
1333                name, (base_path, other_path, this_path), parent_id)
1334        elif hook_status == 'delete':
1335            self.tt.unversion_file(trans_id)
1336            result = "deleted"
1337        elif hook_status == 'done':
1338            # The hook function did whatever it needs to do directly, no
1339            # further action needed here.
1340            pass
1341        else:
1342            raise AssertionError('unknown hook_status: %r' % (hook_status,))
1343        if not this_path and result == "modified":
1344            self.tt.version_file(trans_id, file_id=file_id)
1345        if not keep_this:
1346            # The merge has been performed and produced a new content, so the
1347            # old contents should not be retained.
1348            self.tt.delete_contents(trans_id)
1349        return result
1350
1351    def _default_other_winner_merge(self, merge_hook_params):
1352        """Replace this contents with other."""
1353        trans_id = merge_hook_params.trans_id
1354        if merge_hook_params.other_path is not None:
1355            # OTHER changed the file
1356            transform.create_from_tree(
1357                self.tt, trans_id, self.other_tree,
1358                merge_hook_params.other_path,
1359                filter_tree_path=self._get_filter_tree_path(merge_hook_params.other_path))
1360            return 'done', None
1361        elif merge_hook_params.this_path is not None:
1362            # OTHER deleted the file
1363            return 'delete', None
1364        else:
1365            raise AssertionError(
1366                'winner is OTHER, but file %r not in THIS or OTHER tree'
1367                % (merge_hook_params.base_path,))
1368
1369    def merge_contents(self, merge_hook_params):
1370        """Fallback merge logic after user installed hooks."""
1371        # This function is used in merge hooks as the fallback instance.
1372        # Perhaps making this function and the functions it calls be a
1373        # a separate class would be better.
1374        if merge_hook_params.winner == 'other':
1375            # OTHER is a straight winner, so replace this contents with other
1376            return self._default_other_winner_merge(merge_hook_params)
1377        elif merge_hook_params.is_file_merge():
1378            # THIS and OTHER are both files, so text merge.  Either
1379            # BASE is a file, or both converted to files, so at least we
1380            # have agreement that output should be a file.
1381            try:
1382                self.text_merge(merge_hook_params.trans_id,
1383                                merge_hook_params.paths)
1384            except errors.BinaryFile:
1385                return 'not_applicable', None
1386            return 'done', None
1387        else:
1388            return 'not_applicable', None
1389
1390    def get_lines(self, tree, path):
1391        """Return the lines in a file, or an empty list."""
1392        if path is None:
1393            return []
1394        try:
1395            kind = tree.kind(path)
1396        except errors.NoSuchFile:
1397            return []
1398        else:
1399            if kind != 'file':
1400                return []
1401            return tree.get_file_lines(path)
1402
1403    def text_merge(self, trans_id, paths):
1404        """Perform a three-way text merge on a file"""
1405        # it's possible that we got here with base as a different type.
1406        # if so, we just want two-way text conflicts.
1407        base_path, other_path, this_path = paths
1408        base_lines = self.get_lines(self.base_tree, base_path)
1409        other_lines = self.get_lines(self.other_tree, other_path)
1410        this_lines = self.get_lines(self.this_tree, this_path)
1411        m3 = merge3.Merge3(base_lines, this_lines, other_lines,
1412                           is_cherrypick=self.cherrypick)
1413        start_marker = b"!START OF MERGE CONFLICT!" + b"I HOPE THIS IS UNIQUE"
1414        if self.show_base is True:
1415            base_marker = b'|' * 7
1416        else:
1417            base_marker = None
1418
1419        def iter_merge3(retval):
1420            retval["text_conflicts"] = False
1421            for line in m3.merge_lines(name_a=b"TREE",
1422                                       name_b=b"MERGE-SOURCE",
1423                                       name_base=b"BASE-REVISION",
1424                                       start_marker=start_marker,
1425                                       base_marker=base_marker,
1426                                       reprocess=self.reprocess):
1427                if line.startswith(start_marker):
1428                    retval["text_conflicts"] = True
1429                    yield line.replace(start_marker, b'<' * 7)
1430                else:
1431                    yield line
1432        retval = {}
1433        merge3_iterator = iter_merge3(retval)
1434        self.tt.create_file(merge3_iterator, trans_id)
1435        if retval["text_conflicts"] is True:
1436            self._raw_conflicts.append(('text conflict', trans_id))
1437            name = self.tt.final_name(trans_id)
1438            parent_id = self.tt.final_parent(trans_id)
1439            file_group = self._dump_conflicts(
1440                name, paths, parent_id,
1441                lines=(base_lines, other_lines, this_lines))
1442            file_group.append(trans_id)
1443
1444    def _get_filter_tree_path(self, path):
1445        if self.this_tree.supports_content_filtering():
1446            # We get the path from the working tree if it exists.
1447            # That fails though when OTHER is adding a file, so
1448            # we fall back to the other tree to find the path if
1449            # it doesn't exist locally.
1450            filter_path = _mod_tree.find_previous_path(
1451                self.other_tree, self.working_tree, path)
1452            if filter_path is None:
1453                filter_path = path
1454            return filter_path
1455        # Skip the lookup for older formats
1456        return None
1457
1458    def _dump_conflicts(self, name, paths, parent_id, lines=None,
1459                        no_base=False):
1460        """Emit conflict files.
1461        If this_lines, base_lines, or other_lines are omitted, they will be
1462        determined automatically.  If set_version is true, the .OTHER, .THIS
1463        or .BASE (in that order) will be created as versioned files.
1464        """
1465        base_path, other_path, this_path = paths
1466        if lines:
1467            base_lines, other_lines, this_lines = lines
1468        else:
1469            base_lines = other_lines = this_lines = None
1470        data = [('OTHER', self.other_tree, other_path, other_lines),
1471                ('THIS', self.this_tree, this_path, this_lines)]
1472        if not no_base:
1473            data.append(('BASE', self.base_tree, base_path, base_lines))
1474
1475        # We need to use the actual path in the working tree of the file here,
1476        if self.this_tree.supports_content_filtering():
1477            filter_tree_path = this_path
1478        else:
1479            filter_tree_path = None
1480
1481        file_group = []
1482        for suffix, tree, path, lines in data:
1483            if path is not None:
1484                trans_id = self._conflict_file(
1485                    name, parent_id, path, tree, suffix, lines,
1486                    filter_tree_path)
1487                file_group.append(trans_id)
1488        return file_group
1489
1490    def _conflict_file(self, name, parent_id, path, tree, suffix,
1491                       lines=None, filter_tree_path=None):
1492        """Emit a single conflict file."""
1493        name = name + '.' + suffix
1494        trans_id = self.tt.create_path(name, parent_id)
1495        transform.create_from_tree(
1496            self.tt, trans_id, tree, path,
1497            chunks=lines,
1498            filter_tree_path=filter_tree_path)
1499        return trans_id
1500
1501    def _merge_executable(self, paths, trans_id, executable, file_status,
1502                          resolver):
1503        """Perform a merge on the execute bit."""
1504        base_executable, other_executable, this_executable = executable
1505        base_path, other_path, this_path = paths
1506        if file_status == "deleted":
1507            return
1508        winner = resolver(*executable)
1509        if winner == "conflict":
1510            # There must be a None in here, if we have a conflict, but we
1511            # need executability since file status was not deleted.
1512            if other_path is None:
1513                winner = "this"
1514            else:
1515                winner = "other"
1516        if winner == 'this' and file_status != "modified":
1517            return
1518        if self.tt.final_kind(trans_id) != "file":
1519            return
1520        if winner == "this":
1521            executability = this_executable
1522        else:
1523            if other_path is not None:
1524                executability = other_executable
1525            elif this_path is not None:
1526                executability = this_executable
1527            elif base_path is not None:
1528                executability = base_executable
1529        if executability is not None:
1530            self.tt.set_executability(executability, trans_id)
1531
1532    def cook_conflicts(self, fs_conflicts):
1533        """Convert all conflicts into a form that doesn't depend on trans_id"""
1534        self.cooked_conflicts = list(self.tt.cook_conflicts(
1535            list(fs_conflicts) + self._raw_conflicts))
1536
1537
1538class WeaveMerger(Merge3Merger):
1539    """Three-way tree merger, text weave merger."""
1540    supports_reprocess = True
1541    supports_show_base = False
1542    supports_reverse_cherrypick = False
1543    history_based = True
1544    requires_file_merge_plan = True
1545
1546    def _generate_merge_plan(self, this_path, base):
1547        return self.this_tree.plan_file_merge(this_path, self.other_tree,
1548                                              base=base)
1549
1550    def _merged_lines(self, this_path):
1551        """Generate the merged lines.
1552        There is no distinction between lines that are meant to contain <<<<<<<
1553        and conflicts.
1554        """
1555        if self.cherrypick:
1556            base = self.base_tree
1557        else:
1558            base = None
1559        plan = self._generate_merge_plan(this_path, base)
1560        if 'merge' in debug.debug_flags:
1561            plan = list(plan)
1562            trans_id = self.tt.trans_id_file_id(file_id)
1563            name = self.tt.final_name(trans_id) + '.plan'
1564            contents = (b'%11s|%s' % l for l in plan)
1565            self.tt.new_file(name, self.tt.final_parent(trans_id), contents)
1566        textmerge = versionedfile.PlanWeaveMerge(plan, b'<<<<<<< TREE\n',
1567                                                 b'>>>>>>> MERGE-SOURCE\n')
1568        lines, conflicts = textmerge.merge_lines(self.reprocess)
1569        if conflicts:
1570            base_lines = textmerge.base_from_plan()
1571        else:
1572            base_lines = None
1573        return lines, base_lines
1574
1575    def text_merge(self, trans_id, paths):
1576        """Perform a (weave) text merge for a given file and file-id.
1577        If conflicts are encountered, .THIS and .OTHER files will be emitted,
1578        and a conflict will be noted.
1579        """
1580        base_path, other_path, this_path = paths
1581        lines, base_lines = self._merged_lines(this_path)
1582        lines = list(lines)
1583        # Note we're checking whether the OUTPUT is binary in this case,
1584        # because we don't want to get into weave merge guts.
1585        textfile.check_text_lines(lines)
1586        self.tt.create_file(lines, trans_id)
1587        if base_lines is not None:
1588            # Conflict
1589            self._raw_conflicts.append(('text conflict', trans_id))
1590            name = self.tt.final_name(trans_id)
1591            parent_id = self.tt.final_parent(trans_id)
1592            file_group = self._dump_conflicts(name, paths, parent_id,
1593                                              (base_lines, None, None),
1594                                              no_base=False)
1595            file_group.append(trans_id)
1596
1597
1598class LCAMerger(WeaveMerger):
1599
1600    requires_file_merge_plan = True
1601
1602    def _generate_merge_plan(self, this_path, base):
1603        return self.this_tree.plan_file_lca_merge(this_path, self.other_tree,
1604                                                  base=base)
1605
1606
1607class Diff3Merger(Merge3Merger):
1608    """Three-way merger using external diff3 for text merging"""
1609
1610    requires_file_merge_plan = False
1611
1612    def dump_file(self, temp_dir, name, tree, path):
1613        out_path = osutils.pathjoin(temp_dir, name)
1614        with open(out_path, "wb") as out_file:
1615            in_file = tree.get_file(path)
1616            for line in in_file:
1617                out_file.write(line)
1618        return out_path
1619
1620    def text_merge(self, trans_id, paths):
1621        """Perform a diff3 merge using a specified file-id and trans-id.
1622        If conflicts are encountered, .BASE, .THIS. and .OTHER conflict files
1623        will be dumped, and a will be conflict noted.
1624        """
1625        import breezy.patch
1626        base_path, other_path, this_path = paths
1627        temp_dir = osutils.mkdtemp(prefix="bzr-")
1628        try:
1629            new_file = osutils.pathjoin(temp_dir, "new")
1630            this = self.dump_file(
1631                temp_dir, "this", self.this_tree, this_path)
1632            base = self.dump_file(
1633                temp_dir, "base", self.base_tree, base_path)
1634            other = self.dump_file(
1635                temp_dir, "other", self.other_tree, other_path)
1636            status = breezy.patch.diff3(new_file, this, base, other)
1637            if status not in (0, 1):
1638                raise errors.BzrError("Unhandled diff3 exit code")
1639            with open(new_file, 'rb') as f:
1640                self.tt.create_file(f, trans_id)
1641            if status == 1:
1642                name = self.tt.final_name(trans_id)
1643                parent_id = self.tt.final_parent(trans_id)
1644                self._dump_conflicts(name, paths, parent_id)
1645                self._raw_conflicts.append(('text conflict', trans_id))
1646        finally:
1647            osutils.rmtree(temp_dir)
1648
1649
1650class PathNotInTree(errors.BzrError):
1651
1652    _fmt = """Merge-into failed because %(tree)s does not contain %(path)s."""
1653
1654    def __init__(self, path, tree):
1655        errors.BzrError.__init__(self, path=path, tree=tree)
1656
1657
1658class MergeIntoMerger(Merger):
1659    """Merger that understands other_tree will be merged into a subdir.
1660
1661    This also changes the Merger api so that it uses real Branch, revision_id,
1662    and RevisonTree objects, rather than using revision specs.
1663    """
1664
1665    def __init__(self, this_tree, other_branch, other_tree, target_subdir,
1666                 source_subpath, other_rev_id=None):
1667        """Create a new MergeIntoMerger object.
1668
1669        source_subpath in other_tree will be effectively copied to
1670        target_subdir in this_tree.
1671
1672        :param this_tree: The tree that we will be merging into.
1673        :param other_branch: The Branch we will be merging from.
1674        :param other_tree: The RevisionTree object we want to merge.
1675        :param target_subdir: The relative path where we want to merge
1676            other_tree into this_tree
1677        :param source_subpath: The relative path specifying the subtree of
1678            other_tree to merge into this_tree.
1679        """
1680        # It is assumed that we are merging a tree that is not in our current
1681        # ancestry, which means we are using the "EmptyTree" as our basis.
1682        null_ancestor_tree = this_tree.branch.repository.revision_tree(
1683            _mod_revision.NULL_REVISION)
1684        super(MergeIntoMerger, self).__init__(
1685            this_branch=this_tree.branch,
1686            this_tree=this_tree,
1687            other_tree=other_tree,
1688            base_tree=null_ancestor_tree,
1689            )
1690        self._target_subdir = target_subdir
1691        self._source_subpath = source_subpath
1692        self.other_branch = other_branch
1693        if other_rev_id is None:
1694            other_rev_id = other_tree.get_revision_id()
1695        self.other_rev_id = self.other_basis = other_rev_id
1696        self.base_is_ancestor = True
1697        self.backup_files = True
1698        self.merge_type = Merge3Merger
1699        self.show_base = False
1700        self.reprocess = False
1701        self.interesting_files = None
1702        self.merge_type = _MergeTypeParameterizer(MergeIntoMergeType,
1703                                                  target_subdir=self._target_subdir,
1704                                                  source_subpath=self._source_subpath)
1705        if self._source_subpath != '':
1706            # If this isn't a partial merge make sure the revisions will be
1707            # present.
1708            self._maybe_fetch(self.other_branch, self.this_branch,
1709                              self.other_basis)
1710
1711    def set_pending(self):
1712        if self._source_subpath != '':
1713            return
1714        Merger.set_pending(self)
1715
1716
1717class _MergeTypeParameterizer(object):
1718    """Wrap a merge-type class to provide extra parameters.
1719
1720    This is hack used by MergeIntoMerger to pass some extra parameters to its
1721    merge_type.  Merger.do_merge() sets up its own set of parameters to pass to
1722    the 'merge_type' member.  It is difficult override do_merge without
1723    re-writing the whole thing, so instead we create a wrapper which will pass
1724    the extra parameters.
1725    """
1726
1727    def __init__(self, merge_type, **kwargs):
1728        self._extra_kwargs = kwargs
1729        self._merge_type = merge_type
1730
1731    def __call__(self, *args, **kwargs):
1732        kwargs.update(self._extra_kwargs)
1733        return self._merge_type(*args, **kwargs)
1734
1735    def __getattr__(self, name):
1736        return getattr(self._merge_type, name)
1737
1738
1739class MergeIntoMergeType(Merge3Merger):
1740    """Merger that incorporates a tree (or part of a tree) into another."""
1741
1742    def __init__(self, *args, **kwargs):
1743        """Initialize the merger object.
1744
1745        :param args: See Merge3Merger.__init__'s args.
1746        :param kwargs: See Merge3Merger.__init__'s keyword args, except for
1747            source_subpath and target_subdir.
1748        :keyword source_subpath: The relative path specifying the subtree of
1749            other_tree to merge into this_tree.
1750        :keyword target_subdir: The relative path where we want to merge
1751            other_tree into this_tree
1752        """
1753        # All of the interesting work happens during Merge3Merger.__init__(),
1754        # so we have have to hack in to get our extra parameters set.
1755        self._source_subpath = kwargs.pop('source_subpath')
1756        self._target_subdir = kwargs.pop('target_subdir')
1757        super(MergeIntoMergeType, self).__init__(*args, **kwargs)
1758
1759    def _compute_transform(self):
1760        with ui.ui_factory.nested_progress_bar() as child_pb:
1761            entries = self._entries_to_incorporate()
1762            entries = list(entries)
1763            for num, (entry, parent_id, relpath) in enumerate(entries):
1764                child_pb.update(gettext('Preparing file merge'),
1765                                num, len(entries))
1766                parent_trans_id = self.tt.trans_id_file_id(parent_id)
1767                path = osutils.pathjoin(self._source_subpath, relpath)
1768                trans_id = transform.new_by_entry(path, self.tt, entry,
1769                                                  parent_trans_id, self.other_tree)
1770        self._finish_computing_transform()
1771
1772    def _entries_to_incorporate(self):
1773        """Yields pairs of (inventory_entry, new_parent)."""
1774        subdir_id = self.other_tree.path2id(self._source_subpath)
1775        if subdir_id is None:
1776            # XXX: The error would be clearer if it gave the URL of the source
1777            # branch, but we don't have a reference to that here.
1778            raise PathNotInTree(self._source_subpath, "Source tree")
1779        subdir = next(self.other_tree.iter_entries_by_dir(
1780            specific_files=[self._source_subpath]))[1]
1781        parent_in_target = osutils.dirname(self._target_subdir)
1782        target_id = self.this_tree.path2id(parent_in_target)
1783        if target_id is None:
1784            raise PathNotInTree(self._target_subdir, "Target tree")
1785        name_in_target = osutils.basename(self._target_subdir)
1786        merge_into_root = subdir.copy()
1787        merge_into_root.name = name_in_target
1788        try:
1789            self.this_tree.id2path(merge_into_root.file_id)
1790        except errors.NoSuchId:
1791            pass
1792        else:
1793            # Give the root a new file-id.
1794            # This can happen fairly easily if the directory we are
1795            # incorporating is the root, and both trees have 'TREE_ROOT' as
1796            # their root_id.  Users will expect this to Just Work, so we
1797            # change the file-id here.
1798            # Non-root file-ids could potentially conflict too.  That's really
1799            # an edge case, so we don't do anything special for those.  We let
1800            # them cause conflicts.
1801            merge_into_root.file_id = generate_ids.gen_file_id(name_in_target)
1802        yield (merge_into_root, target_id, '')
1803        if subdir.kind != 'directory':
1804            # No children, so we are done.
1805            return
1806        for path, entry in self.other_tree.root_inventory.iter_entries_by_dir(subdir_id):
1807            parent_id = entry.parent_id
1808            if parent_id == subdir.file_id:
1809                # The root's parent ID has changed, so make sure children of
1810                # the root refer to the new ID.
1811                parent_id = merge_into_root.file_id
1812            yield (entry, parent_id, path)
1813
1814
1815def merge_inner(this_branch, other_tree, base_tree, ignore_zero=False,
1816                backup_files=False,
1817                merge_type=Merge3Merger,
1818                show_base=False,
1819                reprocess=False,
1820                other_rev_id=None,
1821                interesting_files=None,
1822                this_tree=None,
1823                change_reporter=None):
1824    """Primary interface for merging.
1825
1826    Typical use is probably::
1827
1828        merge_inner(branch, branch.get_revision_tree(other_revision),
1829                    branch.get_revision_tree(base_revision))
1830    """
1831    if this_tree is None:
1832        raise errors.BzrError("breezy.merge.merge_inner requires a this_tree "
1833                              "parameter")
1834    merger = Merger(this_branch, other_tree, base_tree, this_tree=this_tree,
1835                    change_reporter=change_reporter)
1836    merger.backup_files = backup_files
1837    merger.merge_type = merge_type
1838    merger.ignore_zero = ignore_zero
1839    merger.interesting_files = interesting_files
1840    merger.show_base = show_base
1841    merger.reprocess = reprocess
1842    merger.other_rev_id = other_rev_id
1843    merger.other_basis = other_rev_id
1844    get_revision_id = getattr(base_tree, 'get_revision_id', None)
1845    if get_revision_id is None:
1846        get_revision_id = base_tree.last_revision
1847    merger.cache_trees_with_revision_ids([other_tree, base_tree, this_tree])
1848    merger.set_base_revision(get_revision_id(), this_branch)
1849    return merger.do_merge()
1850
1851
1852merge_type_registry = registry.Registry()
1853merge_type_registry.register('diff3', Diff3Merger,
1854                             "Merge using external diff3.")
1855merge_type_registry.register('lca', LCAMerger,
1856                             "LCA-newness merge.")
1857merge_type_registry.register('merge3', Merge3Merger,
1858                             "Native diff3-style merge.")
1859merge_type_registry.register('weave', WeaveMerger,
1860                             "Weave-based merge.")
1861
1862
1863def get_merge_type_registry():
1864    """Merge type registry was previously in breezy.option
1865
1866    This method provides a backwards compatible way to retrieve it.
1867    """
1868    return merge_type_registry
1869
1870
1871def _plan_annotate_merge(annotated_a, annotated_b, ancestors_a, ancestors_b):
1872    def status_a(revision, text):
1873        if revision in ancestors_b:
1874            return 'killed-b', text
1875        else:
1876            return 'new-a', text
1877
1878    def status_b(revision, text):
1879        if revision in ancestors_a:
1880            return 'killed-a', text
1881        else:
1882            return 'new-b', text
1883
1884    plain_a = [t for (a, t) in annotated_a]
1885    plain_b = [t for (a, t) in annotated_b]
1886    matcher = patiencediff.PatienceSequenceMatcher(None, plain_a, plain_b)
1887    blocks = matcher.get_matching_blocks()
1888    a_cur = 0
1889    b_cur = 0
1890    for ai, bi, l in blocks:
1891        # process all mismatched sections
1892        # (last mismatched section is handled because blocks always
1893        # includes a 0-length last block)
1894        for revision, text in annotated_a[a_cur:ai]:
1895            yield status_a(revision, text)
1896        for revision, text in annotated_b[b_cur:bi]:
1897            yield status_b(revision, text)
1898        # and now the matched section
1899        a_cur = ai + l
1900        b_cur = bi + l
1901        for text_a in plain_a[ai:a_cur]:
1902            yield "unchanged", text_a
1903
1904
1905class _PlanMergeBase(object):
1906
1907    def __init__(self, a_rev, b_rev, vf, key_prefix):
1908        """Contructor.
1909
1910        :param a_rev: Revision-id of one revision to merge
1911        :param b_rev: Revision-id of the other revision to merge
1912        :param vf: A VersionedFiles containing both revisions
1913        :param key_prefix: A prefix for accessing keys in vf, typically
1914            (file_id,).
1915        """
1916        self.a_rev = a_rev
1917        self.b_rev = b_rev
1918        self.vf = vf
1919        self._last_lines = None
1920        self._last_lines_revision_id = None
1921        self._cached_matching_blocks = {}
1922        self._key_prefix = key_prefix
1923        self._precache_tip_lines()
1924
1925    def _precache_tip_lines(self):
1926        lines = self.get_lines([self.a_rev, self.b_rev])
1927        self.lines_a = lines[self.a_rev]
1928        self.lines_b = lines[self.b_rev]
1929
1930    def get_lines(self, revisions):
1931        """Get lines for revisions from the backing VersionedFiles.
1932
1933        :raises RevisionNotPresent: on absent texts.
1934        """
1935        keys = [(self._key_prefix + (rev,)) for rev in revisions]
1936        result = {}
1937        for record in self.vf.get_record_stream(keys, 'unordered', True):
1938            if record.storage_kind == 'absent':
1939                raise errors.RevisionNotPresent(record.key, self.vf)
1940            result[record.key[-1]] = record.get_bytes_as('lines')
1941        return result
1942
1943    def plan_merge(self):
1944        """Generate a 'plan' for merging the two revisions.
1945
1946        This involves comparing their texts and determining the cause of
1947        differences.  If text A has a line and text B does not, then either the
1948        line was added to text A, or it was deleted from B.  Once the causes
1949        are combined, they are written out in the format described in
1950        VersionedFile.plan_merge
1951        """
1952        blocks = self._get_matching_blocks(self.a_rev, self.b_rev)
1953        unique_a, unique_b = self._unique_lines(blocks)
1954        new_a, killed_b = self._determine_status(self.a_rev, unique_a)
1955        new_b, killed_a = self._determine_status(self.b_rev, unique_b)
1956        return self._iter_plan(blocks, new_a, killed_b, new_b, killed_a)
1957
1958    def _iter_plan(self, blocks, new_a, killed_b, new_b, killed_a):
1959        last_i = 0
1960        last_j = 0
1961        for i, j, n in blocks:
1962            for a_index in range(last_i, i):
1963                if a_index in new_a:
1964                    if a_index in killed_b:
1965                        yield 'conflicted-a', self.lines_a[a_index]
1966                    else:
1967                        yield 'new-a', self.lines_a[a_index]
1968                else:
1969                    yield 'killed-b', self.lines_a[a_index]
1970            for b_index in range(last_j, j):
1971                if b_index in new_b:
1972                    if b_index in killed_a:
1973                        yield 'conflicted-b', self.lines_b[b_index]
1974                    else:
1975                        yield 'new-b', self.lines_b[b_index]
1976                else:
1977                    yield 'killed-a', self.lines_b[b_index]
1978            # handle common lines
1979            for a_index in range(i, i + n):
1980                yield 'unchanged', self.lines_a[a_index]
1981            last_i = i + n
1982            last_j = j + n
1983
1984    def _get_matching_blocks(self, left_revision, right_revision):
1985        """Return a description of which sections of two revisions match.
1986
1987        See SequenceMatcher.get_matching_blocks
1988        """
1989        cached = self._cached_matching_blocks.get((left_revision,
1990                                                   right_revision))
1991        if cached is not None:
1992            return cached
1993        if self._last_lines_revision_id == left_revision:
1994            left_lines = self._last_lines
1995            right_lines = self.get_lines([right_revision])[right_revision]
1996        else:
1997            lines = self.get_lines([left_revision, right_revision])
1998            left_lines = lines[left_revision]
1999            right_lines = lines[right_revision]
2000        self._last_lines = right_lines
2001        self._last_lines_revision_id = right_revision
2002        matcher = patiencediff.PatienceSequenceMatcher(None, left_lines,
2003                                                       right_lines)
2004        return matcher.get_matching_blocks()
2005
2006    def _unique_lines(self, matching_blocks):
2007        """Analyse matching_blocks to determine which lines are unique
2008
2009        :return: a tuple of (unique_left, unique_right), where the values are
2010            sets of line numbers of unique lines.
2011        """
2012        last_i = 0
2013        last_j = 0
2014        unique_left = []
2015        unique_right = []
2016        for i, j, n in matching_blocks:
2017            unique_left.extend(range(last_i, i))
2018            unique_right.extend(range(last_j, j))
2019            last_i = i + n
2020            last_j = j + n
2021        return unique_left, unique_right
2022
2023    @staticmethod
2024    def _subtract_plans(old_plan, new_plan):
2025        """Remove changes from new_plan that came from old_plan.
2026
2027        It is assumed that the difference between the old_plan and new_plan
2028        is their choice of 'b' text.
2029
2030        All lines from new_plan that differ from old_plan are emitted
2031        verbatim.  All lines from new_plan that match old_plan but are
2032        not about the 'b' revision are emitted verbatim.
2033
2034        Lines that match and are about the 'b' revision are the lines we
2035        don't want, so we convert 'killed-b' -> 'unchanged', and 'new-b'
2036        is skipped entirely.
2037        """
2038        matcher = patiencediff.PatienceSequenceMatcher(None, old_plan,
2039                                                       new_plan)
2040        last_j = 0
2041        for i, j, n in matcher.get_matching_blocks():
2042            for jj in range(last_j, j):
2043                yield new_plan[jj]
2044            for jj in range(j, j + n):
2045                plan_line = new_plan[jj]
2046                if plan_line[0] == 'new-b':
2047                    pass
2048                elif plan_line[0] == 'killed-b':
2049                    yield 'unchanged', plan_line[1]
2050                else:
2051                    yield plan_line
2052            last_j = j + n
2053
2054
2055class _PlanMerge(_PlanMergeBase):
2056    """Plan an annotate merge using on-the-fly annotation"""
2057
2058    def __init__(self, a_rev, b_rev, vf, key_prefix):
2059        super(_PlanMerge, self).__init__(a_rev, b_rev, vf, key_prefix)
2060        self.a_key = self._key_prefix + (self.a_rev,)
2061        self.b_key = self._key_prefix + (self.b_rev,)
2062        self.graph = _mod_graph.Graph(self.vf)
2063        heads = self.graph.heads((self.a_key, self.b_key))
2064        if len(heads) == 1:
2065            # one side dominates, so we can just return its values, yay for
2066            # per-file graphs
2067            # Ideally we would know that before we get this far
2068            self._head_key = heads.pop()
2069            if self._head_key == self.a_key:
2070                other = b_rev
2071            else:
2072                other = a_rev
2073            trace.mutter('found dominating revision for %s\n%s > %s', self.vf,
2074                         self._head_key[-1], other)
2075            self._weave = None
2076        else:
2077            self._head_key = None
2078            self._build_weave()
2079
2080    def _precache_tip_lines(self):
2081        # Turn this into a no-op, because we will do this later
2082        pass
2083
2084    def _find_recursive_lcas(self):
2085        """Find all the ancestors back to a unique lca"""
2086        cur_ancestors = (self.a_key, self.b_key)
2087        # graph.find_lca(uncommon, keys) now returns plain NULL_REVISION,
2088        # rather than a key tuple. We will just map that directly to no common
2089        # ancestors.
2090        parent_map = {}
2091        while True:
2092            next_lcas = self.graph.find_lca(*cur_ancestors)
2093            # Map a plain NULL_REVISION to a simple no-ancestors
2094            if next_lcas == {_mod_revision.NULL_REVISION}:
2095                next_lcas = ()
2096            # Order the lca's based on when they were merged into the tip
2097            # While the actual merge portion of weave merge uses a set() of
2098            # active revisions, the order of insertion *does* effect the
2099            # implicit ordering of the texts.
2100            for rev_key in cur_ancestors:
2101                ordered_parents = tuple(self.graph.find_merge_order(rev_key,
2102                                                                    next_lcas))
2103                parent_map[rev_key] = ordered_parents
2104            if len(next_lcas) == 0:
2105                break
2106            elif len(next_lcas) == 1:
2107                parent_map[list(next_lcas)[0]] = ()
2108                break
2109            elif len(next_lcas) > 2:
2110                # More than 2 lca's, fall back to grabbing all nodes between
2111                # this and the unique lca.
2112                trace.mutter('More than 2 LCAs, falling back to all nodes for:'
2113                             ' %s, %s\n=> %s',
2114                             self.a_key, self.b_key, cur_ancestors)
2115                cur_lcas = next_lcas
2116                while len(cur_lcas) > 1:
2117                    cur_lcas = self.graph.find_lca(*cur_lcas)
2118                if len(cur_lcas) == 0:
2119                    # No common base to find, use the full ancestry
2120                    unique_lca = None
2121                else:
2122                    unique_lca = list(cur_lcas)[0]
2123                    if unique_lca == _mod_revision.NULL_REVISION:
2124                        # find_lca will return a plain 'NULL_REVISION' rather
2125                        # than a key tuple when there is no common ancestor, we
2126                        # prefer to just use None, because it doesn't confuse
2127                        # _get_interesting_texts()
2128                        unique_lca = None
2129                parent_map.update(self._find_unique_parents(next_lcas,
2130                                                            unique_lca))
2131                break
2132            cur_ancestors = next_lcas
2133        return parent_map
2134
2135    def _find_unique_parents(self, tip_keys, base_key):
2136        """Find ancestors of tip that aren't ancestors of base.
2137
2138        :param tip_keys: Nodes that are interesting
2139        :param base_key: Cull all ancestors of this node
2140        :return: The parent map for all revisions between tip_keys and
2141            base_key. base_key will be included. References to nodes outside of
2142            the ancestor set will also be removed.
2143        """
2144        # TODO: this would be simpler if find_unique_ancestors took a list
2145        #       instead of a single tip, internally it supports it, but it
2146        #       isn't a "backwards compatible" api change.
2147        if base_key is None:
2148            parent_map = dict(self.graph.iter_ancestry(tip_keys))
2149            # We remove NULL_REVISION because it isn't a proper tuple key, and
2150            # thus confuses things like _get_interesting_texts, and our logic
2151            # to add the texts into the memory weave.
2152            if _mod_revision.NULL_REVISION in parent_map:
2153                parent_map.pop(_mod_revision.NULL_REVISION)
2154        else:
2155            interesting = set()
2156            for tip in tip_keys:
2157                interesting.update(
2158                    self.graph.find_unique_ancestors(tip, [base_key]))
2159            parent_map = self.graph.get_parent_map(interesting)
2160            parent_map[base_key] = ()
2161        culled_parent_map, child_map, tails = self._remove_external_references(
2162            parent_map)
2163        # Remove all the tails but base_key
2164        if base_key is not None:
2165            tails.remove(base_key)
2166            self._prune_tails(culled_parent_map, child_map, tails)
2167        # Now remove all the uninteresting 'linear' regions
2168        simple_map = _mod_graph.collapse_linear_regions(culled_parent_map)
2169        return simple_map
2170
2171    @staticmethod
2172    def _remove_external_references(parent_map):
2173        """Remove references that go outside of the parent map.
2174
2175        :param parent_map: Something returned from Graph.get_parent_map(keys)
2176        :return: (filtered_parent_map, child_map, tails)
2177            filtered_parent_map is parent_map without external references
2178            child_map is the {parent_key: [child_keys]} mapping
2179            tails is a list of nodes that do not have any parents in the map
2180        """
2181        # TODO: The basic effect of this function seems more generic than
2182        #       _PlanMerge. But the specific details of building a child_map,
2183        #       and computing tails seems very specific to _PlanMerge.
2184        #       Still, should this be in Graph land?
2185        filtered_parent_map = {}
2186        child_map = {}
2187        tails = []
2188        for key, parent_keys in parent_map.items():
2189            culled_parent_keys = [p for p in parent_keys if p in parent_map]
2190            if not culled_parent_keys:
2191                tails.append(key)
2192            for parent_key in culled_parent_keys:
2193                child_map.setdefault(parent_key, []).append(key)
2194            # TODO: Do we want to do this, it adds overhead for every node,
2195            #       just to say that the node has no children
2196            child_map.setdefault(key, [])
2197            filtered_parent_map[key] = culled_parent_keys
2198        return filtered_parent_map, child_map, tails
2199
2200    @staticmethod
2201    def _prune_tails(parent_map, child_map, tails_to_remove):
2202        """Remove tails from the parent map.
2203
2204        This will remove the supplied revisions until no more children have 0
2205        parents.
2206
2207        :param parent_map: A dict of {child: [parents]}, this dictionary will
2208            be modified in place.
2209        :param tails_to_remove: A list of tips that should be removed,
2210            this list will be consumed
2211        :param child_map: The reverse dict of parent_map ({parent: [children]})
2212            this dict will be modified
2213        :return: None, parent_map will be modified in place.
2214        """
2215        while tails_to_remove:
2216            next = tails_to_remove.pop()
2217            parent_map.pop(next)
2218            children = child_map.pop(next)
2219            for child in children:
2220                child_parents = parent_map[child]
2221                child_parents.remove(next)
2222                if len(child_parents) == 0:
2223                    tails_to_remove.append(child)
2224
2225    def _get_interesting_texts(self, parent_map):
2226        """Return a dict of texts we are interested in.
2227
2228        Note that the input is in key tuples, but the output is in plain
2229        revision ids.
2230
2231        :param parent_map: The output from _find_recursive_lcas
2232        :return: A dict of {'revision_id':lines} as returned by
2233            _PlanMergeBase.get_lines()
2234        """
2235        all_revision_keys = set(parent_map)
2236        all_revision_keys.add(self.a_key)
2237        all_revision_keys.add(self.b_key)
2238
2239        # Everything else is in 'keys' but get_lines is in 'revision_ids'
2240        all_texts = self.get_lines([k[-1] for k in all_revision_keys])
2241        return all_texts
2242
2243    def _build_weave(self):
2244        from .bzr import weave
2245        self._weave = weave.Weave(weave_name='in_memory_weave',
2246                                  allow_reserved=True)
2247        parent_map = self._find_recursive_lcas()
2248
2249        all_texts = self._get_interesting_texts(parent_map)
2250
2251        # Note: Unfortunately, the order given by topo_sort will effect the
2252        # ordering resolution in the output. Specifically, if you add A then B,
2253        # then in the output text A lines will show up before B lines. And, of
2254        # course, topo_sort doesn't guarantee any real ordering.
2255        # So we use merge_sort, and add a fake node on the tip.
2256        # This ensures that left-hand parents will always be inserted into the
2257        # weave before right-hand parents.
2258        tip_key = self._key_prefix + (_mod_revision.CURRENT_REVISION,)
2259        parent_map[tip_key] = (self.a_key, self.b_key)
2260
2261        for seq_num, key, depth, eom in reversed(tsort.merge_sort(parent_map,
2262                                                                  tip_key)):
2263            if key == tip_key:
2264                continue
2265        # for key in tsort.topo_sort(parent_map):
2266            parent_keys = parent_map[key]
2267            revision_id = key[-1]
2268            parent_ids = [k[-1] for k in parent_keys]
2269            self._weave.add_lines(revision_id, parent_ids,
2270                                  all_texts[revision_id])
2271
2272    def plan_merge(self):
2273        """Generate a 'plan' for merging the two revisions.
2274
2275        This involves comparing their texts and determining the cause of
2276        differences.  If text A has a line and text B does not, then either the
2277        line was added to text A, or it was deleted from B.  Once the causes
2278        are combined, they are written out in the format described in
2279        VersionedFile.plan_merge
2280        """
2281        if self._head_key is not None:  # There was a single head
2282            if self._head_key == self.a_key:
2283                plan = 'new-a'
2284            else:
2285                if self._head_key != self.b_key:
2286                    raise AssertionError('There was an invalid head: %s != %s'
2287                                         % (self.b_key, self._head_key))
2288                plan = 'new-b'
2289            head_rev = self._head_key[-1]
2290            lines = self.get_lines([head_rev])[head_rev]
2291            return ((plan, line) for line in lines)
2292        return self._weave.plan_merge(self.a_rev, self.b_rev)
2293
2294
2295class _PlanLCAMerge(_PlanMergeBase):
2296    """
2297    This merge algorithm differs from _PlanMerge in that:
2298
2299    1. comparisons are done against LCAs only
2300    2. cases where a contested line is new versus one LCA but old versus
2301       another are marked as conflicts, by emitting the line as conflicted-a
2302       or conflicted-b.
2303
2304    This is faster, and hopefully produces more useful output.
2305    """
2306
2307    def __init__(self, a_rev, b_rev, vf, key_prefix, graph):
2308        _PlanMergeBase.__init__(self, a_rev, b_rev, vf, key_prefix)
2309        lcas = graph.find_lca(key_prefix + (a_rev,), key_prefix + (b_rev,))
2310        self.lcas = set()
2311        for lca in lcas:
2312            if lca == _mod_revision.NULL_REVISION:
2313                self.lcas.add(lca)
2314            else:
2315                self.lcas.add(lca[-1])
2316        for lca in self.lcas:
2317            if _mod_revision.is_null(lca):
2318                lca_lines = []
2319            else:
2320                lca_lines = self.get_lines([lca])[lca]
2321            matcher = patiencediff.PatienceSequenceMatcher(None, self.lines_a,
2322                                                           lca_lines)
2323            blocks = list(matcher.get_matching_blocks())
2324            self._cached_matching_blocks[(a_rev, lca)] = blocks
2325            matcher = patiencediff.PatienceSequenceMatcher(None, self.lines_b,
2326                                                           lca_lines)
2327            blocks = list(matcher.get_matching_blocks())
2328            self._cached_matching_blocks[(b_rev, lca)] = blocks
2329
2330    def _determine_status(self, revision_id, unique_line_numbers):
2331        """Determines the status unique lines versus all lcas.
2332
2333        Basically, determines why the line is unique to this revision.
2334
2335        A line may be determined new, killed, or both.
2336
2337        If a line is determined new, that means it was not present in at least
2338        one LCA, and is not present in the other merge revision.
2339
2340        If a line is determined killed, that means the line was present in
2341        at least one LCA.
2342
2343        If a line is killed and new, this indicates that the two merge
2344        revisions contain differing conflict resolutions.
2345
2346        :param revision_id: The id of the revision in which the lines are
2347            unique
2348        :param unique_line_numbers: The line numbers of unique lines.
2349        :return: a tuple of (new_this, killed_other)
2350        """
2351        new = set()
2352        killed = set()
2353        unique_line_numbers = set(unique_line_numbers)
2354        for lca in self.lcas:
2355            blocks = self._get_matching_blocks(revision_id, lca)
2356            unique_vs_lca, _ignored = self._unique_lines(blocks)
2357            new.update(unique_line_numbers.intersection(unique_vs_lca))
2358            killed.update(unique_line_numbers.difference(unique_vs_lca))
2359        return new, killed
2360