# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

import time

from . import (
    debug,
    errors,
    osutils,
    revision,
    trace,
    )

STEP_UNIQUE_SEARCHER_EVERY = 5

# DIAGRAM of terminology
#       A
#       /\
#      B  C
#      |  |\
#      D  E F
#      |\/| |
#      |/\|/
#      G  H
#
# In this diagram, relative to G and H:
# A, B, C, D, E are common ancestors.
# C, D and E are border ancestors, because each has a non-common descendant.
# D and E are least common ancestors because none of their descendants are
# common ancestors.
# C is not a least common ancestor because its descendant, E, is a common
# ancestor.
#
# The find_unique_lca algorithm will pick A in two steps:
# 1. find_lca('G', 'H') => ['D', 'E']
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']

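# A minimal sketch of those two steps, using the providers defined below.
# This is illustrative only; the revision names mirror the diagram:
#
#   ancestry = {
#       'A': (), 'B': ('A',), 'C': ('A',), 'D': ('B',), 'E': ('C',),
#       'F': ('C',), 'G': ('D', 'E'), 'H': ('D', 'E', 'F'),
#   }
#   graph = Graph(DictParentsProvider(ancestry))
#   graph.find_lca('G', 'H')         # => {'D', 'E'}
#   graph.find_unique_lca('G', 'H')  # => 'A'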

class DictParentsProvider(object):
    """A parents provider for Graph objects."""

    def __init__(self, ancestry):
        self.ancestry = ancestry

    def __repr__(self):
        return 'DictParentsProvider(%r)' % self.ancestry

    # Note: DictParentsProvider does not implement get_cached_parent_map
    #       Arguably, the data is clearly cached in memory. However, this class
    #       is mostly used for testing, and it keeps the tests clean to not
    #       change it.

    def get_parent_map(self, keys):
        """See StackedParentsProvider.get_parent_map"""
        ancestry = self.ancestry
        return {k: ancestry[k] for k in keys if k in ancestry}

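# Example (illustrative sketch) of DictParentsProvider above: keys absent
# from the dict get no entry in the result, which is how callers detect
# ghosts.
#
#   provider = DictParentsProvider({'rev2': ('rev1',), 'rev1': ()})
#   provider.get_parent_map(['rev2', 'ghost'])
#   # => {'rev2': ('rev1',)}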

class StackedParentsProvider(object):
    """A parents provider which stacks (or unions) multiple providers.

    The providers are queried in the order of the provided parent_providers.
    """

    def __init__(self, parent_providers):
        self._parent_providers = parent_providers

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._parent_providers)

    def get_parent_map(self, keys):
        """Get a mapping of keys => parents

        A dictionary is returned with an entry for each key present in this
        source. If this source doesn't have information about a key, it should
        not include an entry.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        :param keys: An iterable returning keys to check (eg revision_ids)
        :return: A dictionary mapping each key to its parents
        """
        found = {}
        remaining = set(keys)
        # This adds getattr() overhead to each get_parent_map call. However,
        # this is StackedParentsProvider, which means we're dealing with I/O
        # (either local indexes, or remote RPCs), so CPU overhead should be
        # minimal.
        for parents_provider in self._parent_providers:
            get_cached = getattr(parents_provider, 'get_cached_parent_map',
                                 None)
            if get_cached is None:
                continue
            new_found = get_cached(remaining)
            found.update(new_found)
            remaining.difference_update(new_found)
            if not remaining:
                break
        if not remaining:
            return found
        for parents_provider in self._parent_providers:
            try:
                new_found = parents_provider.get_parent_map(remaining)
            except errors.UnsupportedOperation:
                continue
            found.update(new_found)
            remaining.difference_update(new_found)
            if not remaining:
                break
        return found

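# Example (illustrative sketch) of the stacking behaviour above: cached
# answers are preferred, then the first provider that knows a key wins and
# later providers are only asked for the keys still missing.
#
#   pp1 = DictParentsProvider({'rev2': ('rev1',)})
#   pp2 = DictParentsProvider({'rev1': (), 'rev2': ('bogus',)})
#   stacked = StackedParentsProvider([pp1, pp2])
#   stacked.get_parent_map(['rev1', 'rev2'])
#   # => {'rev1': (), 'rev2': ('rev1',)}  (pp1's answer for 'rev2' wins)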

class CachingParentsProvider(object):
    """A parents provider which will cache the revision => parents as a dict.

    This is useful for providers which have an expensive look up.

    Either a ParentsProvider or a get_parent_map-like callback may be
    supplied.  If it provides extra un-asked-for parents, they will be cached,
    but filtered out of get_parent_map.

    The cache is enabled by default, but may be disabled and re-enabled.
    """

    def __init__(self, parent_provider=None, get_parent_map=None):
        """Constructor.

        :param parent_provider: The ParentsProvider to use.  It or
            get_parent_map must be supplied.
        :param get_parent_map: The get_parent_map callback to use.  It or
            parent_provider must be supplied.
        """
        self._real_provider = parent_provider
        if get_parent_map is None:
            self._get_parent_map = self._real_provider.get_parent_map
        else:
            self._get_parent_map = get_parent_map
        self._cache = None
        self.enable_cache(True)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._real_provider)

    def enable_cache(self, cache_misses=True):
        """Enable cache."""
        if self._cache is not None:
            raise AssertionError('Cache enabled when already enabled.')
        self._cache = {}
        self._cache_misses = cache_misses
        self.missing_keys = set()

    def disable_cache(self):
        """Disable and clear the cache."""
        self._cache = None
        self._cache_misses = None
        self.missing_keys = set()

    def get_cached_map(self):
        """Return any cached get_parent_map values."""
        if self._cache is None:
            return None
        return dict(self._cache)

    def get_cached_parent_map(self, keys):
        """Return items from the cache.

        This returns the same info as get_parent_map, but explicitly does not
        invoke the supplied ParentsProvider to search for uncached values.
        """
        cache = self._cache
        if cache is None:
            return {}
        return {key: cache[key] for key in keys if key in cache}

    def get_parent_map(self, keys):
        """See StackedParentsProvider.get_parent_map."""
        cache = self._cache
        if cache is None:
            cache = self._get_parent_map(keys)
        else:
            needed_revisions = set(key for key in keys if key not in cache)
            # Do not ask for negatively cached keys
            needed_revisions.difference_update(self.missing_keys)
            if needed_revisions:
                parent_map = self._get_parent_map(needed_revisions)
                cache.update(parent_map)
                if self._cache_misses:
                    for key in needed_revisions:
                        if key not in parent_map:
                            self.note_missing_key(key)
        result = {}
        for key in keys:
            value = cache.get(key)
            if value is not None:
                result[key] = value
        return result

    def note_missing_key(self, key):
        """Note that key is a missing key."""
        if self._cache_misses:
            self.missing_keys.add(key)

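# Example (illustrative sketch) of the caching behaviour above: the second
# call is served from the cache, and the negatively cached ghost is not
# asked for again.
#
#   caching = CachingParentsProvider(DictParentsProvider({'rev1': ()}))
#   caching.get_parent_map(['rev1', 'ghost'])  # queries the real provider
#   caching.get_parent_map(['rev1', 'ghost'])  # cache only; 'ghost' is in
#   #                                            caching.missing_keys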

class CallableToParentsProviderAdapter(object):
    """A parents provider that adapts any callable to the parents provider API.

    i.e. it accepts calls to self.get_parent_map and relays them to the
    callable it was constructed with.
    """

    def __init__(self, a_callable):
        self.callable = a_callable

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.callable)

    def get_parent_map(self, keys):
        return self.callable(keys)

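# Example (illustrative sketch): any function with get_parent_map's
# contract can be adapted, for instance a closure over a plain dict.
#
#   parents = {'rev1': ()}
#   adapter = CallableToParentsProviderAdapter(
#       lambda keys: {k: parents[k] for k in keys if k in parents})
#   adapter.get_parent_map(['rev1'])  # => {'rev1': ()}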

class Graph(object):
    """Provide incremental access to revision graphs.

    This is the generic implementation; it is intended to be subclassed to
    specialize it for other repository types.
    """

    def __init__(self, parents_provider):
        """Construct a Graph that uses a parents provider as its input.

        This should not normally be invoked directly, because there may be
        specialized implementations for particular repository types.  See
        Repository.get_graph().

        :param parents_provider: An object providing a get_parent_map call
            conforming to the behavior of
            StackedParentsProvider.get_parent_map.
        """
        if getattr(parents_provider, 'get_parents', None) is not None:
            self.get_parents = parents_provider.get_parents
        if getattr(parents_provider, 'get_parent_map', None) is not None:
            self.get_parent_map = parents_provider.get_parent_map
        self._parents_provider = parents_provider

    def __repr__(self):
        return 'Graph(%r)' % self._parents_provider

    def find_lca(self, *revisions):
        """Determine the lowest common ancestors of the provided revisions

        A lowest common ancestor is a common ancestor none of whose
        descendants are common ancestors.  In graphs, unlike trees, there may
        be multiple lowest common ancestors.

        This algorithm has two phases.  Phase 1 identifies border ancestors,
        and phase 2 filters border ancestors to determine lowest common
        ancestors.

        In phase 1, border ancestors are identified, using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        In phase 2, the border ancestors are filtered to find the least
        common ancestors.  This is done by searching the ancestries of each
        border ancestor.

        Phase 2 is performed on the principle that a border ancestor that is
        not an ancestor of any other border ancestor is a least common
        ancestor.

        Searches are stopped when they find a node that is determined to be a
        common ancestor of all border ancestors, because this shows that it
        cannot be a descendant of any border ancestor.

        The scaling of this operation should be proportional to:

        1. The number of uncommon ancestors
        2. The number of border ancestors
        3. The length of the shortest path between a border ancestor and an
           ancestor of all border ancestors.
        """
        border_common, common, sides = self._find_border_ancestors(revisions)
        # We may have common ancestors that can be reached from each other.
        # - ask for the heads of them to filter it down to only ones that
        # cannot be reached from each other - phase 2.
        return self.heads(border_common)

    def find_difference(self, left_revision, right_revision):
        """Determine the graph difference between two revisions"""
        border, common, searchers = self._find_border_ancestors(
            [left_revision, right_revision])
        self._search_for_extra_common(common, searchers)
        left = searchers[0].seen
        right = searchers[1].seen
        return (left.difference(right), right.difference(left))

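    # Illustrative sketch on the diagram graph at the top of the module:
    # find_difference('G', 'H') => ({'G'}, {'H', 'F'}), since F is in the
    # ancestry of H only.
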
    def find_descendants(self, old_key, new_key):
        """Find descendants of old_key that are ancestors of new_key."""
        child_map = self.get_child_map(self._find_descendant_ancestors(
            old_key, new_key))
        graph = Graph(DictParentsProvider(child_map))
        searcher = graph._make_breadth_first_searcher([old_key])
        list(searcher)
        return searcher.seen

    def _find_descendant_ancestors(self, old_key, new_key):
        """Find ancestors of new_key that may be descendants of old_key."""
        stop = self._make_breadth_first_searcher([old_key])
        descendants = self._make_breadth_first_searcher([new_key])
        for revisions in descendants:
            old_stop = stop.seen.intersection(revisions)
            descendants.stop_searching_any(old_stop)
            seen_stop = descendants.find_seen_ancestors(stop.step())
            descendants.stop_searching_any(seen_stop)
        return descendants.seen.difference(stop.seen)

    def get_child_map(self, keys):
        """Get a mapping from parents to children of the specified keys.

        This is simply the inversion of get_parent_map.  Only supplied keys
        will be discovered as children.

        :return: a dict of key:child_list for keys.
        """
        parent_map = self._parents_provider.get_parent_map(keys)
        parent_child = {}
        for child, parents in sorted(parent_map.items()):
            for parent in parents:
                parent_child.setdefault(parent, []).append(child)
        return parent_child

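    # Illustrative sketch on the diagram graph:
    #   graph.get_child_map(['G', 'H'])
    #   # => {'D': ['G', 'H'], 'E': ['G', 'H'], 'F': ['H']}
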
    def find_distance_to_null(self, target_revision_id, known_revision_ids):
        """Find the left-hand distance to the NULL_REVISION.

        (This can also be considered the revno of a branch at
        target_revision_id.)

        :param target_revision_id: A revision_id which we would like to know
            the revno for.
        :param known_revision_ids: [(revision_id, revno)] A list of known
            revno, revision_id tuples. We'll use this to seed the search.
        """
        # Map from revision_ids to a known value for their revno
        known_revnos = dict(known_revision_ids)
        cur_tip = target_revision_id
        num_steps = 0
        NULL_REVISION = revision.NULL_REVISION
        known_revnos[NULL_REVISION] = 0

        searching_known_tips = list(known_revnos)

        unknown_searched = {}

        while cur_tip not in known_revnos:
            unknown_searched[cur_tip] = num_steps
            num_steps += 1
            to_search = {cur_tip}
            to_search.update(searching_known_tips)
            parent_map = self.get_parent_map(to_search)
            parents = parent_map.get(cur_tip, None)
            if not parents:  # An empty list or None is a ghost
                raise errors.GhostRevisionsHaveNoRevno(target_revision_id,
                                                       cur_tip)
            cur_tip = parents[0]
            next_known_tips = []
            for revision_id in searching_known_tips:
                parents = parent_map.get(revision_id, None)
                if not parents:
                    continue
                next = parents[0]
                next_revno = known_revnos[revision_id] - 1
                if next in unknown_searched:
                    # We have enough information to return a value right now
                    return next_revno + unknown_searched[next]
                if next in known_revnos:
                    continue
                known_revnos[next] = next_revno
                next_known_tips.append(next)
            searching_known_tips = next_known_tips

        # We reached a known revision, so just add in how many steps it took to
        # get there.
        return known_revnos[cur_tip] + num_steps

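    # Illustrative sketch, assuming a linear history null <- r1 <- r2 <- r3
    # (hypothetical ids):
    #   graph.find_distance_to_null('r3', [])           # => 3
    #   graph.find_distance_to_null('r3', [('r2', 2)])  # => 3, but the
    #   # seeded search meets 'r2' after one step instead of walking to null
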
    def find_lefthand_distances(self, keys):
        """Find the distance to null for all the keys in keys.

        :param keys: keys to lookup.
        :return: A dict key->distance for all of keys.
        """
        # Optimisable by concurrent searching, but a random spread should get
        # some sort of hit rate.
        known_revnos = []
        ghosts = []
        for key in keys:
            try:
                known_revnos.append(
                    (key, self.find_distance_to_null(key, known_revnos)))
            except errors.GhostRevisionsHaveNoRevno:
                ghosts.append(key)
        for key in ghosts:
            known_revnos.append((key, -1))
        return dict(known_revnos)

    def find_unique_ancestors(self, unique_revision, common_revisions):
        """Find the unique ancestors for a revision versus others.

        This returns the ancestry of unique_revision, excluding all revisions
        in the ancestry of common_revisions. If unique_revision is in the
        ancestry, then the empty set will be returned.

        :param unique_revision: The revision_id whose ancestry we are
            interested in.
            (XXX: Would this API be better if we allowed multiple revisions
            to be searched here?)
        :param common_revisions: Revision_ids of ancestries to exclude.
        :return: A set of revisions in the ancestry of unique_revision
        """
        if unique_revision in common_revisions:
            return set()

        # Algorithm description
        # 1) Walk backwards from the unique node and all common nodes.
        # 2) When a node is seen by both sides, stop searching it in the unique
        #    walker, include it in the common walker.
        # 3) Stop searching when there are no nodes left for the unique walker.
        #    At this point, you have a maximal set of unique nodes. Some of
        #    them may actually be common, and you haven't reached them yet.
        # 4) Start new searchers for the unique nodes, seeded with the
        #    information you have so far.
        # 5) Continue searching, stopping the common searches when the search
        #    tip is an ancestor of all unique nodes.
        # 6) Aggregate together unique searchers when they are searching the
        #    same tips. When all unique searchers are searching the same node,
        #    stop them and move the search to a single 'all_unique_searcher'.
        # 7) The 'all_unique_searcher' represents the very 'tip' of searching.
        #    Most of the time this produces very little important information.
        #    So don't step it as quickly as the other searchers.
        # 8) Search is done when all common searchers have completed.

        unique_searcher, common_searcher = self._find_initial_unique_nodes(
            [unique_revision], common_revisions)

        unique_nodes = unique_searcher.seen.difference(common_searcher.seen)
        if not unique_nodes:
            return unique_nodes

        (all_unique_searcher,
         unique_tip_searchers) = self._make_unique_searchers(
             unique_nodes, unique_searcher, common_searcher)

        self._refine_unique_nodes(unique_searcher, all_unique_searcher,
                                  unique_tip_searchers, common_searcher)
        true_unique_nodes = unique_nodes.difference(common_searcher.seen)
        if 'graph' in debug.debug_flags:
            trace.mutter('Found %d truly unique nodes out of %d',
                         len(true_unique_nodes), len(unique_nodes))
        return true_unique_nodes

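    # Illustrative sketch on the diagram graph: everything in G's ancestry
    # also appears in H's ancestry except G itself.
    #   graph.find_unique_ancestors('G', ['H'])  # => {'G'}
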
    def _find_initial_unique_nodes(self, unique_revisions, common_revisions):
        """Steps 1-3 of find_unique_ancestors.

        Find the maximal set of unique nodes. Some of these might actually
        still be common, but we are sure that there are no other unique nodes.

        :return: (unique_searcher, common_searcher)
        """

        unique_searcher = self._make_breadth_first_searcher(unique_revisions)
        # we know that unique_revisions aren't in common_revisions, so skip
        # past them.
        next(unique_searcher)
        common_searcher = self._make_breadth_first_searcher(common_revisions)

        # As long as we are still finding unique nodes, keep searching
        while unique_searcher._next_query:
            next_unique_nodes = set(unique_searcher.step())
            next_common_nodes = set(common_searcher.step())

            # Check if either searcher encounters new nodes seen by the other
            # side.
            unique_are_common_nodes = next_unique_nodes.intersection(
                common_searcher.seen)
            unique_are_common_nodes.update(
                next_common_nodes.intersection(unique_searcher.seen))
            if unique_are_common_nodes:
                ancestors = unique_searcher.find_seen_ancestors(
                    unique_are_common_nodes)
                # TODO: This is a bit overboard, we only really care about
                #       the ancestors of the tips because the rest we
                #       already know. This is *correct* but causes us to
                #       search too much ancestry.
                ancestors.update(
                    common_searcher.find_seen_ancestors(ancestors))
                unique_searcher.stop_searching_any(ancestors)
                common_searcher.start_searching(ancestors)

        return unique_searcher, common_searcher

    def _make_unique_searchers(self, unique_nodes, unique_searcher,
                               common_searcher):
        """Create a searcher for all the unique search tips (step 4).

        As a side effect, the common_searcher will stop searching any nodes
        that are ancestors of the unique searcher tips.

        :return: (all_unique_searcher, unique_tip_searchers)
        """
        unique_tips = self._remove_simple_descendants(
            unique_nodes, self.get_parent_map(unique_nodes))

        if len(unique_tips) == 1:
            unique_tip_searchers = []
            ancestor_all_unique = unique_searcher.find_seen_ancestors(
                unique_tips)
        else:
            unique_tip_searchers = []
            for tip in unique_tips:
                revs_to_search = unique_searcher.find_seen_ancestors([tip])
                revs_to_search.update(
                    common_searcher.find_seen_ancestors(revs_to_search))
                searcher = self._make_breadth_first_searcher(revs_to_search)
                # We don't care about the starting nodes.
                searcher._label = tip
                searcher.step()
                unique_tip_searchers.append(searcher)

            ancestor_all_unique = None
            for searcher in unique_tip_searchers:
                if ancestor_all_unique is None:
                    ancestor_all_unique = set(searcher.seen)
                else:
                    ancestor_all_unique = ancestor_all_unique.intersection(
                        searcher.seen)
        # Collapse all the common nodes into a single searcher
        all_unique_searcher = self._make_breadth_first_searcher(
            ancestor_all_unique)
        # Initialise these here so the debug output below is well defined
        # even when ancestor_all_unique is empty.
        stopped_common = set()
        total_stopped = 0
        if ancestor_all_unique:
            # We've seen these nodes in all the searchers, so we'll just go to
            # the next
            all_unique_searcher.step()

            # Stop any search tips that are already known as ancestors of the
            # unique nodes
            stopped_common = common_searcher.stop_searching_any(
                common_searcher.find_seen_ancestors(ancestor_all_unique))

            for searcher in unique_tip_searchers:
                total_stopped += len(searcher.stop_searching_any(
                    searcher.find_seen_ancestors(ancestor_all_unique)))
        if 'graph' in debug.debug_flags:
            trace.mutter('For %d unique nodes, created %d + 1 unique searchers'
                         ' (%d stopped search tips, %d common ancestors,'
                         ' %d stopped common)',
                         len(unique_nodes), len(unique_tip_searchers),
                         total_stopped, len(ancestor_all_unique),
                         len(stopped_common))
        return all_unique_searcher, unique_tip_searchers

    def _step_unique_and_common_searchers(self, common_searcher,
                                          unique_tip_searchers,
                                          unique_searcher):
        """Step all the searchers"""
        newly_seen_common = set(common_searcher.step())
        newly_seen_unique = set()
        for searcher in unique_tip_searchers:
            next = set(searcher.step())
            next.update(unique_searcher.find_seen_ancestors(next))
            next.update(common_searcher.find_seen_ancestors(next))
            for alt_searcher in unique_tip_searchers:
                if alt_searcher is searcher:
                    continue
                next.update(alt_searcher.find_seen_ancestors(next))
            searcher.start_searching(next)
            newly_seen_unique.update(next)
        return newly_seen_common, newly_seen_unique

    def _find_nodes_common_to_all_unique(self, unique_tip_searchers,
                                         all_unique_searcher,
                                         newly_seen_unique, step_all_unique):
        """Find nodes that are common to all unique_tip_searchers.

        If it is time, step the all_unique_searcher, and add its nodes to the
        result.
        """
        common_to_all_unique_nodes = newly_seen_unique.copy()
        for searcher in unique_tip_searchers:
            common_to_all_unique_nodes.intersection_update(searcher.seen)
        common_to_all_unique_nodes.intersection_update(
            all_unique_searcher.seen)
        # Step all-unique less frequently than the other searchers.
        # In the common case, we don't need to spider out far here, so
        # avoid doing extra work.
        if step_all_unique:
            tstart = osutils.perf_counter()
            nodes = all_unique_searcher.step()
            common_to_all_unique_nodes.update(nodes)
            if 'graph' in debug.debug_flags:
                tdelta = osutils.perf_counter() - tstart
                trace.mutter('all_unique_searcher step() took %.3fs'
                             ' for %d nodes (%d total), iteration: %s',
                             tdelta, len(nodes), len(all_unique_searcher.seen),
                             all_unique_searcher._iterations)
        return common_to_all_unique_nodes

    def _collapse_unique_searchers(self, unique_tip_searchers,
                                   common_to_all_unique_nodes):
        """Combine searchers that are searching the same tips.

        When two searchers are searching the same tips, we can stop one of the
        searchers. We also know that the maximal set of common ancestors is the
        intersection of the two original searchers.

        :return: A list of searchers that are searching unique nodes.
        """
        # Filter out searchers that don't actually search different
        # nodes. We already have the ancestry intersection for them
        unique_search_tips = {}
        for searcher in unique_tip_searchers:
            stopped = searcher.stop_searching_any(common_to_all_unique_nodes)
            will_search_set = frozenset(searcher._next_query)
            if not will_search_set:
                if 'graph' in debug.debug_flags:
                    trace.mutter('Unique searcher %s was stopped.'
                                 ' (%s iterations) %d nodes stopped',
                                 searcher._label,
                                 searcher._iterations,
                                 len(stopped))
            elif will_search_set not in unique_search_tips:
                # This searcher is searching a unique set of nodes, let it
                unique_search_tips[will_search_set] = [searcher]
            else:
                unique_search_tips[will_search_set].append(searcher)
        # TODO: it might be possible to collapse searchers faster when they
        #       only have *some* search tips in common.
        next_unique_searchers = []
        for searchers in unique_search_tips.values():
            if len(searchers) == 1:
                # Searching unique tips, go for it
                next_unique_searchers.append(searchers[0])
            else:
                # These searchers have started searching the same tips, we
                # don't need them to cover the same ground. The
                # intersection of their ancestry won't change, so create a
                # new searcher, combining their histories.
                next_searcher = searchers[0]
                for searcher in searchers[1:]:
                    next_searcher.seen.intersection_update(searcher.seen)
                if 'graph' in debug.debug_flags:
                    trace.mutter('Combining %d searchers into a single'
                                 ' searcher searching %d nodes with'
                                 ' %d ancestry',
                                 len(searchers),
                                 len(next_searcher._next_query),
                                 len(next_searcher.seen))
                next_unique_searchers.append(next_searcher)
        return next_unique_searchers

    def _refine_unique_nodes(self, unique_searcher, all_unique_searcher,
                             unique_tip_searchers, common_searcher):
        """Steps 5-8 of find_unique_ancestors.

        This function returns when common_searcher has stopped searching for
        more nodes.
        """
        # We step the ancestor_all_unique searcher only every
        # STEP_UNIQUE_SEARCHER_EVERY steps.
        step_all_unique_counter = 0
        # While we still have common nodes to search
        while common_searcher._next_query:
            (newly_seen_common,
             newly_seen_unique) = self._step_unique_and_common_searchers(
                common_searcher, unique_tip_searchers, unique_searcher)
            # These nodes are common ancestors of all unique nodes
            common_to_all_unique_nodes = self._find_nodes_common_to_all_unique(
                unique_tip_searchers, all_unique_searcher, newly_seen_unique,
                step_all_unique_counter == 0)
            step_all_unique_counter = ((step_all_unique_counter + 1)
                                       % STEP_UNIQUE_SEARCHER_EVERY)

            if newly_seen_common:
                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                common_searcher.stop_searching_any(
                    all_unique_searcher.seen.intersection(newly_seen_common))
            if common_to_all_unique_nodes:
                common_to_all_unique_nodes.update(
                    common_searcher.find_seen_ancestors(
                        common_to_all_unique_nodes))
                # The all_unique searcher can start searching the common nodes
                # but everyone else can stop.
                # This is the sort of thing where we would like to not have it
                # start_searching all of the nodes, but only mark all of them
                # as seen, and have it search only the actual tips. Otherwise
                # it is another get_parent_map() traversal for it to figure out
                # what we already should know.
                all_unique_searcher.start_searching(common_to_all_unique_nodes)
                common_searcher.stop_searching_any(common_to_all_unique_nodes)

            next_unique_searchers = self._collapse_unique_searchers(
                unique_tip_searchers, common_to_all_unique_nodes)
            if len(unique_tip_searchers) != len(next_unique_searchers):
                if 'graph' in debug.debug_flags:
                    trace.mutter('Collapsed %d unique searchers => %d'
                                 ' at %s iterations',
                                 len(unique_tip_searchers),
                                 len(next_unique_searchers),
                                 all_unique_searcher._iterations)
            unique_tip_searchers = next_unique_searchers

    def get_parent_map(self, revisions):
        """Get a map of key:parent_list for revisions.

        This implementation delegates to get_parents, for old parent_providers
        that do not supply get_parent_map.
        """
        result = {}
        for rev, parents in zip(revisions, self.get_parents(revisions)):
            if parents is not None:
                result[rev] = parents
        return result

    def _make_breadth_first_searcher(self, revisions):
        return _BreadthFirstSearcher(revisions, self)

    def _find_border_ancestors(self, revisions):
        """Find common ancestors with at least one uncommon descendant.

        Border ancestors are identified using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        This will scale with the number of uncommon ancestors.

        As well as the border ancestors, a set of seen common ancestors and a
        list of sets of seen ancestors for each input revision is returned.
        This allows calculation of graph difference from the results of this
        operation.
        """
        if None in revisions:
            raise errors.InvalidRevisionId(None, self)
        common_ancestors = set()
        searchers = [self._make_breadth_first_searcher([r])
                     for r in revisions]
        border_ancestors = set()

        while True:
            newly_seen = set()
            for searcher in searchers:
                new_ancestors = searcher.step()
                if new_ancestors:
                    newly_seen.update(new_ancestors)
            new_common = set()
            for revision in newly_seen:
                if revision in common_ancestors:
                    # Not a border ancestor because it was seen as common
                    # already
                    new_common.add(revision)
                    continue
                for searcher in searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    border_ancestors.add(revision)
                    new_common.add(revision)
            if new_common:
                for searcher in searchers:
                    new_common.update(searcher.find_seen_ancestors(new_common))
                for searcher in searchers:
                    searcher.start_searching(new_common)
                common_ancestors.update(new_common)

            # Figure out what the searchers will be searching next, and if
            # there is only 1 set being searched, then we are done searching,
            # since all searchers would have to be searching the same data,
            # thus it *must* be in common.
            unique_search_sets = set()
            for searcher in searchers:
                will_search_set = frozenset(searcher._next_query)
                if will_search_set not in unique_search_sets:
                    # This searcher is searching a unique set of nodes, let it
                    unique_search_sets.add(will_search_set)

            if len(unique_search_sets) == 1:
                nodes = unique_search_sets.pop()
                uncommon_nodes = nodes.difference(common_ancestors)
                if uncommon_nodes:
                    raise AssertionError("Somehow we ended up converging"
                                         " without actually marking them as"
                                         " in common."
                                         "\nStart_nodes: %s"
                                         "\nuncommon_nodes: %s"
                                         % (revisions, uncommon_nodes))
                break
        return border_ancestors, common_ancestors, searchers

    def heads(self, keys):
        """Return the heads from amongst keys.

        This is done by searching the ancestries of each key.  Any key that is
        reachable from another key is not returned; all the others are.

        This operation scales with the relative depth between any two keys. If
        any two keys are completely disconnected all ancestry of both sides
        will be retrieved.

        :param keys: An iterable of keys.
        :return: A set of the heads. Note that as a set there is no ordering
            information. Callers will need to filter their input to create
            order if they need it.
        """
        candidate_heads = set(keys)
        if revision.NULL_REVISION in candidate_heads:
            # NULL_REVISION is only a head if it is the only entry
            candidate_heads.remove(revision.NULL_REVISION)
            if not candidate_heads:
                return {revision.NULL_REVISION}
        if len(candidate_heads) < 2:
            return candidate_heads
        searchers = {c: self._make_breadth_first_searcher([c])
                     for c in candidate_heads}
        active_searchers = dict(searchers)
        # skip over the actual candidate for each searcher
        for searcher in active_searchers.values():
            next(searcher)
        # The common walker finds nodes that are common to two or more of the
        # input keys, so that we don't access all history when a currently
        # uncommon search point actually meets up with something behind a
        # common search point. Common search points do not keep searches
        # active; they just allow us to make searches inactive without
        # accessing all history.
        common_walker = self._make_breadth_first_searcher([])
        while len(active_searchers) > 0:
            ancestors = set()
            # advance searches
            try:
                next(common_walker)
            except StopIteration:
                # No common points being searched at this time.
                pass
            for candidate in list(active_searchers):
                try:
                    searcher = active_searchers[candidate]
                except KeyError:
                    # rare case: we deleted candidate in a previous iteration
                    # through this for loop, because it was determined to be
                    # a descendant of another candidate.
                    continue
                try:
                    ancestors.update(next(searcher))
                except StopIteration:
                    del active_searchers[candidate]
                    continue
            # process found nodes
            new_common = set()
            for ancestor in ancestors:
                if ancestor in candidate_heads:
                    candidate_heads.remove(ancestor)
                    del searchers[ancestor]
                    if ancestor in active_searchers:
                        del active_searchers[ancestor]
                # it may meet up with a known common node
                if ancestor in common_walker.seen:
                    # some searcher has encountered our known common nodes:
                    # just stop it
                    ancestor_set = {ancestor}
                    for searcher in searchers.values():
                        searcher.stop_searching_any(ancestor_set)
                else:
                    # or it may have been just reached by all the searchers:
                    for searcher in searchers.values():
                        if ancestor not in searcher.seen:
                            break
                    else:
                        # The final active searcher has just reached this node,
                        # making it be known as a descendant of all candidates,
                        # so we can stop searching it, and any seen ancestors
                        new_common.add(ancestor)
                        for searcher in searchers.values():
                            seen_ancestors = searcher.find_seen_ancestors(
                                [ancestor])
                            searcher.stop_searching_any(seen_ancestors)
            common_walker.start_searching(new_common)
        return candidate_heads

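    # Illustrative sketch on the diagram graph: 'D' is reachable from 'G',
    # so it is not a head, while 'G' and 'H' are mutually unreachable.
    #   graph.heads(['G', 'D'])  # => {'G'}
    #   graph.heads(['G', 'H'])  # => {'G', 'H'}
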
    def find_merge_order(self, tip_revision_id, lca_revision_ids):
        """Find the order that each revision was merged into tip.

        This basically just walks backwards with a stack, and walks left-first
        until it finds a node to stop.
        """
        if len(lca_revision_ids) == 1:
            return list(lca_revision_ids)
        looking_for = set(lca_revision_ids)
        # TODO: Is there a way we could do this "faster" by batching up the
        # get_parent_map requests?
        # TODO: Should we also be culling the ancestry search right away? We
        # could add looking_for to the "stop" list, and walk their
        # ancestry in batched mode. The flip side is it might mean we walk a
        # lot of "stop" nodes, rather than only the minimum.
        # Then again, without it we may trace back into ancestry we could have
        # stopped early.
        stack = [tip_revision_id]
        found = []
        stop = set()
        while stack and looking_for:
            next = stack.pop()
            stop.add(next)
            if next in looking_for:
                found.append(next)
                looking_for.remove(next)
                if len(looking_for) == 1:
                    found.append(looking_for.pop())
                    break
                continue
            parent_ids = self.get_parent_map([next]).get(next, None)
            if not parent_ids:  # Ghost, nothing to search here
                continue
            for parent_id in reversed(parent_ids):
                # TODO: (performance) We see the parent at this point, but we
                #       wait to mark it until later to make sure we get left
                #       parents before right parents. However, instead of
                #       waiting until we have traversed enough parents, we
                #       could instead note that we've found it, and once all
                #       parents are in the stack, just reverse iterate the
                #       stack for them.
                if parent_id not in stop:
                    # this will need to be searched
                    stack.append(parent_id)
                stop.add(parent_id)
        return found

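    # Illustrative sketch on the diagram graph: walking left-first from H
    # reaches D (H's leftmost parent) before E.
    #   graph.find_merge_order('H', ['D', 'E'])  # => ['D', 'E']
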
    def find_lefthand_merger(self, merged_key, tip_key):
        """Find the first lefthand ancestor of tip_key that merged merged_key.

        We do this by first finding the descendants of merged_key, then
        walking through the lefthand ancestry of tip_key until we find a key
        that doesn't descend from merged_key.  Its child is the key that
        merged merged_key.

        :return: The first lefthand ancestor of tip_key to merge merged_key.
            merged_key if it is a lefthand ancestor of tip_key.
            None if no ancestor of tip_key merged merged_key.
        """
        descendants = self.find_descendants(merged_key, tip_key)
        candidate_iterator = self.iter_lefthand_ancestry(tip_key)
        last_candidate = None
        for candidate in candidate_iterator:
            if candidate not in descendants:
                return last_candidate
            last_candidate = candidate

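    # Illustrative sketch with hypothetical ids: given a mainline
    # r1 <- r2 <- r3 <- r4 where r3 has parents (r2, m1) and m1's parent
    # is r1, find_lefthand_merger('m1', 'r4') => 'r3'.
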
    def find_unique_lca(self, left_revision, right_revision,
                        count_steps=False):
        """Find a unique LCA.

        Find lowest common ancestors.  If there is no unique common
        ancestor, find the lowest common ancestors of those ancestors.

        Iteration stops when a unique lowest common ancestor is found.
        The graph origin is necessarily a unique lowest common ancestor.

        Note that None is not an acceptable substitute for NULL_REVISION
        in the input for this method.

        :param count_steps: If True, the return value will be a tuple of
            (unique_lca, steps) where steps is the number of times that
            find_lca was run.  If False, only unique_lca is returned.
        """
        revisions = [left_revision, right_revision]
        steps = 0
        while True:
            steps += 1
            lca = self.find_lca(*revisions)
            if len(lca) == 1:
                result = lca.pop()
                if count_steps:
                    return result, steps
                else:
                    return result
            if len(lca) == 0:
                raise errors.NoCommonAncestor(left_revision, right_revision)
            revisions = lca

    def iter_ancestry(self, revision_ids):
        """Iterate the ancestry of this revision.

        :param revision_ids: Nodes to start the search
        :return: Yield tuples mapping a revision_id to its parents for the
            ancestry of revision_id.
            Ghosts will be returned with None as their parents, and nodes
            with no parents will have NULL_REVISION as their only parent. (As
            defined by get_parent_map.)
            There will also be a node for (NULL_REVISION, ())
        """
        pending = set(revision_ids)
        processed = set()
        while pending:
            processed.update(pending)
            next_map = self.get_parent_map(pending)
            next_pending = set()
            for item in next_map.items():
                yield item
                next_pending.update(p for p in item[1] if p not in processed)
            ghosts = pending.difference(next_map)
            for ghost in ghosts:
                yield (ghost, None)
            pending = next_pending

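    # Illustrative sketch on the diagram graph: iter_ancestry(['G']) yields
    # ('G', ('D', 'E')) first, then its ancestors wave by wave, with the
    # parents exactly as the provider stores them; a ghost would be yielded
    # as (key, None).
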
    def iter_lefthand_ancestry(self, start_key, stop_keys=None):
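        """Iterate backwards through the lefthand (first-parent) ancestry.

        Yields start_key and then each successive parents[0], stopping
        before any key in stop_keys and after a key with no parents.
        """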
        if stop_keys is None:
            stop_keys = ()
        next_key = start_key

        def get_parents(key):
            try:
                return self._parents_provider.get_parent_map([key])[key]
            except KeyError:
                raise errors.RevisionNotPresent(key, self)
        while True:
            if next_key in stop_keys:
                return
            parents = get_parents(next_key)
            yield next_key
            if len(parents) == 0:
                return
            else:
                next_key = parents[0]

    def iter_topo_order(self, revisions):
        """Iterate through the input revisions in topological order.

        This sorting only ensures that parents come before their children.
        An ancestor may sort after a descendant if the relationship is not
        visible in the supplied list of revisions.
        """
        from breezy import tsort
        sorter = tsort.TopoSorter(self.get_parent_map(revisions))
        return sorter.iter_topo_order()

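    # Illustrative sketch on the diagram graph: of the supplied keys,
    # parents always sort before children.
    #   list(graph.iter_topo_order(['G', 'D', 'B']))  # => ['B', 'D', 'G']
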
    def is_ancestor(self, candidate_ancestor, candidate_descendant):
        """Determine whether a revision is an ancestor of another.

        We answer this using heads() as heads() has the logic to perform the
        smallest number of parent lookups to determine the ancestral
        relationship between N revisions.
        """
        return {candidate_descendant} == self.heads(
            [candidate_ancestor, candidate_descendant])

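    # Illustrative sketch on the diagram graph:
    #   graph.is_ancestor('A', 'G')  # => True
    #   graph.is_ancestor('G', 'A')  # => False
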
    def is_between(self, revid, lower_bound_revid, upper_bound_revid):
        """Determine whether a revision is between two others.

        Returns True if and only if:
        lower_bound_revid <= revid <= upper_bound_revid
        """
        return ((upper_bound_revid is None or
                 self.is_ancestor(revid, upper_bound_revid)) and
                (lower_bound_revid is None or
                 self.is_ancestor(lower_bound_revid, revid)))

    def _search_for_extra_common(self, common, searchers):
        """Make sure that unique nodes are genuinely unique.

        After _find_border_ancestors, all nodes marked "common" are indeed
        common. Some of the nodes considered unique are not, due to history
        shortcuts stopping the searches early.

        We know that we have searched enough when all common search tips are
        descended from all unique (uncommon) nodes because we know that a node
        cannot be an ancestor of its own ancestor.

        :param common: A set of common nodes
        :param searchers: The searchers returned from _find_border_ancestors
        :return: None
        """
        # Basic algorithm...
        #   A) The passed in searchers should all be on the same tips, thus
        #      they should be considered the "common" searchers.
        #   B) We find the difference between the searchers, these are the
        #      "unique" nodes for each side.
        #   C) We do a quick culling so that we only start searching from the
        #      more interesting unique nodes. (A unique ancestor is more
        #      interesting than any of its children.)
        #   D) We start searching for ancestors common to all unique nodes.
        #   E) We have the common searchers stop searching any ancestors of
        #      nodes found by (D)
        #   F) When there are no more common search tips, we stop

        # TODO: We need a way to remove unique_searchers when they overlap with
        #       other unique searchers.
        if len(searchers) != 2:
            raise NotImplementedError(
                "Algorithm not yet implemented for > 2 searchers")
        common_searchers = searchers
        left_searcher = searchers[0]
        right_searcher = searchers[1]
        unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
        if not unique:  # No unique nodes, nothing to do
            return
        total_unique = len(unique)
        unique = self._remove_simple_descendants(unique,
                                                 self.get_parent_map(unique))
        simple_unique = len(unique)

        unique_searchers = []
        for revision_id in unique:
            if revision_id in left_searcher.seen:
                parent_searcher = left_searcher
            else:
                parent_searcher = right_searcher
            revs_to_search = parent_searcher.find_seen_ancestors([revision_id])
            if not revs_to_search:  # XXX: This shouldn't be possible
                revs_to_search = [revision_id]
            searcher = self._make_breadth_first_searcher(revs_to_search)
            # We don't care about the starting nodes.
            searcher.step()
            unique_searchers.append(searcher)

        # possible todo: aggregate the common searchers into a single common
        #   searcher, just make sure that we include the nodes into the .seen
        #   properties of the original searchers

        ancestor_all_unique = None
        for searcher in unique_searchers:
            if ancestor_all_unique is None:
                ancestor_all_unique = set(searcher.seen)
            else:
                ancestor_all_unique = ancestor_all_unique.intersection(
                    searcher.seen)

        trace.mutter('Started %d unique searchers for %d unique revisions',
                     simple_unique, total_unique)

        while True:  # If we have no more nodes we have nothing to do
            newly_seen_common = set()
            for searcher in common_searchers:
                newly_seen_common.update(searcher.step())
            newly_seen_unique = set()
            for searcher in unique_searchers:
                newly_seen_unique.update(searcher.step())
            new_common_unique = set()
            for revision in newly_seen_unique:
                for searcher in unique_searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    new_common_unique.add(revision)
            if newly_seen_common:
                # These are nodes descended from one of the 'common' searchers.
                # Make sure all searchers are on the same page
                for searcher in common_searchers:
                    newly_seen_common.update(
                        searcher.find_seen_ancestors(newly_seen_common))
                # We start searching the whole ancestry. It is a bit wasteful,
                # though. We really just want to mark all of these nodes as
                # 'seen' and then start just the tips. However, it requires a
                # get_parent_map() call to figure out the tips anyway, and all
                # redundant requests should be fairly fast.
                for searcher in common_searchers:
                    searcher.start_searching(newly_seen_common)

                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                stop_searching_common = ancestor_all_unique.intersection(
                    newly_seen_common)
                if stop_searching_common:
                    for searcher in common_searchers:
                        searcher.stop_searching_any(stop_searching_common)
            if new_common_unique:
                # We found some ancestors that are common
                for searcher in unique_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))
                # Since these are common, we can grab another set of ancestors
                # that we have seen
                for searcher in common_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))

                # We can tell all of the unique searchers to start at these
                # nodes, and tell all of the common searchers to *stop*
                # searching these nodes
                for searcher in unique_searchers:
                    searcher.start_searching(new_common_unique)
                for searcher in common_searchers:
                    searcher.stop_searching_any(new_common_unique)
                ancestor_all_unique.update(new_common_unique)

                # Filter out searchers that don't actually search different
                # nodes. We already have the ancestry intersection for them
                next_unique_searchers = []
                unique_search_sets = set()
                for searcher in unique_searchers:
                    will_search_set = frozenset(searcher._next_query)
                    if will_search_set not in unique_search_sets:
                        # This searcher is searching a unique set of nodes, let
                        # it
                        unique_search_sets.add(will_search_set)
                        next_unique_searchers.append(searcher)
                unique_searchers = next_unique_searchers
            for searcher in common_searchers:
                if searcher._next_query:
                    break
            else:
                # All common searchers have stopped searching
                return

    def _remove_simple_descendants(self, revisions, parent_map):
        """Remove revisions which are children of other ones in the set.

        This doesn't do any graph searching, it just checks the immediate
        parent_map to find if there are any children which can be removed.

        :param revisions: A set of revision_ids
        :param parent_map: A dictionary mapping revision_ids to their parents
        :return: A set of revision_ids with the children removed
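
        An illustrative doctest (a sketch using the DictParentsProvider and
        Graph classes from this module; 'B' is a direct child of 'A', so it
        is removed):

        >>> g = Graph(DictParentsProvider({}))
        >>> g._remove_simple_descendants({'A', 'B'}, {'B': ('A',)})
        {'A'}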
        """
        simple_ancestors = revisions.copy()
        # TODO: jam 20071214 we *could* restrict it to searching only the
        #       parent_map of revisions already present in 'revisions', but
        #       considering the general use case, I think this is actually
        #       better.

        # This is the same as the following loop. I don't know that it is any
        # faster.
        # simple_ancestors.difference_update(
        #     r for r, p_ids in parent_map.items()
        #     if p_ids is not None and revisions.intersection(p_ids))
        # return simple_ancestors

        # Yet Another Way, invert the parent map (which can be cached)
        # descendants = {}
        # for revision_id, parent_ids in parent_map.items():
        #     for p_id in parent_ids:
        #         descendants.setdefault(p_id, []).append(revision_id)
        # for revision in revisions.intersection(descendants):
        #     simple_ancestors.difference_update(descendants[revision])
        # return simple_ancestors
        for revision, parent_ids in parent_map.items():
            if parent_ids is None:
                continue
            for parent_id in parent_ids:
                if parent_id in revisions:
                    # This node has a parent present in the set, so we can
                    # remove it
                    simple_ancestors.discard(revision)
                    break
        return simple_ancestors


class HeadsCache(object):
    """A cache of results for graph heads calls."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        This matches the API of Graph.heads(), specifically the return value is
        a set which can be mutated, and ordering of the input is not preserved
        in the output.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A set containing the heads, which may be mutated without
            affecting future lookups.
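
        An illustrative doctest (a sketch; 'B' descends from 'A', so 'B' is
        the only head, and the answer is cached for later calls):

        >>> g = Graph(DictParentsProvider({'A': (), 'B': ('A',)}))
        >>> HeadsCache(g).heads(['A', 'B'])
        {'B'}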
        """
        keys = frozenset(keys)
        try:
            return set(self._heads[keys])
        except KeyError:
            heads = self.graph.heads(keys)
            self._heads[keys] = heads
            return set(heads)


class FrozenHeadsCache(object):
    """Cache heads() calls, assuming the caller won't modify them."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        Similar to Graph.heads(). The main difference is that the return value
        is a frozen set which cannot be mutated.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A frozenset containing the heads.
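
        An illustrative doctest (a sketch; cache() primes the cache, so
        heads() can answer without consulting the underlying graph):

        >>> cache = FrozenHeadsCache(Graph(DictParentsProvider({})))
        >>> cache.cache(['A', 'B'], ['B'])
        >>> cache.heads(['A', 'B'])
        frozenset({'B'})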
        """
        keys = frozenset(keys)
        try:
            return self._heads[keys]
        except KeyError:
            heads = frozenset(self.graph.heads(keys))
            self._heads[keys] = heads
            return heads

    def cache(self, keys, heads):
        """Store a known value."""
        self._heads[frozenset(keys)] = frozenset(heads)


class _BreadthFirstSearcher(object):
    """A parallel, breadth-first searcher over the ancestry of revisions.

    This class implements the iterator protocol, but additionally
    1. provides a set of seen ancestors, and
    2. allows some ancestries to be unsearched, via stop_searching_any
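
    An illustrative doctest (a sketch; each iteration step yields one
    breadth-first generation, before that generation's parents are fetched):

    >>> searcher = _BreadthFirstSearcher(
    ...     ['C'], DictParentsProvider({'A': (), 'B': ('A',), 'C': ('B',)}))
    >>> [sorted(generation) for generation in searcher]
    [['C'], ['B'], ['A']]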
    """

    def __init__(self, revisions, parents_provider):
        self._iterations = 0
        self._next_query = set(revisions)
        self.seen = set()
        self._started_keys = set(self._next_query)
        self._stopped_keys = set()
        self._parents_provider = parents_provider
        self._returning = 'next_with_ghosts'
        self._current_present = set()
        self._current_ghosts = set()
        self._current_parents = {}

    def __repr__(self):
        if self._iterations:
            prefix = "searching"
        else:
            prefix = "starting"
        search = '%s=%r' % (prefix, list(self._next_query))
        return ('_BreadthFirstSearcher(iterations=%d, %s,'
                ' seen=%r)' % (self._iterations, search, list(self.seen)))

    def get_state(self):
        """Get the current state of this searcher.

        :return: Tuple with started keys, excludes and included keys
        """
        if self._returning == 'next':
            # We have to know the current nodes' children to be able to list
            # the exclude keys for them. However, while we could have a second
            # look-ahead result buffer and shuffle things around, this method
            # is typically only called once per search - when memoising the
            # results of the search.
            found, ghosts, next_nodes, parents = self._do_query(
                self._next_query)
            # pretend we didn't query: perhaps we should tweak _do_query to be
            # entirely stateless?
            self.seen.difference_update(next_nodes)
            next_query = next_nodes.union(ghosts)
        else:
            next_query = self._next_query
        excludes = self._stopped_keys.union(next_query)
        included_keys = self.seen.difference(excludes)
        return self._started_keys, excludes, included_keys

    def step(self):
        try:
            return next(self)
        except StopIteration:
            return ()

    def __next__(self):
        """Return the next ancestors of this revision.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned before their parentage is queried, so ghosts and missing
        revisions (including the start revisions) are included in the result.
        This can save a round trip in LCA style calculation by allowing
        convergence to be detected without reading the data for the revision
        the convergence occurs on.

        :return: A set of revision_ids.
        """
        if self._returning != 'next':
            # switch to returning the query, not the results.
            self._returning = 'next'
            self._iterations += 1
        else:
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        # We have seen what we're querying at this point as we are returning
        # the query, not the results.
        self.seen.update(self._next_query)
        return self._next_query

    next = __next__

    def next_with_ghosts(self):
        """Return the next found ancestors, with ghosts split out.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned only after asking for their parents, which allows us to detect
        which revisions are ghosts and which are not.

        :return: A tuple with (present ancestors, ghost ancestors) sets.
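
        An illustrative doctest (a sketch; 'ghost' has no entry in the parent
        map, so it is split out once its parents are asked for):

        >>> searcher = _BreadthFirstSearcher(['B'], DictParentsProvider(
        ...     {'A': (), 'B': ('A', 'ghost')}))
        >>> searcher.next_with_ghosts()
        ({'B'}, set())
        >>> present, ghosts = searcher.next_with_ghosts()
        >>> sorted(present), sorted(ghosts)
        (['A'], ['ghost'])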
        """
        if self._returning != 'next_with_ghosts':
            # switch to returning the results, not the current query.
            self._returning = 'next_with_ghosts'
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        self._advance()
        return self._current_present, self._current_ghosts

    def _advance(self):
        """Advance the search.

        Updates self.seen, self._next_query, self._current_present,
        self._current_ghosts, self._current_parents and self._iterations.
        """
        self._iterations += 1
        found, ghosts, next_nodes, parents = self._do_query(self._next_query)
        self._current_present = found
        self._current_ghosts = ghosts
        self._next_query = next_nodes
        self._current_parents = parents
        # ghosts are implicit stop points, otherwise the search cannot be
        # repeated when ghosts are filled.
        self._stopped_keys.update(ghosts)

    def _do_query(self, revisions):
        """Query for revisions.

        Adds revisions to the seen set.

        :param revisions: Revisions to query.
        :return: A tuple: (set(found_revisions), set(ghost_revisions),
           set(parents_of_found_revisions), dict(found_revisions:parents)).
        """
        found_revisions = set()
        parents_of_found = set()
        # revisions may contain nodes that point to other nodes in revisions;
        # updating self.seen first filters those internal references out of
        # the parents gathered below.
        seen = self.seen
        seen.update(revisions)
        parent_map = self._parents_provider.get_parent_map(revisions)
        found_revisions.update(parent_map)
        for rev_id, parents in parent_map.items():
            if parents is None:
                continue
            new_found_parents = [p for p in parents if p not in seen]
            if new_found_parents:
                # Calling set.update() with an empty generator is actually
                # rather expensive.
                parents_of_found.update(new_found_parents)
        ghost_revisions = revisions - found_revisions
        return found_revisions, ghost_revisions, parents_of_found, parent_map

    def __iter__(self):
        return self

    def find_seen_ancestors(self, revisions):
        """Find ancestors of these revisions that have already been seen.

        This function generally makes the assumption that querying for the
        parents of a node that has already been queried is reasonably cheap.
        (eg, not a round trip to a remote host).
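
        An illustrative doctest (a sketch; after walking C -> B -> A, the
        already-seen ancestors of 'B' are 'B' itself and 'A'):

        >>> searcher = _BreadthFirstSearcher(['C'], DictParentsProvider(
        ...     {'A': (), 'B': ('A',), 'C': ('B',)}))
        >>> _ = [next(searcher) for _ in range(3)]
        >>> sorted(searcher.find_seen_ancestors(['B']))
        ['A', 'B']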
        """
        # TODO: Often we might ask one searcher for its seen ancestors, and
        #       then ask another searcher the same question. This can result in
        #       searching the same revisions repeatedly if the two searchers
        #       have a lot of overlap.
        all_seen = self.seen
        pending = set(revisions).intersection(all_seen)
        seen_ancestors = set(pending)

        if self._returning == 'next':
            # self.seen contains what nodes have been returned, not what nodes
            # have been queried. We don't want to probe for nodes that haven't
            # been searched yet.
            not_searched_yet = self._next_query
        else:
            not_searched_yet = ()
        pending.difference_update(not_searched_yet)
        get_parent_map = self._parents_provider.get_parent_map
        while pending:
            parent_map = get_parent_map(pending)
            all_parents = []
            # We don't care if it is a ghost, since it can't be seen if it is
            # a ghost
            for parent_ids in parent_map.values():
                all_parents.extend(parent_ids)
            next_pending = all_seen.intersection(
                all_parents).difference(seen_ancestors)
            seen_ancestors.update(next_pending)
            next_pending.difference_update(not_searched_yet)
            pending = next_pending

        return seen_ancestors

    def stop_searching_any(self, revisions):
        """Remove any of the specified revisions from the search list.

        None of the specified revisions are required to be present in the
        search list.

        It is okay to call stop_searching_any() for revisions which were seen
        in previous iterations. It is the caller's responsibility to call
        find_seen_ancestors() to make sure that current search tips that are
        ancestors of those revisions are also stopped.  All explicitly stopped
        revisions will be excluded from the search result's get_keys(), though.
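
        An illustrative doctest (a sketch; 'A' is still returned afterwards
        because the still-searched 'B' also leads to it):

        >>> searcher = _BreadthFirstSearcher(['B', 'C'], DictParentsProvider(
        ...     {'A': (), 'B': ('A',), 'C': ('A',)}))
        >>> sorted(next(searcher))
        ['B', 'C']
        >>> sorted(searcher.stop_searching_any(['C']))
        ['C']
        >>> sorted(next(searcher))
        ['A']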
        """
        # TODO: does this help performance?
        # if not revisions:
        #     return set()
        revisions = frozenset(revisions)
        if self._returning == 'next':
            stopped = self._next_query.intersection(revisions)
            self._next_query = self._next_query.difference(revisions)
        else:
            stopped_present = self._current_present.intersection(revisions)
            stopped = stopped_present.union(
                self._current_ghosts.intersection(revisions))
            self._current_present.difference_update(stopped)
            self._current_ghosts.difference_update(stopped)
            # stopping 'x' should stop returning parents of 'x', but
            # not if 'y' also references those same parents
            stop_rev_references = {}
            for rev in stopped_present:
                for parent_id in self._current_parents[rev]:
                    if parent_id not in stop_rev_references:
                        stop_rev_references[parent_id] = 0
                    stop_rev_references[parent_id] += 1
            # if only the stopped revisions reference it, the ref count will be
            # 0 after this loop
            for parents in self._current_parents.values():
                for parent_id in parents:
                    try:
                        stop_rev_references[parent_id] -= 1
                    except KeyError:
                        pass
            stop_parents = set()
            for rev_id, refs in stop_rev_references.items():
                if refs == 0:
                    stop_parents.add(rev_id)
            self._next_query.difference_update(stop_parents)
        self._stopped_keys.update(stopped)
        self._stopped_keys.update(revisions)
        return stopped

    def start_searching(self, revisions):
        """Add revisions to the search.

        The parents of revisions will be returned from the next call to next()
        or next_with_ghosts(). If next_with_ghosts was the most recently used
        next* call then the return value is the result of looking up the
        ghost/not ghost status of revisions. (A tuple (present, ghosted)).
        """
        revisions = frozenset(revisions)
        self._started_keys.update(revisions)
        new_revisions = revisions.difference(self.seen)
        if self._returning == 'next':
            self._next_query.update(new_revisions)
            self.seen.update(new_revisions)
        else:
            # perform a query on revisions
            revs, ghosts, query, parents = self._do_query(revisions)
            self._stopped_keys.update(ghosts)
            self._current_present.update(revs)
            self._current_ghosts.update(ghosts)
            self._next_query.update(query)
            self._current_parents.update(parents)
            return revs, ghosts


def invert_parent_map(parent_map):
    """Given a map from child => parents, create a map of parent => children.
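
    An illustrative doctest (a sketch; the output ordering relies on the
    insertion-ordered dicts of Python 3.7+):

    >>> invert_parent_map({'B': ('A',), 'C': ('A', 'B')})
    {'A': ('B', 'C'), 'B': ('C',)}
    """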
    child_map = {}
    for child, parents in parent_map.items():
        for p in parents:
            # Any given parent is likely to have only a small handful of
            # children, and many will have only one. So we avoid the memory
            # overhead of a list, in exchange for extra copying of tuples
            if p not in child_map:
                child_map[p] = (child,)
            else:
                child_map[p] = child_map[p] + (child,)
    return child_map


def collapse_linear_regions(parent_map):
    """Collapse regions of the graph that are 'linear'.

    For example::

      A:[B], B:[C]

    can be collapsed by removing B and getting::

      A:[C]

    :param parent_map: A dictionary mapping children to their parents
    :return: Another dictionary with 'linear' chains collapsed
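
    An illustrative doctest of the example above (a sketch; 'B' is collapsed
    out of the A -> B -> C chain):

    >>> collapse_linear_regions({'A': ('B',), 'B': ('C',), 'C': ()})
    {'A': ('C',), 'C': ()}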
    """
    # Note: this isn't a strictly minimal collapse. For example:
    #   A
    #  / \
    # B   C
    #  \ /
    #   D
    #   |
    #   E
    # Will not have 'D' removed (it has two parents), even though the D->E
    # run is linear. Also:
    #   A
    #   |    A
    #   B => |
    #   |    C
    #   C
    # A and C are both kept because they are edges of the graph. We *could* get
    # rid of A if we wanted.
    #   A
    #  / \
    # B   C
    # |   |
    # D   E
    #  \ /
    #   F
    # Will not have any nodes removed, even though the D->B and E->C segments
    # are 'uninteresting' linear runs.
    children = {}
    for child, parents in parent_map.items():
        children.setdefault(child, [])
        for p in parents:
            children.setdefault(p, []).append(child)

    removed = set()
    result = dict(parent_map)
    for node in parent_map:
        parents = result[node]
        if len(parents) == 1:
            parent_children = children[parents[0]]
            if len(parent_children) != 1:
                # This is not the only child
                continue
            node_children = children[node]
            if len(node_children) != 1:
                continue
            child_parents = result.get(node_children[0])
            if child_parents is None or len(child_parents) != 1:
                # The child is already gone from the result, or this is not
                # its only parent
                continue
            # The child of this node only points at it, and the parent only has
            # this as a child. Remove this node, and join the others together
            result[node_children[0]] = parents
            children[parents[0]] = node_children
            del result[node]
            del children[node]
            removed.add(node)

    return result


class GraphThunkIdsToKeys(object):
    """Forwards calls about 'ids' to be about keys internally."""

    def __init__(self, graph):
        self._graph = graph

    def topo_sort(self):
        return [r for (r,) in self._graph.topo_sort()]

    def heads(self, ids):
        """See Graph.heads().
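
        An illustrative doctest (a sketch; ids are wrapped into 1-tuple keys
        before consulting the wrapped graph, then unwrapped):

        >>> thunk = GraphThunkIdsToKeys(Graph(DictParentsProvider(
        ...     {('A',): (), ('B',): (('A',),)})))
        >>> thunk.heads(['A', 'B'])
        {'B'}
        """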
        as_keys = [(i,) for i in ids]
        head_keys = self._graph.heads(as_keys)
        return {h[0] for h in head_keys}

    def merge_sort(self, tip_revision):
        nodes = self._graph.merge_sort((tip_revision,))
        for node in nodes:
            node.key = node.key[0]
        return nodes

    def add_node(self, revision, parents):
        self._graph.add_node((revision,), [(p,) for p in parents])


_counters = [0, 0, 0, 0, 0, 0, 0]
try:
    from ._known_graph_pyx import KnownGraph
except ImportError as e:
    osutils.failed_to_load_extension(e)
    from ._known_graph_py import KnownGraph  # noqa: F401