# dagop.py - graph ancestry and topology algorithm for revset
#
# Copyright 2010 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import heapq

from .node import nullrev
from .thirdparty import attr
from . import (
    error,
    mdiff,
    patch,
    pycompat,
    scmutil,
    smartset,
)

baseset = smartset.baseset
generatorset = smartset.generatorset

# possible maximum depth between null and wdir()
maxlogdepth = 0x80000000


def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse):
    """Walk DAG using 'pfunc' from the given 'revs' nodes

    'pfunc(rev)' should return the parent/child revisions of the given 'rev'
    if 'reverse' is True/False respectively.

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.
    """
    if startdepth is None:
        startdepth = 0
    if stopdepth is None:
        stopdepth = maxlogdepth
    if stopdepth == 0:
        return
    if stopdepth < 0:
        raise error.ProgrammingError(b'negative stopdepth')
    if reverse:
        heapsign = -1  # max heap
    else:
        heapsign = +1  # min heap

    # load input revs lazily to heap so earlier revisions can be yielded
    # without fully computing the input revs
    revs.sort(reverse)
    irevs = iter(revs)
    pendingheap = []  # [(heapsign * rev, depth), ...] (i.e. lower depth first)

    inputrev = next(irevs, None)
    if inputrev is not None:
        heapq.heappush(pendingheap, (heapsign * inputrev, 0))

    lastrev = None
    while pendingheap:
        currev, curdepth = heapq.heappop(pendingheap)
        currev = heapsign * currev
        if currev == inputrev:
            inputrev = next(irevs, None)
            if inputrev is not None:
                heapq.heappush(pendingheap, (heapsign * inputrev, 0))
        # rescan parents until curdepth >= startdepth because queued entries
        # of the same revision are iterated from the lowest depth
        foundnew = currev != lastrev
        if foundnew and curdepth >= startdepth:
            lastrev = currev
            yield currev
        pdepth = curdepth + 1
        if foundnew and pdepth < stopdepth:
            for prev in pfunc(currev):
                if prev != nullrev:
                    heapq.heappush(pendingheap, (heapsign * prev, pdepth))

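# A minimal doctest-style sketch of _walkrevtree() using a hypothetical toy
# parent map in place of a real changelog:
#
#   >>> parentmap = {0: [], 1: [0], 2: [1], 3: [1]}
#   >>> revs = smartset.baseset([3])
#   >>> list(_walkrevtree(parentmap.__getitem__, revs, None, None,
#   ...                   reverse=True))
#   [3, 1, 0]
#
# With startdepth=1 the starting rev (depth 0) would be omitted, yielding
# [1, 0]; with stopdepth=2 the scan would end before depth 2, yielding
# [3, 1].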

def filectxancestors(fctxs, followfirst=False):
    """Like filectx.ancestors(), but can walk from multiple files/revisions,
    and includes the given fctxs themselves

    Yields (rev, {fctx, ...}) pairs in descending order.
    """
    visit = {}
    visitheap = []

    def addvisit(fctx):
        rev = scmutil.intrev(fctx)
        if rev not in visit:
            visit[rev] = set()
            heapq.heappush(visitheap, -rev)  # max heap
        visit[rev].add(fctx)

    if followfirst:
        cut = 1
    else:
        cut = None

    for c in fctxs:
        addvisit(c)
    while visit:
        currev = -(heapq.heappop(visitheap))
        curfctxs = visit.pop(currev)
        yield currev, curfctxs
        for c in curfctxs:
            for parent in c.parents()[:cut]:
                addvisit(parent)
    assert not visitheap


def filerevancestors(fctxs, followfirst=False):
    """Like filectx.ancestors(), but can walk from multiple files/revisions,
    and includes the given fctxs themselves

    Returns a smartset.
    """
    gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst))
    return generatorset(gen, iterasc=False)

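# A hedged usage sketch (hypothetical 'repo' and file names): walk the
# history of two files at once, following only first parents.
#
#   fctxs = [repo[b'.'][b'a.txt'], repo[b'.'][b'b.txt']]
#   revs = filerevancestors(fctxs, followfirst=True)
#
# The result is a descending smartset, so iteration and membership tests
# are lazy; only the revisions actually consumed are computed.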

def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
    if followfirst:
        cut = 1
    else:
        cut = None
    cl = repo.changelog

    def plainpfunc(rev):
        try:
            return cl.parentrevs(rev)[:cut]
        except error.WdirUnsupported:
            return (pctx.rev() for pctx in repo[rev].parents()[:cut])

    if cutfunc is None:
        pfunc = plainpfunc
    else:
        pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
        revs = revs.filter(lambda rev: not cutfunc(rev))
    return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)


def revancestors(
    repo, revs, followfirst=False, startdepth=None, stopdepth=None, cutfunc=None
):
    r"""Like revlog.ancestors(), but supports additional options, includes
    the given revs themselves, and returns a smartset

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.

    If cutfunc is provided, it will be used to cut the traversal of the DAG.
    When cutfunc(X) returns True, the DAG traversal stops - revision X and
    X's ancestors in the traversal path will be skipped. This could be an
    optimization sometimes.

    Note: if Y is an ancestor of X, cutfunc(X) returning True does not
    necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
    return True in this case. For example,

        D     # revancestors(repo, D, cutfunc=lambda rev: rev == B)
        |\    # will include "A", because the path D -> C -> A was not cut.
        B C   # If "B" gets cut, "A" might want to be cut too.
        |/
        A
    """
    gen = _genrevancestors(
        repo, revs, followfirst, startdepth, stopdepth, cutfunc
    )
    return generatorset(gen, iterasc=False)

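# A hedged sketch of the cutfunc option (hypothetical 'repo'; assumes phase
# number 0 means public, as in core Mercurial): collect ancestors of the
# working-directory parent, pruning the traversal at public revisions.
#
#   draftancestors = revancestors(
#       repo,
#       repo.revs(b'.'),
#       cutfunc=lambda rev: repo[rev].phase() == 0,
#   )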

def _genrevdescendants(repo, revs, followfirst):
    if followfirst:
        cut = 1
    else:
        cut = None

    cl = repo.changelog
    first = revs.min()
    if first == nullrev:
        # Are there nodes with a null first parent and a non-null
        # second one? Maybe. Do we care? Probably not.
        yield first
        for i in cl:
            yield i
    else:
        seen = set(revs)
        for i in cl.revs(first):
            if i in seen:
                yield i
                continue
            for x in cl.parentrevs(i)[:cut]:
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break


def _builddescendantsmap(repo, startrev, followfirst):
    """Build map of 'rev -> child revs', offset from startrev"""
    cl = repo.changelog
    descmap = [[] for _rev in pycompat.xrange(startrev, len(cl))]
    for currev in cl.revs(startrev + 1):
        p1rev, p2rev = cl.parentrevs(currev)
        if p1rev >= startrev:
            descmap[p1rev - startrev].append(currev)
        if not followfirst and p2rev != nullrev and p2rev >= startrev:
            descmap[p2rev - startrev].append(currev)
    return descmap

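# The offset keeps the list dense: descmap[0] holds the children of
# startrev, descmap[1] those of startrev + 1, and so on. For instance, with
# a hypothetical startrev of 10, a merge revision 12 whose parents are
# (10, 11) would be appended to both descmap[0] and descmap[1].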

def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth):
    startrev = revs.min()
    descmap = _builddescendantsmap(repo, startrev, followfirst)

    def pfunc(rev):
        return descmap[rev - startrev]

    return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False)


def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None):
    """Like revlog.descendants() but supports additional options, includes
    the given revs themselves, and returns a smartset

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.
    """
    if startdepth is None and (stopdepth is None or stopdepth >= maxlogdepth):
        gen = _genrevdescendants(repo, revs, followfirst)
    else:
        gen = _genrevdescendantsofdepth(
            repo, revs, followfirst, startdepth, stopdepth
        )
    return generatorset(gen, iterasc=True)

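# A hedged usage sketch (hypothetical 'repo'): children and grandchildren
# of revision 5, i.e. descendants from depth 1 up to, but not including,
# depth 3. This mirrors the revset 'descendants(5, depth=2, startdepth=1)'.
#
#   revs = revdescendants(
#       repo, repo.revs(b'5'), followfirst=False, startdepth=1, stopdepth=3
#   )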

def descendantrevs(revs, revsfn, parentrevsfn):
    """Generate revision number descendants in revision order.

    Yields revision numbers starting with a child of some rev in
    ``revs``. Results are ordered by revision number and are
    therefore topological. Each revision is not considered a descendant
    of itself.

    ``revsfn`` is a callable that with no argument iterates over all
    revision numbers and with a ``start`` argument iterates over revision
    numbers beginning with that value.

    ``parentrevsfn`` is a callable that receives a revision number and
    returns an iterable of parent revision numbers, whose values may include
    nullrev.
    """
    first = min(revs)

    if first == nullrev:
        for rev in revsfn():
            yield rev
        return

    seen = set(revs)
    for rev in revsfn(start=first + 1):
        for prev in parentrevsfn(rev):
            if prev != nullrev and prev in seen:
                seen.add(rev)
                yield rev
                break

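# A minimal doctest-style sketch with a hypothetical four-revision DAG
# (0 is the root, 3 merges 1 and 2):
#
#   >>> parentmap = {0: [nullrev], 1: [0], 2: [0], 3: [1, 2]}
#   >>> def revsfn(start=0):
#   ...     return iter(range(start, 4))
#   >>> def parentrevsfn(rev):
#   ...     return parentmap[rev]
#   >>> list(descendantrevs({1}, revsfn, parentrevsfn))
#   [3]
#
# Rev 2 is not yielded because it does not descend from rev 1; rev 3 is,
# through its first parent.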

class subsetparentswalker(object):
    r"""Scan adjacent ancestors in the graph given by the subset

    This computes parent-child relations in the subgraph filtered by
    a revset. The primary use case is to draw a revision graph.

    In the following example, we consider that the node 'f' has edges to all
    ancestor nodes, but redundant paths are eliminated. The edge 'f'->'b'
    is eliminated because there is a path 'f'->'c'->'b' for example.

          - d - e -
         /         \
        a - b - c - f

    If the node 'c' is filtered out, the edge 'f'->'b' is activated.

          - d - e -
         /         \
        a - b -(c)- f

    Likewise, if 'd' and 'e' are filtered out, this edge is fully eliminated
    since there is a path 'f'->'c'->'b'->'a' for 'f'->'a'.

           (d) (e)

        a - b - c - f

    Implementation-wise, 'f' is passed down to 'a' as unresolved through the
    'f'->'e'->'d'->'a' path, whereas we also remember that 'f' has already
    been resolved while walking down the 'f'->'c'->'b'->'a' path. When
    processing the node 'a', the unresolved 'f'->'a' path is eliminated as
    the 'f' end is marked as resolved.

    Ancestors are searched from the tipmost revision in the subset so the
    results can be cached. You should specify startrev to narrow the search
    space to ':startrev'.
    """

    def __init__(self, repo, subset, startrev=None):
        if startrev is not None:
            subset = repo.revs(b'%d:null', startrev) & subset

        # equivalent to 'subset = subset.sorted(reverse=True)', but there's
        # no such function.
        fastdesc = subset.fastdesc
        if fastdesc:
            desciter = fastdesc()
        else:
            if not subset.isdescending() and not subset.istopo():
                subset = smartset.baseset(subset)
                subset.sort(reverse=True)
            desciter = iter(subset)

        self._repo = repo
        self._changelog = repo.changelog
        self._subset = subset

        # scanning state (see _scanparents):
        self._tovisit = []
        self._pendingcnt = {}
        self._pointers = {}
        self._parents = {}
        self._inputhead = nullrev  # reassigned by self._advanceinput()
        self._inputtail = desciter
        self._bottomrev = nullrev
        self._advanceinput()

    def parentsset(self, rev):
        """Look up parents of the given revision in the subset, and return
        them as a smartset"""
        return smartset.baseset(self.parents(rev))

    def parents(self, rev):
        """Look up parents of the given revision in the subset

        The returned revisions are sorted by parent index (p1/p2).
        """
        self._scanparents(rev)
        return [r for _c, r in sorted(self._parents.get(rev, []))]

    def _parentrevs(self, rev):
        try:
            revs = self._changelog.parentrevs(rev)
            if revs[-1] == nullrev:
                return revs[:-1]
            return revs
        except error.WdirUnsupported:
            return tuple(pctx.rev() for pctx in self._repo[None].parents())

    def _advanceinput(self):
        """Advance the input iterator and set the next revision to _inputhead"""
        if self._inputhead < nullrev:
            return
        try:
            self._inputhead = next(self._inputtail)
        except StopIteration:
            self._bottomrev = self._inputhead
            self._inputhead = nullrev - 1

    def _scanparents(self, stoprev):
        """Scan ancestors until the parents of the specified stoprev are
        resolved"""

        # 'tovisit' is the queue of the input revisions and their ancestors.
        # It will be populated incrementally to minimize the initial cost
        # of computing the given subset.
        #
        # For to-visit revisions, we keep track of
        # - the number of the unresolved paths: pendingcnt[rev],
        # - dict of the unresolved descendants and chains: pointers[rev][0],
        # - set of the already resolved descendants: pointers[rev][1].
        #
        # When a revision is visited, 'pointers[rev]' should be popped and
        # propagated to its parents accordingly.
        #
        # Once all pending paths have been resolved, 'pendingcnt[rev]' becomes
        # 0 and 'parents[rev]' contains the unsorted list of parent revisions
        # and p1/p2 chains (excluding linear paths). The p1/p2 chains will be
        # used as a sort key preferring p1. 'len(chain)' should be the number
        # of merges between two revisions.

        subset = self._subset
        tovisit = self._tovisit  # heap queue of [-rev]
        pendingcnt = self._pendingcnt  # {rev: count} for visited revisions
        pointers = self._pointers  # {rev: [{unresolved_rev: chain}, resolved]}
        parents = self._parents  # {rev: [(chain, rev)]}

        while tovisit or self._inputhead >= nullrev:
            if pendingcnt.get(stoprev) == 0:
                return

            # feed greater revisions from input set to queue
            if not tovisit:
                heapq.heappush(tovisit, -self._inputhead)
                self._advanceinput()
            while self._inputhead >= -tovisit[0]:
                heapq.heappush(tovisit, -self._inputhead)
                self._advanceinput()

            rev = -heapq.heappop(tovisit)
            if rev < self._bottomrev:
                return
            if rev in pendingcnt and rev not in pointers:
                continue  # already visited

            curactive = rev in subset
            pendingcnt.setdefault(rev, 0)  # mark as visited
            if curactive:
                assert rev not in parents
                parents[rev] = []
            unresolved, resolved = pointers.pop(rev, ({}, set()))

            if curactive:
                # reached an active rev, resolve pending descendants' parents
                for r, c in unresolved.items():
                    pendingcnt[r] -= 1
                    assert pendingcnt[r] >= 0
                    if r in resolved:
                        continue  # eliminate redundant path
                    parents[r].append((c, rev))
                    # mark the descendant 'r' as resolved through this path if
                    # there are still pending pointers. the 'resolved' set may
                    # be concatenated later at a fork revision.
                    if pendingcnt[r] > 0:
                        resolved.add(r)
                unresolved.clear()
                # occasionally clean resolved markers. otherwise the set
                # would grow indefinitely.
                resolved = {r for r in resolved if pendingcnt[r] > 0}

            parentrevs = self._parentrevs(rev)
            bothparentsactive = all(p in subset for p in parentrevs)

            # set up or propagate tracking pointers if
            # - one of the parents is not active,
            # - or descendants' parents are unresolved.
            if not bothparentsactive or unresolved or resolved:
                if len(parentrevs) <= 1:
                    # can avoid copying the tracking pointer
                    parentpointers = [(unresolved, resolved)]
                else:
                    parentpointers = [
                        (unresolved, resolved),
                        (unresolved.copy(), resolved.copy()),
                    ]
                    # 'rev' is a merge revision. increment the pending count
                    # as the 'unresolved' dict will be duplicated, and append
                    # p1/p2 code to the existing chains.
                    for r in unresolved:
                        pendingcnt[r] += 1
                        parentpointers[0][0][r] += b'1'
                        parentpointers[1][0][r] += b'2'
                for i, p in enumerate(parentrevs):
                    assert p < rev
                    heapq.heappush(tovisit, -p)
                    if p in pointers:
                        # 'p' is a fork revision. concatenate tracking pointers
                        # and decrement the pending count accordingly.
                        knownunresolved, knownresolved = pointers[p]
                        unresolved, resolved = parentpointers[i]
                        for r, c in unresolved.items():
                            if r in knownunresolved:
                                # unresolved at both paths
                                pendingcnt[r] -= 1
                                assert pendingcnt[r] > 0
                                # take shorter chain
                                knownunresolved[r] = min(c, knownunresolved[r])
                            else:
                                knownunresolved[r] = c
                        # simply propagate the 'resolved' set as deduplicating
                        # 'unresolved' here would be slightly complicated.
                        knownresolved.update(resolved)
                    else:
                        pointers[p] = parentpointers[i]

            # then, populate the active parents directly and add the current
            # 'rev' to the tracking pointers of the inactive parents.
            # 'pointers[p]' may be optimized out if both parents are active.
            chaincodes = [b''] if len(parentrevs) <= 1 else [b'1', b'2']
            if curactive and bothparentsactive:
                for i, p in enumerate(parentrevs):
                    c = chaincodes[i]
                    parents[rev].append((c, p))
                    # no need to mark 'rev' as resolved since the 'rev' should
                    # be fully resolved (i.e. pendingcnt[rev] == 0)
                assert pendingcnt[rev] == 0
            elif curactive:
                for i, p in enumerate(parentrevs):
                    unresolved, resolved = pointers[p]
                    assert rev not in unresolved
                    c = chaincodes[i]
                    if p in subset:
                        parents[rev].append((c, p))
                        # mark 'rev' as resolved through this path
                        resolved.add(rev)
                    else:
                        pendingcnt[rev] += 1
                        unresolved[rev] = c
                assert 0 < pendingcnt[rev] <= 2

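# A hedged usage sketch (hypothetical 'repo'): computing graph edges within
# a filtered subset, as a graph renderer would. Since only draft revisions
# are kept here, parents() returns the nearest draft ancestors rather than
# the direct changelog parents.
#
#   subset = repo.revs(b'draft()')
#   walker = subsetparentswalker(repo, subset)
#   for rev in subset:
#       edges = walker.parents(rev)  # adjacent ancestors within the subset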

def _reachablerootspure(pfunc, minroot, roots, heads, includepath):
    """See revlog.reachableroots"""
    if not roots:
        return []
    roots = set(roots)
    visit = list(heads)
    reachable = set()
    seen = {}
    # prefetch all the things! (because python is slow)
    reached = reachable.add
    dovisit = visit.append
    nextvisit = visit.pop
    # open-code the post-order traversal due to the tiny size of
    # sys.getrecursionlimit()
    while visit:
        rev = nextvisit()
        if rev in roots:
            reached(rev)
            if not includepath:
                continue
        parents = pfunc(rev)
        seen[rev] = parents
        for parent in parents:
            if parent >= minroot and parent not in seen:
                dovisit(parent)
    if not reachable:
        return baseset()
    if not includepath:
        return reachable
    for rev in sorted(seen):
        for parent in seen[rev]:
            if parent in reachable:
                reached(rev)
    return reachable


def reachableroots(repo, roots, heads, includepath=False):
    """See revlog.reachableroots"""
    if not roots:
        return baseset()
    minroot = roots.min()
    roots = list(roots)
    heads = list(heads)
    revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
    revs = baseset(revs)
    revs.sort()
    return revs

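# A minimal doctest-style sketch of _reachablerootspure() on a hypothetical
# diamond DAG (3 merges 1 and 2, both children of 0):
#
#   >>> parentmap = {0: [nullrev], 1: [0], 2: [0], 3: [1, 2]}
#   >>> sorted(_reachablerootspure(parentmap.__getitem__, 1, {1}, [3],
#   ...                            includepath=True))
#   [1, 3]
#
# With includepath=False only the reached roots themselves are returned,
# i.e. {1}.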

def _changesrange(fctx1, fctx2, linerange2, diffopts):
    """Return `(diffinrange, linerange1)` where `diffinrange` is True
    if diff from fctx2 to fctx1 has changes in linerange2 and
    `linerange1` is the new line range for fctx1.
    """
    blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
    filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2)
    diffinrange = any(stype == b'!' for _, stype in filteredblocks)
    return diffinrange, linerange1


def blockancestors(fctx, fromline, toline, followfirst=False):
    """Yield ancestors of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.
    """
    diffopts = patch.diffopts(fctx._repo.ui)
    fctx = fctx.introfilectx()
    visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
    while visit:
        c, linerange2 = visit.pop(max(visit))
        pl = c.parents()
        if followfirst:
            pl = pl[:1]
        if not pl:
            # The block originates from the initial revision.
            yield c, linerange2
            continue
        inrange = False
        for p in pl:
            inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
            inrange = inrange or inrangep
            if linerange1[0] == linerange1[1]:
                # Parent's linerange is empty, meaning that the block got
                # introduced in this revision; no need to go further in this
                # branch.
                continue
            # Set _descendantrev with 'c' (a known descendant) so that, when
            # _adjustlinkrev is called for 'p', it receives this descendant
            # (as srcrev) instead of the possibly-topmost introrev.
            p._descendantrev = c.rev()
            visit[p.linkrev(), p.filenode()] = p, linerange1
        if inrange:
            yield c, linerange2

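# A hedged usage sketch (hypothetical 'repo' and file name): follow the
# history of a block of lines, similar to what the followlines() revset
# does with this function. Line numbers follow the conventions of
# mdiff.blocksinrange().
#
#   fctx = repo[b'.'][b'a.txt']
#   for c, (fromline, toline) in blockancestors(fctx, 10, 20):
#       print(c.rev(), fromline, toline)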

def blockdescendants(fctx, fromline, toline):
    """Yield descendants of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.
    """
    # First possibly yield 'fctx' if it has changes in range with respect to
    # its parents.
    try:
        c, linerange1 = next(blockancestors(fctx, fromline, toline))
    except StopIteration:
        pass
    else:
        if c == fctx:
            yield c, linerange1

    diffopts = patch.diffopts(fctx._repo.ui)
    fl = fctx.filelog()
    seen = {fctx.filerev(): (fctx, (fromline, toline))}
    for i in fl.descendants([fctx.filerev()]):
        c = fctx.filectx(i)
        inrange = False
        for x in fl.parentrevs(i):
            try:
                p, linerange2 = seen[x]
            except KeyError:
                # nullrev or other branch
                continue
            inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
            inrange = inrange or inrangep
            # If revision 'i' has been seen (it's a merge) and the line range
            # previously computed differs from the one we just got, we take the
            # surrounding interval. This is conservative but avoids losing
            # information.
            if i in seen and seen[i][1] != linerange1:
                lbs, ubs = zip(linerange1, seen[i][1])
                linerange1 = min(lbs), max(ubs)
            seen[i] = c, linerange1
        if inrange:
            yield c, linerange1


@attr.s(slots=True, frozen=True)
class annotateline(object):
    fctx = attr.ib()
    lineno = attr.ib()
    # Whether this annotation was the result of a skip-annotate.
    skip = attr.ib(default=False)
    text = attr.ib(default=None)


@attr.s(slots=True, frozen=True)
class _annotatedfile(object):
    # list indexed by lineno - 1
    fctxs = attr.ib()
    linenos = attr.ib()
    skips = attr.ib()
    # full file content
    text = attr.ib()


def _countlines(text):
    if text.endswith(b"\n"):
        return text.count(b"\n")
    return text.count(b"\n") + int(bool(text))

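# _countlines() counts logical lines whether or not the text ends with a
# newline; a doctest-style illustration:
#
#   >>> _countlines(b"a\nb\n")
#   2
#   >>> _countlines(b"a\nb")   # no trailing newline, 'b' still counts
#   2
#   >>> _countlines(b"")
#   0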

def _decoratelines(text, fctx):
    n = _countlines(text)
    linenos = pycompat.rangelist(1, n + 1)
    return _annotatedfile([fctx] * n, linenos, [False] * n, text)


def _annotatepair(parents, childfctx, child, skipchild, diffopts):
    r"""
    Given parent and child fctxes and annotate data for parents, for all lines
    in either parent that match the child, annotate the child with the parent's
    data.

    Additionally, if `skipchild` is True, replace all other lines with parent
    annotate data as well such that child is never blamed for any lines.

    See test-annotate.py for unit tests.
    """
    pblocks = [
        (parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
        for parent in parents
    ]

    if skipchild:
        # Need to iterate over the blocks twice -- make it a list
        pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
    # Mercurial currently prefers p2 over p1 for annotate.
    # TODO: change this?
    for parent, blocks in pblocks:
        for (a1, a2, b1, b2), t in blocks:
            # Changed blocks ('!') or blocks made only of blank lines ('~')
            # belong to the child.
            if t == b'=':
                child.fctxs[b1:b2] = parent.fctxs[a1:a2]
                child.linenos[b1:b2] = parent.linenos[a1:a2]
                child.skips[b1:b2] = parent.skips[a1:a2]

    if skipchild:
        # Now try and match up anything that couldn't be matched.
        # Reversing pblocks maintains the bias towards p2, matching the
        # behavior above.
        pblocks.reverse()

        # The heuristics are:
        # * Work on blocks of changed lines (effectively diff hunks with -U0).
        # This could potentially be smarter but works well enough.
        # * For a non-matching section, do a best-effort fit. Match lines in
        #   diff hunks 1:1, dropping lines as necessary.
        # * Repeat the last line as a last resort.

        # First, replace as much as possible without repeating the last line.
        remaining = [(parent, []) for parent, _blocks in pblocks]
        for idx, (parent, blocks) in enumerate(pblocks):
            for (a1, a2, b1, b2), _t in blocks:
                if a2 - a1 >= b2 - b1:
                    for bk in pycompat.xrange(b1, b2):
                        if child.fctxs[bk] == childfctx:
                            ak = min(a1 + (bk - b1), a2 - 1)
                            child.fctxs[bk] = parent.fctxs[ak]
                            child.linenos[bk] = parent.linenos[ak]
                            child.skips[bk] = True
                else:
                    remaining[idx][1].append((a1, a2, b1, b2))

        # Then, look at anything left, which might involve repeating the last
        # line.
        for parent, blocks in remaining:
            for a1, a2, b1, b2 in blocks:
                for bk in pycompat.xrange(b1, b2):
                    if child.fctxs[bk] == childfctx:
                        ak = min(a1 + (bk - b1), a2 - 1)
                        child.fctxs[bk] = parent.fctxs[ak]
                        child.linenos[bk] = parent.linenos[ak]
                        child.skips[bk] = True
    return child


def annotate(base, parents, skiprevs=None, diffopts=None):
    """Core algorithm for filectx.annotate()

    `parents(fctx)` is a function returning a list of parent filectxs.
    """

    # This algorithm would prefer to be recursive, but Python is a
    # bit recursion-hostile. Instead we do an iterative
    # depth-first search.

    # 1st DFS pre-calculates pcache and needed
    visit = [base]
    pcache = {}
    needed = {base: 1}
    while visit:
        f = visit.pop()
        if f in pcache:
            continue
        pl = parents(f)
        pcache[f] = pl
        for p in pl:
            needed[p] = needed.get(p, 0) + 1
            if p not in pcache:
                visit.append(p)

    # 2nd DFS does the actual annotate
    visit[:] = [base]
    hist = {}
    while visit:
        f = visit[-1]
        if f in hist:
            visit.pop()
            continue

        ready = True
        pl = pcache[f]
        for p in pl:
            if p not in hist:
                ready = False
                visit.append(p)
        if ready:
            visit.pop()
            curr = _decoratelines(f.data(), f)
            skipchild = False
            if skiprevs is not None:
                skipchild = f._changeid in skiprevs
            curr = _annotatepair(
                [hist[p] for p in pl], f, curr, skipchild, diffopts
            )
            for p in pl:
                if needed[p] == 1:
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

    a = hist[base]
    return [
        annotateline(*r)
        for r in zip(a.fctxs, a.linenos, a.skips, mdiff.splitnewlines(a.text))
    ]

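# A hedged usage sketch (hypothetical 'repo' and file name). The real entry
# point is filectx.annotate(), whose parents callback also handles renames
# and linkrev adjustment; the bare lambda below is a simplification.
#
#   fctx = repo[b'.'][b'a.txt']
#   lines = annotate(fctx, lambda f: f.parents(),
#                    diffopts=patch.diffopts(repo.ui))
#   for line in lines:
#       print(line.fctx.rev(), line.lineno, line.text)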

def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revisions already emitted
    # B) revisions in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we search for a subgroup in (B) awaiting REV to become
    #   available. If such a subgroup exists, we add REV to it and the
    #   subgroup is now awaiting REV.parents() to become available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revisions that have been emitted. They can be
    # considered unblocked as the graph generator is already aware of them so
    # there is no need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling this
    # set will force all other branches to wait until this branch is ready to
    # be emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of revs that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') corresponds to the parents of any revision
    # in the group ('revs') that are not themselves contained in the group.
    # The main idea of this algorithm is to delay as much as possible the
    # emission of any revision. This means waiting for the moment we are
    # about to display these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this means we
    # properly retain new branches but give up on any special ordering for
    # ancestors of merges. The implementation can be improved to handle this
    # better.
    #
    # The first subgroup is special. It corresponds to all the revisions that
    # were already emitted. The 'revs' list is expected to be empty and the
    # 'blocked' set contains the parent revisions of already emitted
    # revisions.
    #
    # You could pre-seed the <parents> set of groups[0] with specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterate on pending revs until the current rev has been processed
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Look for a subgroup that is blocked, waiting for the current
            # revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are blocked
                # on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                # revs [5, 4] waiting for 1
                # revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines in their drawing. This is currently not
                # done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroup. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev:  # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given) or
            # if we emitted a whole disconnected set of the graph (reached a
            # root).  In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1:  # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going to
    # iterate over
    for g in groups:
        for r in g[0]:
            yield r

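# A doctest-style sketch reproducing the ordering from the docstring above,
# with a hypothetical parent map (4's parent is 1, 3's is 2, and 1 and 2
# both descend from the root 0):
#
#   >>> parentmap = {0: [], 1: [0], 2: [0], 3: [2], 4: [1]}
#   >>> list(toposort([0, 1, 2, 3, 4], parentmap.__getitem__))
#   [4, 1, 3, 2, 0]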

def headrevs(revs, parentsfn):
    """Resolve the set of heads from a set of revisions.

    Receives an iterable of revision numbers and a callable that receives a
    revision number and returns an iterable of parent revision numbers, possibly
    including nullrev.

    Returns a set of revision numbers that are DAG heads within the passed
    subset.

    ``nullrev`` is never included in the returned set, even if it is provided in
    the input set.
    """
    headrevs = set(revs)
    parents = {nullrev}
    up = parents.update

    for rev in revs:
        up(parentsfn(rev))
    headrevs.difference_update(parents)
    return headrevs

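# A doctest-style sketch with a hypothetical three-revision DAG where 1 and
# 2 are both children of 0, so 0 is the only non-head:
#
#   >>> parentmap = {0: [nullrev], 1: [0], 2: [0]}
#   >>> sorted(headrevs([0, 1, 2], parentmap.__getitem__))
#   [1, 2]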

def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None):
    """Returns the set of all revs that have no children, with control over
    the search space via ``startrev`` and ``stoprevs``.

    ``revsfn`` is a callable that with no arguments returns an iterator over
    all revision numbers in topological order. With a ``start`` argument, it
    returns revision numbers starting at that number.

    ``parentrevsfn`` is a callable receiving a revision number and returns an
    iterable of parent revision numbers, where values can include nullrev.

    ``startrev`` is a revision number at which to start the search.

    ``stoprevs`` is an iterable of revision numbers that, when encountered,
    will stop DAG traversal beyond them. Parents of revisions in this
    collection will be heads.
    """
    if startrev is None:
        startrev = nullrev

    stoprevs = set(stoprevs or [])

    reachable = {startrev}
    heads = {startrev}

    for rev in revsfn(start=startrev + 1):
        for prev in parentrevsfn(rev):
            if prev in reachable:
                if rev not in stoprevs:
                    reachable.add(rev)
                heads.add(rev)

            if prev in heads and prev not in stoprevs:
                heads.remove(prev)

    return heads

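# A doctest-style sketch on a hypothetical linear DAG 0 - 1 - 2, where rev 1
# is a stop revision: traversal does not continue past it, so rev 2 is never
# reached and rev 1 itself remains a head.
#
#   >>> parentmap = {0: [nullrev], 1: [0], 2: [1]}
#   >>> def revsfn(start=0):
#   ...     return iter(range(start, 3))
#   >>> def parentrevsfn(rev):
#   ...     return parentmap[rev]
#   >>> headrevssubset(revsfn, parentrevsfn, stoprevs={1})
#   {1}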

def linearize(revs, parentsfn):
    """Linearize and topologically sort a list of revisions.

    The linearization process tries to create long runs of revs where a child
    rev comes immediately after its first parent. This is done by visiting the
    heads of the revs in inverse topological order, and for each visited rev,
    visiting its second parent, then its first parent, then adding the rev
    itself to the output list.

    Returns a list of revision numbers.
    """
    visit = list(sorted(headrevs(revs, parentsfn), reverse=True))
    finished = set()
    result = []

    while visit:
        rev = visit.pop()
        if rev < 0:
            rev = -rev - 1

            if rev not in finished:
                result.append(rev)
                finished.add(rev)

        else:
            visit.append(-rev - 1)

            for prev in parentsfn(rev):
                if prev == nullrev or prev not in revs or prev in finished:
                    continue

                visit.append(prev)

    assert len(result) == len(revs)

    return result

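
# A doctest-style sketch with a hypothetical diamond DAG (3 merges 1 and 2):
# rev 2's branch is emitted first, so that 3 lands immediately after its
# first parent 1.
#
#   >>> parentmap = {0: [nullrev], 1: [0], 2: [0], 3: [1, 2]}
#   >>> linearize({0, 1, 2, 3}, parentmap.__getitem__)
#   [0, 2, 1, 3]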