1# dagop.py - graph ancestry and topology algorithm for revset 2# 3# Copyright 2010 Olivia Mackall <olivia@selenic.com> 4# 5# This software may be used and distributed according to the terms of the 6# GNU General Public License version 2 or any later version. 7 8from __future__ import absolute_import 9 10import heapq 11 12from .node import nullrev 13from .thirdparty import attr 14from .node import nullrev 15from . import ( 16 error, 17 mdiff, 18 patch, 19 pycompat, 20 scmutil, 21 smartset, 22) 23 24baseset = smartset.baseset 25generatorset = smartset.generatorset 26 27# possible maximum depth between null and wdir() 28maxlogdepth = 0x80000000 29 30 31def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse): 32 """Walk DAG using 'pfunc' from the given 'revs' nodes 33 34 'pfunc(rev)' should return the parent/child revisions of the given 'rev' 35 if 'reverse' is True/False respectively. 36 37 Scan ends at the stopdepth (exlusive) if specified. Revisions found 38 earlier than the startdepth are omitted. 39 """ 40 if startdepth is None: 41 startdepth = 0 42 if stopdepth is None: 43 stopdepth = maxlogdepth 44 if stopdepth == 0: 45 return 46 if stopdepth < 0: 47 raise error.ProgrammingError(b'negative stopdepth') 48 if reverse: 49 heapsign = -1 # max heap 50 else: 51 heapsign = +1 # min heap 52 53 # load input revs lazily to heap so earlier revisions can be yielded 54 # without fully computing the input revs 55 revs.sort(reverse) 56 irevs = iter(revs) 57 pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first) 58 59 inputrev = next(irevs, None) 60 if inputrev is not None: 61 heapq.heappush(pendingheap, (heapsign * inputrev, 0)) 62 63 lastrev = None 64 while pendingheap: 65 currev, curdepth = heapq.heappop(pendingheap) 66 currev = heapsign * currev 67 if currev == inputrev: 68 inputrev = next(irevs, None) 69 if inputrev is not None: 70 heapq.heappush(pendingheap, (heapsign * inputrev, 0)) 71 # rescan parents until curdepth >= startdepth because queued entries 72 # of the same revision are iterated from the lowest depth 73 foundnew = currev != lastrev 74 if foundnew and curdepth >= startdepth: 75 lastrev = currev 76 yield currev 77 pdepth = curdepth + 1 78 if foundnew and pdepth < stopdepth: 79 for prev in pfunc(currev): 80 if prev != nullrev: 81 heapq.heappush(pendingheap, (heapsign * prev, pdepth)) 82 83 84def filectxancestors(fctxs, followfirst=False): 85 """Like filectx.ancestors(), but can walk from multiple files/revisions, 86 and includes the given fctxs themselves 87 88 Yields (rev, {fctx, ...}) pairs in descending order. 89 """ 90 visit = {} 91 visitheap = [] 92 93 def addvisit(fctx): 94 rev = scmutil.intrev(fctx) 95 if rev not in visit: 96 visit[rev] = set() 97 heapq.heappush(visitheap, -rev) # max heap 98 visit[rev].add(fctx) 99 100 if followfirst: 101 cut = 1 102 else: 103 cut = None 104 105 for c in fctxs: 106 addvisit(c) 107 while visit: 108 currev = -(heapq.heappop(visitheap)) 109 curfctxs = visit.pop(currev) 110 yield currev, curfctxs 111 for c in curfctxs: 112 for parent in c.parents()[:cut]: 113 addvisit(parent) 114 assert not visitheap 115 116 117def filerevancestors(fctxs, followfirst=False): 118 """Like filectx.ancestors(), but can walk from multiple files/revisions, 119 and includes the given fctxs themselves 120 121 Returns a smartset. 122 """ 123 gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst)) 124 return generatorset(gen, iterasc=False) 125 126 127def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc): 128 if followfirst: 129 cut = 1 130 else: 131 cut = None 132 cl = repo.changelog 133 134 def plainpfunc(rev): 135 try: 136 return cl.parentrevs(rev)[:cut] 137 except error.WdirUnsupported: 138 return (pctx.rev() for pctx in repo[rev].parents()[:cut]) 139 140 if cutfunc is None: 141 pfunc = plainpfunc 142 else: 143 pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)] 144 revs = revs.filter(lambda rev: not cutfunc(rev)) 145 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True) 146 147 148def revancestors( 149 repo, revs, followfirst=False, startdepth=None, stopdepth=None, cutfunc=None 150): 151 r"""Like revlog.ancestors(), but supports additional options, includes 152 the given revs themselves, and returns a smartset 153 154 Scan ends at the stopdepth (exlusive) if specified. Revisions found 155 earlier than the startdepth are omitted. 156 157 If cutfunc is provided, it will be used to cut the traversal of the DAG. 158 When cutfunc(X) returns True, the DAG traversal stops - revision X and 159 X's ancestors in the traversal path will be skipped. This could be an 160 optimization sometimes. 161 162 Note: if Y is an ancestor of X, cutfunc(X) returning True does not 163 necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to 164 return True in this case. For example, 165 166 D # revancestors(repo, D, cutfunc=lambda rev: rev == B) 167 |\ # will include "A", because the path D -> C -> A was not cut. 168 B C # If "B" gets cut, "A" might want to be cut too. 169 |/ 170 A 171 """ 172 gen = _genrevancestors( 173 repo, revs, followfirst, startdepth, stopdepth, cutfunc 174 ) 175 return generatorset(gen, iterasc=False) 176 177 178def _genrevdescendants(repo, revs, followfirst): 179 if followfirst: 180 cut = 1 181 else: 182 cut = None 183 184 cl = repo.changelog 185 first = revs.min() 186 if first == nullrev: 187 # Are there nodes with a null first parent and a non-null 188 # second one? Maybe. Do we care? Probably not. 189 yield first 190 for i in cl: 191 yield i 192 else: 193 seen = set(revs) 194 for i in cl.revs(first): 195 if i in seen: 196 yield i 197 continue 198 for x in cl.parentrevs(i)[:cut]: 199 if x != nullrev and x in seen: 200 seen.add(i) 201 yield i 202 break 203 204 205def _builddescendantsmap(repo, startrev, followfirst): 206 """Build map of 'rev -> child revs', offset from startrev""" 207 cl = repo.changelog 208 descmap = [[] for _rev in pycompat.xrange(startrev, len(cl))] 209 for currev in cl.revs(startrev + 1): 210 p1rev, p2rev = cl.parentrevs(currev) 211 if p1rev >= startrev: 212 descmap[p1rev - startrev].append(currev) 213 if not followfirst and p2rev != nullrev and p2rev >= startrev: 214 descmap[p2rev - startrev].append(currev) 215 return descmap 216 217 218def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth): 219 startrev = revs.min() 220 descmap = _builddescendantsmap(repo, startrev, followfirst) 221 222 def pfunc(rev): 223 return descmap[rev - startrev] 224 225 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False) 226 227 228def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None): 229 """Like revlog.descendants() but supports additional options, includes 230 the given revs themselves, and returns a smartset 231 232 Scan ends at the stopdepth (exlusive) if specified. Revisions found 233 earlier than the startdepth are omitted. 234 """ 235 if startdepth is None and (stopdepth is None or stopdepth >= maxlogdepth): 236 gen = _genrevdescendants(repo, revs, followfirst) 237 else: 238 gen = _genrevdescendantsofdepth( 239 repo, revs, followfirst, startdepth, stopdepth 240 ) 241 return generatorset(gen, iterasc=True) 242 243 244def descendantrevs(revs, revsfn, parentrevsfn): 245 """Generate revision number descendants in revision order. 246 247 Yields revision numbers starting with a child of some rev in 248 ``revs``. Results are ordered by revision number and are 249 therefore topological. Each revision is not considered a descendant 250 of itself. 251 252 ``revsfn`` is a callable that with no argument iterates over all 253 revision numbers and with a ``start`` argument iterates over revision 254 numbers beginning with that value. 255 256 ``parentrevsfn`` is a callable that receives a revision number and 257 returns an iterable of parent revision numbers, whose values may include 258 nullrev. 259 """ 260 first = min(revs) 261 262 if first == nullrev: 263 for rev in revsfn(): 264 yield rev 265 return 266 267 seen = set(revs) 268 for rev in revsfn(start=first + 1): 269 for prev in parentrevsfn(rev): 270 if prev != nullrev and prev in seen: 271 seen.add(rev) 272 yield rev 273 break 274 275 276class subsetparentswalker(object): 277 r"""Scan adjacent ancestors in the graph given by the subset 278 279 This computes parent-child relations in the sub graph filtered by 280 a revset. Primary use case is to draw a revisions graph. 281 282 In the following example, we consider that the node 'f' has edges to all 283 ancestor nodes, but redundant paths are eliminated. The edge 'f'->'b' 284 is eliminated because there is a path 'f'->'c'->'b' for example. 285 286 - d - e - 287 / \ 288 a - b - c - f 289 290 If the node 'c' is filtered out, the edge 'f'->'b' is activated. 291 292 - d - e - 293 / \ 294 a - b -(c)- f 295 296 Likewise, if 'd' and 'e' are filtered out, this edge is fully eliminated 297 since there is a path 'f'->'c'->'b'->'a' for 'f'->'a'. 298 299 (d) (e) 300 301 a - b - c - f 302 303 Implementation-wise, 'f' is passed down to 'a' as unresolved through the 304 'f'->'e'->'d'->'a' path, whereas we do also remember that 'f' has already 305 been resolved while walking down the 'f'->'c'->'b'->'a' path. When 306 processing the node 'a', the unresolved 'f'->'a' path is eliminated as 307 the 'f' end is marked as resolved. 308 309 Ancestors are searched from the tipmost revision in the subset so the 310 results can be cached. You should specify startrev to narrow the search 311 space to ':startrev'. 312 """ 313 314 def __init__(self, repo, subset, startrev=None): 315 if startrev is not None: 316 subset = repo.revs(b'%d:null', startrev) & subset 317 318 # equivalent to 'subset = subset.sorted(reverse=True)', but there's 319 # no such function. 320 fastdesc = subset.fastdesc 321 if fastdesc: 322 desciter = fastdesc() 323 else: 324 if not subset.isdescending() and not subset.istopo(): 325 subset = smartset.baseset(subset) 326 subset.sort(reverse=True) 327 desciter = iter(subset) 328 329 self._repo = repo 330 self._changelog = repo.changelog 331 self._subset = subset 332 333 # scanning state (see _scanparents): 334 self._tovisit = [] 335 self._pendingcnt = {} 336 self._pointers = {} 337 self._parents = {} 338 self._inputhead = nullrev # reassigned by self._advanceinput() 339 self._inputtail = desciter 340 self._bottomrev = nullrev 341 self._advanceinput() 342 343 def parentsset(self, rev): 344 """Look up parents of the given revision in the subset, and returns 345 as a smartset""" 346 return smartset.baseset(self.parents(rev)) 347 348 def parents(self, rev): 349 """Look up parents of the given revision in the subset 350 351 The returned revisions are sorted by parent index (p1/p2). 352 """ 353 self._scanparents(rev) 354 return [r for _c, r in sorted(self._parents.get(rev, []))] 355 356 def _parentrevs(self, rev): 357 try: 358 revs = self._changelog.parentrevs(rev) 359 if revs[-1] == nullrev: 360 return revs[:-1] 361 return revs 362 except error.WdirUnsupported: 363 return tuple(pctx.rev() for pctx in self._repo[None].parents()) 364 365 def _advanceinput(self): 366 """Advance the input iterator and set the next revision to _inputhead""" 367 if self._inputhead < nullrev: 368 return 369 try: 370 self._inputhead = next(self._inputtail) 371 except StopIteration: 372 self._bottomrev = self._inputhead 373 self._inputhead = nullrev - 1 374 375 def _scanparents(self, stoprev): 376 """Scan ancestors until the parents of the specified stoprev are 377 resolved""" 378 379 # 'tovisit' is the queue of the input revisions and their ancestors. 380 # It will be populated incrementally to minimize the initial cost 381 # of computing the given subset. 382 # 383 # For to-visit revisions, we keep track of 384 # - the number of the unresolved paths: pendingcnt[rev], 385 # - dict of the unresolved descendants and chains: pointers[rev][0], 386 # - set of the already resolved descendants: pointers[rev][1]. 387 # 388 # When a revision is visited, 'pointers[rev]' should be popped and 389 # propagated to its parents accordingly. 390 # 391 # Once all pending paths have been resolved, 'pendingcnt[rev]' becomes 392 # 0 and 'parents[rev]' contains the unsorted list of parent revisions 393 # and p1/p2 chains (excluding linear paths.) The p1/p2 chains will be 394 # used as a sort key preferring p1. 'len(chain)' should be the number 395 # of merges between two revisions. 396 397 subset = self._subset 398 tovisit = self._tovisit # heap queue of [-rev] 399 pendingcnt = self._pendingcnt # {rev: count} for visited revisions 400 pointers = self._pointers # {rev: [{unresolved_rev: chain}, resolved]} 401 parents = self._parents # {rev: [(chain, rev)]} 402 403 while tovisit or self._inputhead >= nullrev: 404 if pendingcnt.get(stoprev) == 0: 405 return 406 407 # feed greater revisions from input set to queue 408 if not tovisit: 409 heapq.heappush(tovisit, -self._inputhead) 410 self._advanceinput() 411 while self._inputhead >= -tovisit[0]: 412 heapq.heappush(tovisit, -self._inputhead) 413 self._advanceinput() 414 415 rev = -heapq.heappop(tovisit) 416 if rev < self._bottomrev: 417 return 418 if rev in pendingcnt and rev not in pointers: 419 continue # already visited 420 421 curactive = rev in subset 422 pendingcnt.setdefault(rev, 0) # mark as visited 423 if curactive: 424 assert rev not in parents 425 parents[rev] = [] 426 unresolved, resolved = pointers.pop(rev, ({}, set())) 427 428 if curactive: 429 # reached to active rev, resolve pending descendants' parents 430 for r, c in unresolved.items(): 431 pendingcnt[r] -= 1 432 assert pendingcnt[r] >= 0 433 if r in resolved: 434 continue # eliminate redundant path 435 parents[r].append((c, rev)) 436 # mark the descendant 'r' as resolved through this path if 437 # there are still pending pointers. the 'resolved' set may 438 # be concatenated later at a fork revision. 439 if pendingcnt[r] > 0: 440 resolved.add(r) 441 unresolved.clear() 442 # occasionally clean resolved markers. otherwise the set 443 # would grow indefinitely. 444 resolved = {r for r in resolved if pendingcnt[r] > 0} 445 446 parentrevs = self._parentrevs(rev) 447 bothparentsactive = all(p in subset for p in parentrevs) 448 449 # set up or propagate tracking pointers if 450 # - one of the parents is not active, 451 # - or descendants' parents are unresolved. 452 if not bothparentsactive or unresolved or resolved: 453 if len(parentrevs) <= 1: 454 # can avoid copying the tracking pointer 455 parentpointers = [(unresolved, resolved)] 456 else: 457 parentpointers = [ 458 (unresolved, resolved), 459 (unresolved.copy(), resolved.copy()), 460 ] 461 # 'rev' is a merge revision. increment the pending count 462 # as the 'unresolved' dict will be duplicated, and append 463 # p1/p2 code to the existing chains. 464 for r in unresolved: 465 pendingcnt[r] += 1 466 parentpointers[0][0][r] += b'1' 467 parentpointers[1][0][r] += b'2' 468 for i, p in enumerate(parentrevs): 469 assert p < rev 470 heapq.heappush(tovisit, -p) 471 if p in pointers: 472 # 'p' is a fork revision. concatenate tracking pointers 473 # and decrement the pending count accordingly. 474 knownunresolved, knownresolved = pointers[p] 475 unresolved, resolved = parentpointers[i] 476 for r, c in unresolved.items(): 477 if r in knownunresolved: 478 # unresolved at both paths 479 pendingcnt[r] -= 1 480 assert pendingcnt[r] > 0 481 # take shorter chain 482 knownunresolved[r] = min(c, knownunresolved[r]) 483 else: 484 knownunresolved[r] = c 485 # simply propagate the 'resolved' set as deduplicating 486 # 'unresolved' here would be slightly complicated. 487 knownresolved.update(resolved) 488 else: 489 pointers[p] = parentpointers[i] 490 491 # then, populate the active parents directly and add the current 492 # 'rev' to the tracking pointers of the inactive parents. 493 # 'pointers[p]' may be optimized out if both parents are active. 494 chaincodes = [b''] if len(parentrevs) <= 1 else [b'1', b'2'] 495 if curactive and bothparentsactive: 496 for i, p in enumerate(parentrevs): 497 c = chaincodes[i] 498 parents[rev].append((c, p)) 499 # no need to mark 'rev' as resolved since the 'rev' should 500 # be fully resolved (i.e. pendingcnt[rev] == 0) 501 assert pendingcnt[rev] == 0 502 elif curactive: 503 for i, p in enumerate(parentrevs): 504 unresolved, resolved = pointers[p] 505 assert rev not in unresolved 506 c = chaincodes[i] 507 if p in subset: 508 parents[rev].append((c, p)) 509 # mark 'rev' as resolved through this path 510 resolved.add(rev) 511 else: 512 pendingcnt[rev] += 1 513 unresolved[rev] = c 514 assert 0 < pendingcnt[rev] <= 2 515 516 517def _reachablerootspure(pfunc, minroot, roots, heads, includepath): 518 """See revlog.reachableroots""" 519 if not roots: 520 return [] 521 roots = set(roots) 522 visit = list(heads) 523 reachable = set() 524 seen = {} 525 # prefetch all the things! (because python is slow) 526 reached = reachable.add 527 dovisit = visit.append 528 nextvisit = visit.pop 529 # open-code the post-order traversal due to the tiny size of 530 # sys.getrecursionlimit() 531 while visit: 532 rev = nextvisit() 533 if rev in roots: 534 reached(rev) 535 if not includepath: 536 continue 537 parents = pfunc(rev) 538 seen[rev] = parents 539 for parent in parents: 540 if parent >= minroot and parent not in seen: 541 dovisit(parent) 542 if not reachable: 543 return baseset() 544 if not includepath: 545 return reachable 546 for rev in sorted(seen): 547 for parent in seen[rev]: 548 if parent in reachable: 549 reached(rev) 550 return reachable 551 552 553def reachableroots(repo, roots, heads, includepath=False): 554 """See revlog.reachableroots""" 555 if not roots: 556 return baseset() 557 minroot = roots.min() 558 roots = list(roots) 559 heads = list(heads) 560 revs = repo.changelog.reachableroots(minroot, heads, roots, includepath) 561 revs = baseset(revs) 562 revs.sort() 563 return revs 564 565 566def _changesrange(fctx1, fctx2, linerange2, diffopts): 567 """Return `(diffinrange, linerange1)` where `diffinrange` is True 568 if diff from fctx2 to fctx1 has changes in linerange2 and 569 `linerange1` is the new line range for fctx1. 570 """ 571 blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts) 572 filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2) 573 diffinrange = any(stype == b'!' for _, stype in filteredblocks) 574 return diffinrange, linerange1 575 576 577def blockancestors(fctx, fromline, toline, followfirst=False): 578 """Yield ancestors of `fctx` with respect to the block of lines within 579 `fromline`-`toline` range. 580 """ 581 diffopts = patch.diffopts(fctx._repo.ui) 582 fctx = fctx.introfilectx() 583 visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))} 584 while visit: 585 c, linerange2 = visit.pop(max(visit)) 586 pl = c.parents() 587 if followfirst: 588 pl = pl[:1] 589 if not pl: 590 # The block originates from the initial revision. 591 yield c, linerange2 592 continue 593 inrange = False 594 for p in pl: 595 inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts) 596 inrange = inrange or inrangep 597 if linerange1[0] == linerange1[1]: 598 # Parent's linerange is empty, meaning that the block got 599 # introduced in this revision; no need to go futher in this 600 # branch. 601 continue 602 # Set _descendantrev with 'c' (a known descendant) so that, when 603 # _adjustlinkrev is called for 'p', it receives this descendant 604 # (as srcrev) instead possibly topmost introrev. 605 p._descendantrev = c.rev() 606 visit[p.linkrev(), p.filenode()] = p, linerange1 607 if inrange: 608 yield c, linerange2 609 610 611def blockdescendants(fctx, fromline, toline): 612 """Yield descendants of `fctx` with respect to the block of lines within 613 `fromline`-`toline` range. 614 """ 615 # First possibly yield 'fctx' if it has changes in range with respect to 616 # its parents. 617 try: 618 c, linerange1 = next(blockancestors(fctx, fromline, toline)) 619 except StopIteration: 620 pass 621 else: 622 if c == fctx: 623 yield c, linerange1 624 625 diffopts = patch.diffopts(fctx._repo.ui) 626 fl = fctx.filelog() 627 seen = {fctx.filerev(): (fctx, (fromline, toline))} 628 for i in fl.descendants([fctx.filerev()]): 629 c = fctx.filectx(i) 630 inrange = False 631 for x in fl.parentrevs(i): 632 try: 633 p, linerange2 = seen[x] 634 except KeyError: 635 # nullrev or other branch 636 continue 637 inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts) 638 inrange = inrange or inrangep 639 # If revision 'i' has been seen (it's a merge) and the line range 640 # previously computed differs from the one we just got, we take the 641 # surrounding interval. This is conservative but avoids loosing 642 # information. 643 if i in seen and seen[i][1] != linerange1: 644 lbs, ubs = zip(linerange1, seen[i][1]) 645 linerange1 = min(lbs), max(ubs) 646 seen[i] = c, linerange1 647 if inrange: 648 yield c, linerange1 649 650 651@attr.s(slots=True, frozen=True) 652class annotateline(object): 653 fctx = attr.ib() 654 lineno = attr.ib() 655 # Whether this annotation was the result of a skip-annotate. 656 skip = attr.ib(default=False) 657 text = attr.ib(default=None) 658 659 660@attr.s(slots=True, frozen=True) 661class _annotatedfile(object): 662 # list indexed by lineno - 1 663 fctxs = attr.ib() 664 linenos = attr.ib() 665 skips = attr.ib() 666 # full file content 667 text = attr.ib() 668 669 670def _countlines(text): 671 if text.endswith(b"\n"): 672 return text.count(b"\n") 673 return text.count(b"\n") + int(bool(text)) 674 675 676def _decoratelines(text, fctx): 677 n = _countlines(text) 678 linenos = pycompat.rangelist(1, n + 1) 679 return _annotatedfile([fctx] * n, linenos, [False] * n, text) 680 681 682def _annotatepair(parents, childfctx, child, skipchild, diffopts): 683 r""" 684 Given parent and child fctxes and annotate data for parents, for all lines 685 in either parent that match the child, annotate the child with the parent's 686 data. 687 688 Additionally, if `skipchild` is True, replace all other lines with parent 689 annotate data as well such that child is never blamed for any lines. 690 691 See test-annotate.py for unit tests. 692 """ 693 pblocks = [ 694 (parent, mdiff.allblocks(parent.text, child.text, opts=diffopts)) 695 for parent in parents 696 ] 697 698 if skipchild: 699 # Need to iterate over the blocks twice -- make it a list 700 pblocks = [(p, list(blocks)) for (p, blocks) in pblocks] 701 # Mercurial currently prefers p2 over p1 for annotate. 702 # TODO: change this? 703 for parent, blocks in pblocks: 704 for (a1, a2, b1, b2), t in blocks: 705 # Changed blocks ('!') or blocks made only of blank lines ('~') 706 # belong to the child. 707 if t == b'=': 708 child.fctxs[b1:b2] = parent.fctxs[a1:a2] 709 child.linenos[b1:b2] = parent.linenos[a1:a2] 710 child.skips[b1:b2] = parent.skips[a1:a2] 711 712 if skipchild: 713 # Now try and match up anything that couldn't be matched, 714 # Reversing pblocks maintains bias towards p2, matching above 715 # behavior. 716 pblocks.reverse() 717 718 # The heuristics are: 719 # * Work on blocks of changed lines (effectively diff hunks with -U0). 720 # This could potentially be smarter but works well enough. 721 # * For a non-matching section, do a best-effort fit. Match lines in 722 # diff hunks 1:1, dropping lines as necessary. 723 # * Repeat the last line as a last resort. 724 725 # First, replace as much as possible without repeating the last line. 726 remaining = [(parent, []) for parent, _blocks in pblocks] 727 for idx, (parent, blocks) in enumerate(pblocks): 728 for (a1, a2, b1, b2), _t in blocks: 729 if a2 - a1 >= b2 - b1: 730 for bk in pycompat.xrange(b1, b2): 731 if child.fctxs[bk] == childfctx: 732 ak = min(a1 + (bk - b1), a2 - 1) 733 child.fctxs[bk] = parent.fctxs[ak] 734 child.linenos[bk] = parent.linenos[ak] 735 child.skips[bk] = True 736 else: 737 remaining[idx][1].append((a1, a2, b1, b2)) 738 739 # Then, look at anything left, which might involve repeating the last 740 # line. 741 for parent, blocks in remaining: 742 for a1, a2, b1, b2 in blocks: 743 for bk in pycompat.xrange(b1, b2): 744 if child.fctxs[bk] == childfctx: 745 ak = min(a1 + (bk - b1), a2 - 1) 746 child.fctxs[bk] = parent.fctxs[ak] 747 child.linenos[bk] = parent.linenos[ak] 748 child.skips[bk] = True 749 return child 750 751 752def annotate(base, parents, skiprevs=None, diffopts=None): 753 """Core algorithm for filectx.annotate() 754 755 `parents(fctx)` is a function returning a list of parent filectxs. 756 """ 757 758 # This algorithm would prefer to be recursive, but Python is a 759 # bit recursion-hostile. Instead we do an iterative 760 # depth-first search. 761 762 # 1st DFS pre-calculates pcache and needed 763 visit = [base] 764 pcache = {} 765 needed = {base: 1} 766 while visit: 767 f = visit.pop() 768 if f in pcache: 769 continue 770 pl = parents(f) 771 pcache[f] = pl 772 for p in pl: 773 needed[p] = needed.get(p, 0) + 1 774 if p not in pcache: 775 visit.append(p) 776 777 # 2nd DFS does the actual annotate 778 visit[:] = [base] 779 hist = {} 780 while visit: 781 f = visit[-1] 782 if f in hist: 783 visit.pop() 784 continue 785 786 ready = True 787 pl = pcache[f] 788 for p in pl: 789 if p not in hist: 790 ready = False 791 visit.append(p) 792 if ready: 793 visit.pop() 794 curr = _decoratelines(f.data(), f) 795 skipchild = False 796 if skiprevs is not None: 797 skipchild = f._changeid in skiprevs 798 curr = _annotatepair( 799 [hist[p] for p in pl], f, curr, skipchild, diffopts 800 ) 801 for p in pl: 802 if needed[p] == 1: 803 del hist[p] 804 del needed[p] 805 else: 806 needed[p] -= 1 807 808 hist[f] = curr 809 del pcache[f] 810 811 a = hist[base] 812 return [ 813 annotateline(*r) 814 for r in zip(a.fctxs, a.linenos, a.skips, mdiff.splitnewlines(a.text)) 815 ] 816 817 818def toposort(revs, parentsfunc, firstbranch=()): 819 """Yield revisions from heads to roots one (topo) branch at a time. 820 821 This function aims to be used by a graph generator that wishes to minimize 822 the number of parallel branches and their interleaving. 823 824 Example iteration order (numbers show the "true" order in a changelog): 825 826 o 4 827 | 828 o 1 829 | 830 | o 3 831 | | 832 | o 2 833 |/ 834 o 0 835 836 Note that the ancestors of merges are understood by the current 837 algorithm to be on the same branch. This means no reordering will 838 occur behind a merge. 839 """ 840 841 ### Quick summary of the algorithm 842 # 843 # This function is based around a "retention" principle. We keep revisions 844 # in memory until we are ready to emit a whole branch that immediately 845 # "merges" into an existing one. This reduces the number of parallel 846 # branches with interleaved revisions. 847 # 848 # During iteration revs are split into two groups: 849 # A) revision already emitted 850 # B) revision in "retention". They are stored as different subgroups. 851 # 852 # for each REV, we do the following logic: 853 # 854 # 1) if REV is a parent of (A), we will emit it. If there is a 855 # retention group ((B) above) that is blocked on REV being 856 # available, we emit all the revisions out of that retention 857 # group first. 858 # 859 # 2) else, we'll search for a subgroup in (B) awaiting for REV to be 860 # available, if such subgroup exist, we add REV to it and the subgroup is 861 # now awaiting for REV.parents() to be available. 862 # 863 # 3) finally if no such group existed in (B), we create a new subgroup. 864 # 865 # 866 # To bootstrap the algorithm, we emit the tipmost revision (which 867 # puts it in group (A) from above). 868 869 revs.sort(reverse=True) 870 871 # Set of parents of revision that have been emitted. They can be considered 872 # unblocked as the graph generator is already aware of them so there is no 873 # need to delay the revisions that reference them. 874 # 875 # If someone wants to prioritize a branch over the others, pre-filling this 876 # set will force all other branches to wait until this branch is ready to be 877 # emitted. 878 unblocked = set(firstbranch) 879 880 # list of groups waiting to be displayed, each group is defined by: 881 # 882 # (revs: lists of revs waiting to be displayed, 883 # blocked: set of that cannot be displayed before those in 'revs') 884 # 885 # The second value ('blocked') correspond to parents of any revision in the 886 # group ('revs') that is not itself contained in the group. The main idea 887 # of this algorithm is to delay as much as possible the emission of any 888 # revision. This means waiting for the moment we are about to display 889 # these parents to display the revs in a group. 890 # 891 # This first implementation is smart until it encounters a merge: it will 892 # emit revs as soon as any parent is about to be emitted and can grow an 893 # arbitrary number of revs in 'blocked'. In practice this mean we properly 894 # retains new branches but gives up on any special ordering for ancestors 895 # of merges. The implementation can be improved to handle this better. 896 # 897 # The first subgroup is special. It corresponds to all the revision that 898 # were already emitted. The 'revs' lists is expected to be empty and the 899 # 'blocked' set contains the parents revisions of already emitted revision. 900 # 901 # You could pre-seed the <parents> set of groups[0] to a specific 902 # changesets to select what the first emitted branch should be. 903 groups = [([], unblocked)] 904 pendingheap = [] 905 pendingset = set() 906 907 heapq.heapify(pendingheap) 908 heappop = heapq.heappop 909 heappush = heapq.heappush 910 for currentrev in revs: 911 # Heap works with smallest element, we want highest so we invert 912 if currentrev not in pendingset: 913 heappush(pendingheap, -currentrev) 914 pendingset.add(currentrev) 915 # iterates on pending rev until after the current rev have been 916 # processed. 917 rev = None 918 while rev != currentrev: 919 rev = -heappop(pendingheap) 920 pendingset.remove(rev) 921 922 # Seek for a subgroup blocked, waiting for the current revision. 923 matching = [i for i, g in enumerate(groups) if rev in g[1]] 924 925 if matching: 926 # The main idea is to gather together all sets that are blocked 927 # on the same revision. 928 # 929 # Groups are merged when a common blocking ancestor is 930 # observed. For example, given two groups: 931 # 932 # revs [5, 4] waiting for 1 933 # revs [3, 2] waiting for 1 934 # 935 # These two groups will be merged when we process 936 # 1. In theory, we could have merged the groups when 937 # we added 2 to the group it is now in (we could have 938 # noticed the groups were both blocked on 1 then), but 939 # the way it works now makes the algorithm simpler. 940 # 941 # We also always keep the oldest subgroup first. We can 942 # probably improve the behavior by having the longest set 943 # first. That way, graph algorithms could minimise the length 944 # of parallel lines their drawing. This is currently not done. 945 targetidx = matching.pop(0) 946 trevs, tparents = groups[targetidx] 947 for i in matching: 948 gr = groups[i] 949 trevs.extend(gr[0]) 950 tparents |= gr[1] 951 # delete all merged subgroups (except the one we kept) 952 # (starting from the last subgroup for performance and 953 # sanity reasons) 954 for i in reversed(matching): 955 del groups[i] 956 else: 957 # This is a new head. We create a new subgroup for it. 958 targetidx = len(groups) 959 groups.append(([], {rev})) 960 961 gr = groups[targetidx] 962 963 # We now add the current nodes to this subgroups. This is done 964 # after the subgroup merging because all elements from a subgroup 965 # that relied on this rev must precede it. 966 # 967 # we also update the <parents> set to include the parents of the 968 # new nodes. 969 if rev == currentrev: # only display stuff in rev 970 gr[0].append(rev) 971 gr[1].remove(rev) 972 parents = [p for p in parentsfunc(rev) if p > nullrev] 973 gr[1].update(parents) 974 for p in parents: 975 if p not in pendingset: 976 pendingset.add(p) 977 heappush(pendingheap, -p) 978 979 # Look for a subgroup to display 980 # 981 # When unblocked is empty (if clause), we were not waiting for any 982 # revisions during the first iteration (if no priority was given) or 983 # if we emitted a whole disconnected set of the graph (reached a 984 # root). In that case we arbitrarily take the oldest known 985 # subgroup. The heuristic could probably be better. 986 # 987 # Otherwise (elif clause) if the subgroup is blocked on 988 # a revision we just emitted, we can safely emit it as 989 # well. 990 if not unblocked: 991 if len(groups) > 1: # display other subset 992 targetidx = 1 993 gr = groups[1] 994 elif not gr[1] & unblocked: 995 gr = None 996 997 if gr is not None: 998 # update the set of awaited revisions with the one from the 999 # subgroup 1000 unblocked |= gr[1] 1001 # output all revisions in the subgroup 1002 for r in gr[0]: 1003 yield r 1004 # delete the subgroup that you just output 1005 # unless it is groups[0] in which case you just empty it. 1006 if targetidx: 1007 del groups[targetidx] 1008 else: 1009 gr[0][:] = [] 1010 # Check if we have some subgroup waiting for revisions we are not going to 1011 # iterate over 1012 for g in groups: 1013 for r in g[0]: 1014 yield r 1015 1016 1017def headrevs(revs, parentsfn): 1018 """Resolve the set of heads from a set of revisions. 1019 1020 Receives an iterable of revision numbers and a callbable that receives a 1021 revision number and returns an iterable of parent revision numbers, possibly 1022 including nullrev. 1023 1024 Returns a set of revision numbers that are DAG heads within the passed 1025 subset. 1026 1027 ``nullrev`` is never included in the returned set, even if it is provided in 1028 the input set. 1029 """ 1030 headrevs = set(revs) 1031 parents = {nullrev} 1032 up = parents.update 1033 1034 for rev in revs: 1035 up(parentsfn(rev)) 1036 headrevs.difference_update(parents) 1037 return headrevs 1038 1039 1040def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None): 1041 """Returns the set of all revs that have no children with control. 1042 1043 ``revsfn`` is a callable that with no arguments returns an iterator over 1044 all revision numbers in topological order. With a ``start`` argument, it 1045 returns revision numbers starting at that number. 1046 1047 ``parentrevsfn`` is a callable receiving a revision number and returns an 1048 iterable of parent revision numbers, where values can include nullrev. 1049 1050 ``startrev`` is a revision number at which to start the search. 1051 1052 ``stoprevs`` is an iterable of revision numbers that, when encountered, 1053 will stop DAG traversal beyond them. Parents of revisions in this 1054 collection will be heads. 1055 """ 1056 if startrev is None: 1057 startrev = nullrev 1058 1059 stoprevs = set(stoprevs or []) 1060 1061 reachable = {startrev} 1062 heads = {startrev} 1063 1064 for rev in revsfn(start=startrev + 1): 1065 for prev in parentrevsfn(rev): 1066 if prev in reachable: 1067 if rev not in stoprevs: 1068 reachable.add(rev) 1069 heads.add(rev) 1070 1071 if prev in heads and prev not in stoprevs: 1072 heads.remove(prev) 1073 1074 return heads 1075 1076 1077def linearize(revs, parentsfn): 1078 """Linearize and topologically sort a list of revisions. 1079 1080 The linearization process tries to create long runs of revs where a child 1081 rev comes immediately after its first parent. This is done by visiting the 1082 heads of the revs in inverse topological order, and for each visited rev, 1083 visiting its second parent, then its first parent, then adding the rev 1084 itself to the output list. 1085 1086 Returns a list of revision numbers. 1087 """ 1088 visit = list(sorted(headrevs(revs, parentsfn), reverse=True)) 1089 finished = set() 1090 result = [] 1091 1092 while visit: 1093 rev = visit.pop() 1094 if rev < 0: 1095 rev = -rev - 1 1096 1097 if rev not in finished: 1098 result.append(rev) 1099 finished.add(rev) 1100 1101 else: 1102 visit.append(-rev - 1) 1103 1104 for prev in parentsfn(rev): 1105 if prev == nullrev or prev not in revs or prev in finished: 1106 continue 1107 1108 visit.append(prev) 1109 1110 assert len(result) == len(revs) 1111 1112 return result 1113