# Mercurial built-in replacement for cvsps.
#
# Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import

import functools
import os
import re

from mercurial.i18n import _
from mercurial.pycompat import open
from mercurial import (
    encoding,
    error,
    hook,
    pycompat,
    util,
)
from mercurial.utils import (
    dateutil,
    procutil,
    stringutil,
)

pickle = util.pickle


class logentry(object):
    """Class logentry has the following attributes:
    .author - author name as CVS knows it
    .branch - name of branch this revision is on
    .branches - revision tuple of branches starting at this revision
    .comment - commit message
    .commitid - CVS commitid or None
    .date - the commit date as a (time, tz) tuple
    .dead - true if file revision is dead
    .file - Name of file
    .lines - a tuple (+lines, -lines) or None
    .parent - Previous revision of this entry
    .rcs - name of file as returned from CVS
    .revision - revision number as tuple
    .tags - list of tags on the file
    .synthetic - is this a synthetic "file ... added on ..." revision?
    .mergepoint - the branch that has been merged from (if present in
                  rlog output) or None
    .branchpoints - the branches that start at the current entry or empty
    """

    def __init__(self, **entries):
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        items = ("%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__))
        return "%s(%s)" % (type(self).__name__, ", ".join(items))


class logerror(Exception):
    pass


def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    >>> getrepopath(b'/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'user@server/path/to/repository')
    '/path/to/repository'
    """
    # According to CVS manual, CVS paths are expressed like:
    # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # CVSpath is splitted into parts and then position of the first occurrence
    # of the '/' char after the '@' is located. The solution is the rest of the
    # string after that '/' sign including it

    parts = cvspath.split(b':')
    atposition = parts[-1].find(b'@')
    start = 0

    if atposition != -1:
        start = atposition

    repopath = parts[-1][parts[-1].find(b'/', start) :]
    return repopath


def createlog(ui, directory=None, root=b"", rlog=True, cache=None):
    '''Collect the CVS rlog.

    directory - path to run (r)log on; when None, use the CVS sandbox in
                the current working directory (CVS/Repository, CVS/Root)
    root - the CVSROOT to pass via ``cvs -d``; falls back to $CVSROOT
    rlog - when true run ``cvs rlog``, otherwise ``cvs log``
    cache - None (no cache), b'write', or b'update' (reuse an existing
            log cache under ~/.hg.cvsps and only fetch newer entries)

    Returns a list of logentry objects; raises logerror on cvs failure.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
110 _scache = {} 111 112 def scache(s): 113 """return a shared version of a string""" 114 return _scache.setdefault(s, s) 115 116 ui.status(_(b'collecting CVS rlog\n')) 117 118 log = [] # list of logentry objects containing the CVS state 119 120 # patterns to match in CVS (r)log output, by state of use 121 re_00 = re.compile(b'RCS file: (.+)$') 122 re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$') 123 re_02 = re.compile(b'cvs (r?log|server): (.+)\n$') 124 re_03 = re.compile( 125 b"(Cannot access.+CVSROOT)|(can't create temporary directory.+)$" 126 ) 127 re_10 = re.compile(b'Working file: (.+)$') 128 re_20 = re.compile(b'symbolic names:') 129 re_30 = re.compile(b'\t(.+): ([\\d.]+)$') 130 re_31 = re.compile(b'----------------------------$') 131 re_32 = re.compile( 132 b'=======================================' 133 b'======================================$' 134 ) 135 re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$') 136 re_60 = re.compile( 137 br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);' 138 br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?' 139 br'(\s+commitid:\s+([^;]+);)?' 140 br'(.*mergepoint:\s+([^;]+);)?' 
141 ) 142 re_70 = re.compile(b'branches: (.+);$') 143 144 file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch') 145 146 prefix = b'' # leading path to strip of what we get from CVS 147 148 if directory is None: 149 # Current working directory 150 151 # Get the real directory in the repository 152 try: 153 with open(os.path.join(b'CVS', b'Repository'), b'rb') as f: 154 prefix = f.read().strip() 155 directory = prefix 156 if prefix == b".": 157 prefix = b"" 158 except IOError: 159 raise logerror(_(b'not a CVS sandbox')) 160 161 if prefix and not prefix.endswith(pycompat.ossep): 162 prefix += pycompat.ossep 163 164 # Use the Root file in the sandbox, if it exists 165 try: 166 root = open(os.path.join(b'CVS', b'Root'), b'rb').read().strip() 167 except IOError: 168 pass 169 170 if not root: 171 root = encoding.environ.get(b'CVSROOT', b'') 172 173 # read log cache if one exists 174 oldlog = [] 175 date = None 176 177 if cache: 178 cachedir = os.path.expanduser(b'~/.hg.cvsps') 179 if not os.path.exists(cachedir): 180 os.mkdir(cachedir) 181 182 # The cvsps cache pickle needs a uniquified name, based on the 183 # repository location. The address may have all sort of nasties 184 # in it, slashes, colons and such. So here we take just the 185 # alphanumeric characters, concatenated in a way that does not 186 # mix up the various components, so that 187 # :pserver:user@server:/path 188 # and 189 # /pserver/user/server/path 190 # are mapped to different cache file names. 
191 cachefile = root.split(b":") + [directory, b"cache"] 192 cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s] 193 cachefile = os.path.join( 194 cachedir, b'.'.join([s for s in cachefile if s]) 195 ) 196 197 if cache == b'update': 198 try: 199 ui.note(_(b'reading cvs log cache %s\n') % cachefile) 200 oldlog = pickle.load(open(cachefile, b'rb')) 201 for e in oldlog: 202 if not ( 203 util.safehasattr(e, b'branchpoints') 204 and util.safehasattr(e, b'commitid') 205 and util.safehasattr(e, b'mergepoint') 206 ): 207 ui.status(_(b'ignoring old cache\n')) 208 oldlog = [] 209 break 210 211 ui.note(_(b'cache has %d log entries\n') % len(oldlog)) 212 except Exception as e: 213 ui.note(_(b'error reading cache: %r\n') % e) 214 215 if oldlog: 216 date = oldlog[-1].date # last commit date as a (time,tz) tuple 217 date = dateutil.datestr(date, b'%Y/%m/%d %H:%M:%S %1%2') 218 219 # build the CVS commandline 220 cmd = [b'cvs', b'-q'] 221 if root: 222 cmd.append(b'-d%s' % root) 223 p = util.normpath(getrepopath(root)) 224 if not p.endswith(b'/'): 225 p += b'/' 226 if prefix: 227 # looks like normpath replaces "" by "." 
228 prefix = p + util.normpath(prefix) 229 else: 230 prefix = p 231 cmd.append([b'log', b'rlog'][rlog]) 232 if date: 233 # no space between option and date string 234 cmd.append(b'-d>%s' % date) 235 cmd.append(directory) 236 237 # state machine begins here 238 tags = {} # dictionary of revisions on current file with their tags 239 branchmap = {} # mapping between branch names and revision numbers 240 rcsmap = {} 241 state = 0 242 store = False # set when a new record can be appended 243 244 cmd = [procutil.shellquote(arg) for arg in cmd] 245 ui.note(_(b"running %s\n") % (b' '.join(cmd))) 246 ui.debug(b"prefix=%r directory=%r root=%r\n" % (prefix, directory, root)) 247 248 pfp = procutil.popen(b' '.join(cmd), b'rb') 249 peek = util.fromnativeeol(pfp.readline()) 250 while True: 251 line = peek 252 if line == b'': 253 break 254 peek = util.fromnativeeol(pfp.readline()) 255 if line.endswith(b'\n'): 256 line = line[:-1] 257 # ui.debug('state=%d line=%r\n' % (state, line)) 258 259 if state == 0: 260 # initial state, consume input until we see 'RCS file' 261 match = re_00.match(line) 262 if match: 263 rcs = match.group(1) 264 tags = {} 265 if rlog: 266 filename = util.normpath(rcs[:-2]) 267 if filename.startswith(prefix): 268 filename = filename[len(prefix) :] 269 if filename.startswith(b'/'): 270 filename = filename[1:] 271 if filename.startswith(b'Attic/'): 272 filename = filename[6:] 273 else: 274 filename = filename.replace(b'/Attic/', b'/') 275 state = 2 276 continue 277 state = 1 278 continue 279 match = re_01.match(line) 280 if match: 281 raise logerror(match.group(1)) 282 match = re_02.match(line) 283 if match: 284 raise logerror(match.group(2)) 285 if re_03.match(line): 286 raise logerror(line) 287 288 elif state == 1: 289 # expect 'Working file' (only when using log instead of rlog) 290 match = re_10.match(line) 291 assert match, _(b'RCS file must be followed by working file') 292 filename = util.normpath(match.group(1)) 293 state = 2 294 295 elif state == 2: 
296 # expect 'symbolic names' 297 if re_20.match(line): 298 branchmap = {} 299 state = 3 300 301 elif state == 3: 302 # read the symbolic names and store as tags 303 match = re_30.match(line) 304 if match: 305 rev = [int(x) for x in match.group(2).split(b'.')] 306 307 # Convert magic branch number to an odd-numbered one 308 revn = len(rev) 309 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0: 310 rev = rev[:-2] + rev[-1:] 311 rev = tuple(rev) 312 313 if rev not in tags: 314 tags[rev] = [] 315 tags[rev].append(match.group(1)) 316 branchmap[match.group(1)] = match.group(2) 317 318 elif re_31.match(line): 319 state = 5 320 elif re_32.match(line): 321 state = 0 322 323 elif state == 4: 324 # expecting '------' separator before first revision 325 if re_31.match(line): 326 state = 5 327 else: 328 assert not re_32.match(line), _( 329 b'must have at least some revisions' 330 ) 331 332 elif state == 5: 333 # expecting revision number and possibly (ignored) lock indication 334 # we create the logentry here from values stored in states 0 to 4, 335 # as this state is re-entered for subsequent revisions of a file. 
336 match = re_50.match(line) 337 assert match, _(b'expected revision number') 338 e = logentry( 339 rcs=scache(rcs), 340 file=scache(filename), 341 revision=tuple([int(x) for x in match.group(1).split(b'.')]), 342 branches=[], 343 parent=None, 344 commitid=None, 345 mergepoint=None, 346 branchpoints=set(), 347 ) 348 349 state = 6 350 351 elif state == 6: 352 # expecting date, author, state, lines changed 353 match = re_60.match(line) 354 assert match, _(b'revision must be followed by date line') 355 d = match.group(1) 356 if d[2] == b'/': 357 # Y2K 358 d = b'19' + d 359 360 if len(d.split()) != 3: 361 # cvs log dates always in GMT 362 d = d + b' UTC' 363 e.date = dateutil.parsedate( 364 d, 365 [ 366 b'%y/%m/%d %H:%M:%S', 367 b'%Y/%m/%d %H:%M:%S', 368 b'%Y-%m-%d %H:%M:%S', 369 ], 370 ) 371 e.author = scache(match.group(2)) 372 e.dead = match.group(3).lower() == b'dead' 373 374 if match.group(5): 375 if match.group(6): 376 e.lines = (int(match.group(5)), int(match.group(6))) 377 else: 378 e.lines = (int(match.group(5)), 0) 379 elif match.group(6): 380 e.lines = (0, int(match.group(6))) 381 else: 382 e.lines = None 383 384 if match.group(7): # cvs 1.12 commitid 385 e.commitid = match.group(8) 386 387 if match.group(9): # cvsnt mergepoint 388 myrev = match.group(10).split(b'.') 389 if len(myrev) == 2: # head 390 e.mergepoint = b'HEAD' 391 else: 392 myrev = b'.'.join(myrev[:-2] + [b'0', myrev[-2]]) 393 branches = [b for b in branchmap if branchmap[b] == myrev] 394 assert len(branches) == 1, ( 395 b'unknown branch: %s' % e.mergepoint 396 ) 397 e.mergepoint = branches[0] 398 399 e.comment = [] 400 state = 7 401 402 elif state == 7: 403 # read the revision numbers of branches that start at this revision 404 # or store the commit log message otherwise 405 m = re_70.match(line) 406 if m: 407 e.branches = [ 408 tuple([int(y) for y in x.strip().split(b'.')]) 409 for x in m.group(1).split(b';') 410 ] 411 state = 8 412 elif re_31.match(line) and re_50.match(peek): 413 state = 5 
414 store = True 415 elif re_32.match(line): 416 state = 0 417 store = True 418 else: 419 e.comment.append(line) 420 421 elif state == 8: 422 # store commit log message 423 if re_31.match(line): 424 cpeek = peek 425 if cpeek.endswith(b'\n'): 426 cpeek = cpeek[:-1] 427 if re_50.match(cpeek): 428 state = 5 429 store = True 430 else: 431 e.comment.append(line) 432 elif re_32.match(line): 433 state = 0 434 store = True 435 else: 436 e.comment.append(line) 437 438 # When a file is added on a branch B1, CVS creates a synthetic 439 # dead trunk revision 1.1 so that the branch has a root. 440 # Likewise, if you merge such a file to a later branch B2 (one 441 # that already existed when the file was added on B1), CVS 442 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop 443 # these revisions now, but mark them synthetic so 444 # createchangeset() can take care of them. 445 if ( 446 store 447 and e.dead 448 and e.revision[-1] == 1 449 and len(e.comment) == 1 # 1.1 or 1.1.x.1 450 and file_added_re.match(e.comment[0]) 451 ): 452 ui.debug( 453 b'found synthetic revision in %s: %r\n' % (e.rcs, e.comment[0]) 454 ) 455 e.synthetic = True 456 457 if store: 458 # clean up the results and save in the log. 
459 store = False 460 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])]) 461 e.comment = scache(b'\n'.join(e.comment)) 462 463 revn = len(e.revision) 464 if revn > 3 and (revn % 2) == 0: 465 e.branch = tags.get(e.revision[:-1], [None])[0] 466 else: 467 e.branch = None 468 469 # find the branches starting from this revision 470 branchpoints = set() 471 for branch, revision in pycompat.iteritems(branchmap): 472 revparts = tuple([int(i) for i in revision.split(b'.')]) 473 if len(revparts) < 2: # bad tags 474 continue 475 if revparts[-2] == 0 and revparts[-1] % 2 == 0: 476 # normal branch 477 if revparts[:-2] == e.revision: 478 branchpoints.add(branch) 479 elif revparts == (1, 1, 1): # vendor branch 480 if revparts in e.branches: 481 branchpoints.add(branch) 482 e.branchpoints = branchpoints 483 484 log.append(e) 485 486 rcsmap[e.rcs.replace(b'/Attic/', b'/')] = e.rcs 487 488 if len(log) % 100 == 0: 489 ui.status( 490 stringutil.ellipsis(b'%d %s' % (len(log), e.file), 80) 491 + b'\n' 492 ) 493 494 log.sort(key=lambda x: (x.rcs, x.revision)) 495 496 # find parent revisions of individual files 497 versions = {} 498 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)): 499 rcs = e.rcs.replace(b'/Attic/', b'/') 500 if rcs in rcsmap: 501 e.rcs = rcsmap[rcs] 502 branch = e.revision[:-1] 503 versions[(e.rcs, branch)] = e.revision 504 505 for e in log: 506 branch = e.revision[:-1] 507 p = versions.get((e.rcs, branch), None) 508 if p is None: 509 p = e.revision[:-2] 510 e.parent = p 511 versions[(e.rcs, branch)] = e.revision 512 513 # update the log cache 514 if cache: 515 if log: 516 # join up the old and new logs 517 log.sort(key=lambda x: x.date) 518 519 if oldlog and oldlog[-1].date >= log[0].date: 520 raise logerror( 521 _( 522 b'log cache overlaps with new log entries,' 523 b' re-run without cache.' 
524 ) 525 ) 526 527 log = oldlog + log 528 529 # write the new cachefile 530 ui.note(_(b'writing cvs log cache %s\n') % cachefile) 531 pickle.dump(log, open(cachefile, b'wb')) 532 else: 533 log = oldlog 534 535 ui.status(_(b'%d log entries\n') % len(log)) 536 537 encodings = ui.configlist(b'convert', b'cvsps.logencoding') 538 if encodings: 539 540 def revstr(r): 541 # this is needed, because logentry.revision is a tuple of "int" 542 # (e.g. (1, 2) for "1.2") 543 return b'.'.join(pycompat.maplist(pycompat.bytestr, r)) 544 545 for entry in log: 546 comment = entry.comment 547 for e in encodings: 548 try: 549 entry.comment = comment.decode(pycompat.sysstr(e)).encode( 550 'utf-8' 551 ) 552 if ui.debugflag: 553 ui.debug( 554 b"transcoding by %s: %s of %s\n" 555 % (e, revstr(entry.revision), entry.file) 556 ) 557 break 558 except UnicodeDecodeError: 559 pass # try next encoding 560 except LookupError as inst: # unknown encoding, maybe 561 raise error.Abort( 562 pycompat.bytestr(inst), 563 hint=_( 564 b'check convert.cvsps.logencoding configuration' 565 ), 566 ) 567 else: 568 raise error.Abort( 569 _( 570 b"no encoding can transcode" 571 b" CVS log message for %s of %s" 572 ) 573 % (revstr(entry.revision), entry.file), 574 hint=_(b'check convert.cvsps.logencoding configuration'), 575 ) 576 577 hook.hook(ui, None, b"cvslog", True, log=log) 578 579 return log 580 581 582class changeset(object): 583 """Class changeset has the following attributes: 584 .id - integer identifying this changeset (list index) 585 .author - author name as CVS knows it 586 .branch - name of branch this changeset is on, or None 587 .comment - commit message 588 .commitid - CVS commitid or None 589 .date - the commit date as a (time,tz) tuple 590 .entries - list of logentry objects in this changeset 591 .parents - list of one or two parent changesets 592 .tags - list of tags on this changeset 593 .synthetic - from synthetic revision "file ... added on branch ..." 
594 .mergepoint- the branch that has been merged from or None 595 .branchpoints- the branches that start at the current entry or empty 596 """ 597 598 def __init__(self, **entries): 599 self.id = None 600 self.synthetic = False 601 self.__dict__.update(entries) 602 603 def __repr__(self): 604 items = ( 605 b"%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__) 606 ) 607 return b"%s(%s)" % (type(self).__name__, b", ".join(items)) 608 609 610def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None): 611 '''Convert log into changesets.''' 612 613 ui.status(_(b'creating changesets\n')) 614 615 # try to order commitids by date 616 mindate = {} 617 for e in log: 618 if e.commitid: 619 if e.commitid not in mindate: 620 mindate[e.commitid] = e.date 621 else: 622 mindate[e.commitid] = min(e.date, mindate[e.commitid]) 623 624 # Merge changesets 625 log.sort( 626 key=lambda x: ( 627 mindate.get(x.commitid, (-1, 0)), 628 x.commitid or b'', 629 x.comment, 630 x.author, 631 x.branch or b'', 632 x.date, 633 x.branchpoints, 634 ) 635 ) 636 637 changesets = [] 638 files = set() 639 c = None 640 for i, e in enumerate(log): 641 642 # Check if log entry belongs to the current changeset or not. 643 644 # Since CVS is file-centric, two different file revisions with 645 # different branchpoints should be treated as belonging to two 646 # different changesets (and the ordering is important and not 647 # honoured by cvsps at this point). 648 # 649 # Consider the following case: 650 # foo 1.1 branchpoints: [MYBRANCH] 651 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2] 652 # 653 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a 654 # later version of foo may be in MYBRANCH2, so foo should be the 655 # first changeset and bar the next and MYBRANCH and MYBRANCH2 656 # should both start off of the bar changeset. No provisions are 657 # made to ensure that this is, in fact, what happens. 
658 if not ( 659 c 660 and e.branchpoints == c.branchpoints 661 and ( # cvs commitids 662 (e.commitid is not None and e.commitid == c.commitid) 663 or ( # no commitids, use fuzzy commit detection 664 (e.commitid is None or c.commitid is None) 665 and e.comment == c.comment 666 and e.author == c.author 667 and e.branch == c.branch 668 and ( 669 (c.date[0] + c.date[1]) 670 <= (e.date[0] + e.date[1]) 671 <= (c.date[0] + c.date[1]) + fuzz 672 ) 673 and e.file not in files 674 ) 675 ) 676 ): 677 c = changeset( 678 comment=e.comment, 679 author=e.author, 680 branch=e.branch, 681 date=e.date, 682 entries=[], 683 mergepoint=e.mergepoint, 684 branchpoints=e.branchpoints, 685 commitid=e.commitid, 686 ) 687 changesets.append(c) 688 689 files = set() 690 if len(changesets) % 100 == 0: 691 t = b'%d %s' % (len(changesets), repr(e.comment)[1:-1]) 692 ui.status(stringutil.ellipsis(t, 80) + b'\n') 693 694 c.entries.append(e) 695 files.add(e.file) 696 c.date = e.date # changeset date is date of latest commit in it 697 698 # Mark synthetic changesets 699 700 for c in changesets: 701 # Synthetic revisions always get their own changeset, because 702 # the log message includes the filename. E.g. if you add file3 703 # and file4 on a branch, you get four log entries and three 704 # changesets: 705 # "File file3 was added on branch ..." (synthetic, 1 entry) 706 # "File file4 was added on branch ..." (synthetic, 1 entry) 707 # "Add file3 and file4 to fix ..." (real, 2 entries) 708 # Hence the check for 1 entry here. 
709 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic 710 711 # Sort files in each changeset 712 713 def entitycompare(l, r): 714 """Mimic cvsps sorting order""" 715 l = l.file.split(b'/') 716 r = r.file.split(b'/') 717 nl = len(l) 718 nr = len(r) 719 n = min(nl, nr) 720 for i in range(n): 721 if i + 1 == nl and nl < nr: 722 return -1 723 elif i + 1 == nr and nl > nr: 724 return +1 725 elif l[i] < r[i]: 726 return -1 727 elif l[i] > r[i]: 728 return +1 729 return 0 730 731 for c in changesets: 732 c.entries.sort(key=functools.cmp_to_key(entitycompare)) 733 734 # Sort changesets by date 735 736 odd = set() 737 738 def cscmp(l, r): 739 d = sum(l.date) - sum(r.date) 740 if d: 741 return d 742 743 # detect vendor branches and initial commits on a branch 744 le = {} 745 for e in l.entries: 746 le[e.rcs] = e.revision 747 re = {} 748 for e in r.entries: 749 re[e.rcs] = e.revision 750 751 d = 0 752 for e in l.entries: 753 if re.get(e.rcs, None) == e.parent: 754 assert not d 755 d = 1 756 break 757 758 for e in r.entries: 759 if le.get(e.rcs, None) == e.parent: 760 if d: 761 odd.add((l, r)) 762 d = -1 763 break 764 # By this point, the changesets are sufficiently compared that 765 # we don't really care about ordering. However, this leaves 766 # some race conditions in the tests, so we compare on the 767 # number of files modified, the files contained in each 768 # changeset, and the branchpoints in the change to ensure test 769 # output remains stable. 770 771 # recommended replacement for cmp from 772 # https://docs.python.org/3.0/whatsnew/3.0.html 773 c = lambda x, y: (x > y) - (x < y) 774 # Sort bigger changes first. 775 if not d: 776 d = c(len(l.entries), len(r.entries)) 777 # Try sorting by filename in the change. 778 if not d: 779 d = c([e.file for e in l.entries], [e.file for e in r.entries]) 780 # Try and put changes without a branch point before ones with 781 # a branch point. 
782 if not d: 783 d = c(len(l.branchpoints), len(r.branchpoints)) 784 return d 785 786 changesets.sort(key=functools.cmp_to_key(cscmp)) 787 788 # Collect tags 789 790 globaltags = {} 791 for c in changesets: 792 for e in c.entries: 793 for tag in e.tags: 794 # remember which is the latest changeset to have this tag 795 globaltags[tag] = c 796 797 for c in changesets: 798 tags = set() 799 for e in c.entries: 800 tags.update(e.tags) 801 # remember tags only if this is the latest changeset to have it 802 c.tags = sorted(tag for tag in tags if globaltags[tag] is c) 803 804 # Find parent changesets, handle {{mergetobranch BRANCHNAME}} 805 # by inserting dummy changesets with two parents, and handle 806 # {{mergefrombranch BRANCHNAME}} by setting two parents. 807 808 if mergeto is None: 809 mergeto = br'{{mergetobranch ([-\w]+)}}' 810 if mergeto: 811 mergeto = re.compile(mergeto) 812 813 if mergefrom is None: 814 mergefrom = br'{{mergefrombranch ([-\w]+)}}' 815 if mergefrom: 816 mergefrom = re.compile(mergefrom) 817 818 versions = {} # changeset index where we saw any particular file version 819 branches = {} # changeset index where we saw a branch 820 n = len(changesets) 821 i = 0 822 while i < n: 823 c = changesets[i] 824 825 for f in c.entries: 826 versions[(f.rcs, f.revision)] = i 827 828 p = None 829 if c.branch in branches: 830 p = branches[c.branch] 831 else: 832 # first changeset on a new branch 833 # the parent is a changeset with the branch in its 834 # branchpoints such that it is the latest possible 835 # commit without any intervening, unrelated commits. 836 837 for candidate in pycompat.xrange(i): 838 if c.branch not in changesets[candidate].branchpoints: 839 if p is not None: 840 break 841 continue 842 p = candidate 843 844 c.parents = [] 845 if p is not None: 846 p = changesets[p] 847 848 # Ensure no changeset has a synthetic changeset as a parent. 
849 while p.synthetic: 850 assert len(p.parents) <= 1, _( 851 b'synthetic changeset cannot have multiple parents' 852 ) 853 if p.parents: 854 p = p.parents[0] 855 else: 856 p = None 857 break 858 859 if p is not None: 860 c.parents.append(p) 861 862 if c.mergepoint: 863 if c.mergepoint == b'HEAD': 864 c.mergepoint = None 865 c.parents.append(changesets[branches[c.mergepoint]]) 866 867 if mergefrom: 868 m = mergefrom.search(c.comment) 869 if m: 870 m = m.group(1) 871 if m == b'HEAD': 872 m = None 873 try: 874 candidate = changesets[branches[m]] 875 except KeyError: 876 ui.warn( 877 _( 878 b"warning: CVS commit message references " 879 b"non-existent branch %r:\n%s\n" 880 ) 881 % (pycompat.bytestr(m), c.comment) 882 ) 883 if m in branches and c.branch != m and not candidate.synthetic: 884 c.parents.append(candidate) 885 886 if mergeto: 887 m = mergeto.search(c.comment) 888 if m: 889 if m.groups(): 890 m = m.group(1) 891 if m == b'HEAD': 892 m = None 893 else: 894 m = None # if no group found then merge to HEAD 895 if m in branches and c.branch != m: 896 # insert empty changeset for merge 897 cc = changeset( 898 author=c.author, 899 branch=m, 900 date=c.date, 901 comment=b'convert-repo: CVS merge from branch %s' 902 % c.branch, 903 entries=[], 904 tags=[], 905 parents=[changesets[branches[m]], c], 906 ) 907 changesets.insert(i + 1, cc) 908 branches[m] = i + 1 909 910 # adjust our loop counters now we have inserted a new entry 911 n += 1 912 i += 2 913 continue 914 915 branches[c.branch] = i 916 i += 1 917 918 # Drop synthetic changesets (safe now that we have ensured no other 919 # changesets can have them as parents). 
920 i = 0 921 while i < len(changesets): 922 if changesets[i].synthetic: 923 del changesets[i] 924 else: 925 i += 1 926 927 # Number changesets 928 929 for i, c in enumerate(changesets): 930 c.id = i + 1 931 932 if odd: 933 for l, r in odd: 934 if l.id is not None and r.id is not None: 935 ui.warn( 936 _(b'changeset %d is both before and after %d\n') 937 % (l.id, r.id) 938 ) 939 940 ui.status(_(b'%d changeset entries\n') % len(changesets)) 941 942 hook.hook(ui, None, b"cvschangesets", True, changesets=changesets) 943 944 return changesets 945 946 947def debugcvsps(ui, *args, **opts): 948 """Read CVS rlog for current directory or named path in 949 repository, and convert the log to changesets based on matching 950 commit log entries and dates. 951 """ 952 opts = pycompat.byteskwargs(opts) 953 if opts[b"new_cache"]: 954 cache = b"write" 955 elif opts[b"update_cache"]: 956 cache = b"update" 957 else: 958 cache = None 959 960 revisions = opts[b"revisions"] 961 962 try: 963 if args: 964 log = [] 965 for d in args: 966 log += createlog(ui, d, root=opts[b"root"], cache=cache) 967 else: 968 log = createlog(ui, root=opts[b"root"], cache=cache) 969 except logerror as e: 970 ui.write(b"%r\n" % e) 971 return 972 973 changesets = createchangeset(ui, log, opts[b"fuzz"]) 974 del log 975 976 # Print changesets (optionally filtered) 977 978 off = len(revisions) 979 branches = {} # latest version number in each branch 980 ancestors = {} # parent branch 981 for cs in changesets: 982 983 if opts[b"ancestors"]: 984 if cs.branch not in branches and cs.parents and cs.parents[0].id: 985 ancestors[cs.branch] = ( 986 changesets[cs.parents[0].id - 1].branch, 987 cs.parents[0].id, 988 ) 989 branches[cs.branch] = cs.id 990 991 # limit by branches 992 if ( 993 opts[b"branches"] 994 and (cs.branch or b'HEAD') not in opts[b"branches"] 995 ): 996 continue 997 998 if not off: 999 # Note: trailing spaces on several lines here are needed to have 1000 # bug-for-bug compatibility with cvsps. 
1001 ui.write(b'---------------------\n') 1002 ui.write((b'PatchSet %d \n' % cs.id)) 1003 ui.write( 1004 ( 1005 b'Date: %s\n' 1006 % dateutil.datestr(cs.date, b'%Y/%m/%d %H:%M:%S %1%2') 1007 ) 1008 ) 1009 ui.write((b'Author: %s\n' % cs.author)) 1010 ui.write((b'Branch: %s\n' % (cs.branch or b'HEAD'))) 1011 ui.write( 1012 ( 1013 b'Tag%s: %s \n' 1014 % ( 1015 [b'', b's'][len(cs.tags) > 1], 1016 b','.join(cs.tags) or b'(none)', 1017 ) 1018 ) 1019 ) 1020 if cs.branchpoints: 1021 ui.writenoi18n( 1022 b'Branchpoints: %s \n' % b', '.join(sorted(cs.branchpoints)) 1023 ) 1024 if opts[b"parents"] and cs.parents: 1025 if len(cs.parents) > 1: 1026 ui.write( 1027 ( 1028 b'Parents: %s\n' 1029 % (b','.join([(b"%d" % p.id) for p in cs.parents])) 1030 ) 1031 ) 1032 else: 1033 ui.write((b'Parent: %d\n' % cs.parents[0].id)) 1034 1035 if opts[b"ancestors"]: 1036 b = cs.branch 1037 r = [] 1038 while b: 1039 b, c = ancestors[b] 1040 r.append(b'%s:%d:%d' % (b or b"HEAD", c, branches[b])) 1041 if r: 1042 ui.write((b'Ancestors: %s\n' % (b','.join(r)))) 1043 1044 ui.writenoi18n(b'Log:\n') 1045 ui.write(b'%s\n\n' % cs.comment) 1046 ui.writenoi18n(b'Members: \n') 1047 for f in cs.entries: 1048 fn = f.file 1049 if fn.startswith(opts[b"prefix"]): 1050 fn = fn[len(opts[b"prefix"]) :] 1051 ui.write( 1052 b'\t%s:%s->%s%s \n' 1053 % ( 1054 fn, 1055 b'.'.join([b"%d" % x for x in f.parent]) or b'INITIAL', 1056 b'.'.join([(b"%d" % x) for x in f.revision]), 1057 [b'', b'(DEAD)'][f.dead], 1058 ) 1059 ) 1060 ui.write(b'\n') 1061 1062 # have we seen the start tag? 1063 if revisions and off: 1064 if revisions[0] == (b"%d" % cs.id) or revisions[0] in cs.tags: 1065 off = False 1066 1067 # see if we reached the end tag 1068 if len(revisions) > 1 and not off: 1069 if revisions[1] == (b"%d" % cs.id) or revisions[1] in cs.tags: 1070 break 1071