1# Mercurial built-in replacement for cvsps.
2#
3# Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4#
5# This software may be used and distributed according to the terms of the
6# GNU General Public License version 2 or any later version.
7from __future__ import absolute_import
8
9import functools
10import os
11import re
12
13from mercurial.i18n import _
14from mercurial.pycompat import open
15from mercurial import (
16    encoding,
17    error,
18    hook,
19    pycompat,
20    util,
21)
22from mercurial.utils import (
23    dateutil,
24    procutil,
25    stringutil,
26)
27
# NOTE(review): presumably the stdlib pickle module as re-exported by
# mercurial.util -- confirm. It is used below to load and store the
# on-disk CVS log cache.
pickle = util.pickle
29
30
class logentry(object):
    """One file revision as described by the CVS (r)log output.

    Instances are plain attribute bags: every keyword passed to the
    constructor becomes an attribute.  The attributes used by this module
    are:

    .author    - author name as CVS knows it
    .branch    - name of branch this revision is on
    .branches  - revision tuple of branches starting at this revision
    .comment   - commit message
    .commitid  - CVS commitid or None
    .date      - the commit date as a (time, tz) tuple
    .dead      - true if file revision is dead
    .file      - Name of file
    .lines     - a tuple (+lines, -lines) or None
    .parent    - Previous revision of this entry
    .rcs       - name of file as returned from CVS
    .revision  - revision number as tuple
    .tags      - list of tags on the file
    .synthetic - is this a synthetic "file ... added on ..." revision?
    .mergepoint - the branch that has been merged from (if present in
                  rlog output) or None
    .branchpoints - the branches that start at the current entry or empty
    """

    def __init__(self, **entries):
        attrs = vars(self)
        # Install the default first so an explicit ``synthetic=...``
        # keyword overrides it.
        attrs['synthetic'] = False
        attrs.update(entries)

    def __repr__(self):
        attrs = vars(self)
        rendered = []
        # Sorted by attribute name so the representation is stable.
        for name in sorted(attrs):
            rendered.append("%s=%r" % (name, attrs[name]))
        return "%s(%s)" % (type(self).__name__, ", ".join(rendered))
59
60
class logerror(Exception):
    """Error raised when the CVS log cannot be collected or used."""
63
64
def getrepopath(cvspath):
    """Extract the repository path component from a CVS path (CVSROOT).

    >>> getrepopath(b'/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'user@server/path/to/repository')
    '/path/to/repository'
    """
    # According to the CVS manual, CVS paths look like:
    # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # Only the text after the last ':' is examined.  Within it, the
    # repository path begins at the first '/' located at or after the '@'
    # (when there is one) and runs to the end, '/' included.
    tail = cvspath.rpartition(b':')[2]

    # find() yields -1 when there is no '@'; search from the start then.
    searchfrom = max(tail.find(b'@'), 0)

    return tail[tail.find(b'/', searchfrom) :]
103
104
def createlog(ui, directory=None, root=b"", rlog=True, cache=None):
    """Collect the CVS (r)log and return it as a list of logentry objects.

    Runs ``cvs log`` or ``cvs rlog``, parses the output with a line based
    state machine, links every file revision to its parent revision and
    optionally merges the result with an on-disk cache.

    ui        - ui object used for status/note/debug output and to read
                the ``convert.cvsps.logencoding`` setting
    directory - directory/module handed to cvs; when None the current
                directory must be a CVS sandbox and the value is read
                from CVS/Repository
    root      - CVSROOT; when directory is None the content of CVS/Root
                (if readable) replaces it, and if it is still empty the
                CVSROOT environment variable is used
    rlog      - run ``cvs rlog`` when true, ``cvs log`` otherwise
    cache     - any true value stores the collected log in a pickle file
                under ~/.hg.cvsps; b'update' additionally loads that file
                first and only asks cvs for entries newer than the last
                cached one

    Returns the list of logentry objects (cached entries first when a
    cache was loaded) after running the ``cvslog`` hook on it.

    Raises logerror when not run from a CVS sandbox, when cvs reports an
    error, or when cached and new entries overlap in time; raises
    error.Abort when no configured encoding can transcode a log message.
    """

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}

    def scache(s):
        """return a shared version of a string"""
        return _scache.setdefault(s, s)

    ui.status(_(b'collecting CVS rlog\n'))

    log = []  # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile(b'RCS file: (.+)$')
    re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
    # NOTE(review): this pattern requires a trailing '\n', but the parsing
    # loop strips the newline before matching, so it looks like it can
    # never match.  Left unchanged because making it match would turn cvs
    # messages that are currently ignored into hard errors -- confirm.
    re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
    re_03 = re.compile(
        b"(Cannot access.+CVSROOT)|(can't create temporary directory.+)$"
    )
    re_10 = re.compile(b'Working file: (.+)$')
    re_20 = re.compile(b'symbolic names:')
    re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
    re_31 = re.compile(b'----------------------------$')
    re_32 = re.compile(
        b'======================================='
        b'======================================$'
    )
    re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(
        br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
        br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
        br'(\s+commitid:\s+([^;]+);)?'
        br'(.*mergepoint:\s+([^;]+);)?'
    )
    re_70 = re.compile(b'branches: (.+);$')

    file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')

    prefix = b''  # leading path to strip off what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            with open(os.path.join(b'CVS', b'Repository'), b'rb') as f:
                prefix = f.read().strip()
            directory = prefix
            if prefix == b".":
                prefix = b""
        except IOError:
            raise logerror(_(b'not a CVS sandbox'))

        if prefix and not prefix.endswith(pycompat.ossep):
            prefix += pycompat.ossep

        # Use the Root file in the sandbox, if it exists
        try:
            # the context manager closes the file instead of leaking it
            with open(os.path.join(b'CVS', b'Root'), b'rb') as f:
                root = f.read().strip()
        except IOError:
            pass

    if not root:
        root = encoding.environ.get(b'CVSROOT', b'')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser(b'~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumeric characters, concatenated in a way that does not
        # mix up the various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(b":") + [directory, b"cache"]
        cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(
            cachedir, b'.'.join([s for s in cachefile if s])
        )

    if cache == b'update':
        try:
            ui.note(_(b'reading cvs log cache %s\n') % cachefile)
            # NOTE: unpickling executes whatever the cache file describes;
            # the file is only ever read from the user's own ~/.hg.cvsps.
            with open(cachefile, b'rb') as fp:
                oldlog = pickle.load(fp)
            for e in oldlog:
                # entries written by older versions lack these attributes
                if not (
                    util.safehasattr(e, b'branchpoints')
                    and util.safehasattr(e, b'commitid')
                    and util.safehasattr(e, b'mergepoint')
                ):
                    ui.status(_(b'ignoring old cache\n'))
                    oldlog = []
                    break

            ui.note(_(b'cache has %d log entries\n') % len(oldlog))
        except Exception as e:
            # best effort: an unreadable cache just means a full fetch
            ui.note(_(b'error reading cache: %r\n') % e)

        if oldlog:
            date = oldlog[-1].date  # last commit date as a (time,tz) tuple
            date = dateutil.datestr(date, b'%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = [b'cvs', b'-q']
    if root:
        cmd.append(b'-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith(b'/'):
            p += b'/'
        if prefix:
            # looks like normpath replaces "" by "."
            prefix = p + util.normpath(prefix)
        else:
            prefix = p
    cmd.append([b'log', b'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append(b'-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}  # dictionary of revisions on current file with their tags
    branchmap = {}  # mapping between branch names and revision numbers
    rcsmap = {}
    state = 0
    store = False  # set when a new record can be appended

    cmd = [procutil.shellquote(arg) for arg in cmd]
    ui.note(_(b"running %s\n") % (b' '.join(cmd)))
    ui.debug(b"prefix=%r directory=%r root=%r\n" % (prefix, directory, root))

    pfp = procutil.popen(b' '.join(cmd), b'rb')
    # One line of lookahead is kept in 'peek': states 7 and 8 need it to
    # tell a revision separator from a dashed line inside a log message.
    peek = util.fromnativeeol(pfp.readline())
    while True:
        line = peek
        if line == b'':
            break
        peek = util.fromnativeeol(pfp.readline())
        if line.endswith(b'\n'):
            line = line[:-1]
        # ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # drop the last two characters of the RCS file name
                    # (presumably the ",v" suffix)
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix) :]
                    if filename.startswith(b'/'):
                        filename = filename[1:]
                    if filename.startswith(b'Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace(b'/Attic/', b'/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise logerror(match.group(1))
            match = re_02.match(line)
            if match:
                raise logerror(match.group(2))
            if re_03.match(line):
                raise logerror(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _(b'RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                branchmap = {}
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split(b'.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))
                branchmap[match.group(1)] = match.group(2)

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # NOTE(review): no transition in this function sets state to 4,
            # so this branch looks unreachable -- confirm before removing.
            # expecting '------' separator before first revision
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _(
                    b'must have at least some revisions'
                )

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _(b'expected revision number')
            e = logentry(
                rcs=scache(rcs),
                file=scache(filename),
                revision=tuple([int(x) for x in match.group(1).split(b'.')]),
                branches=[],
                parent=None,
                commitid=None,
                mergepoint=None,
                branchpoints=set(),
            )

            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _(b'revision must be followed by date line')
            d = match.group(1)
            # Slice rather than index: indexing bytes yields an int on
            # Python 3, which never compares equal to b'/'.
            if d[2:3] == b'/':
                # Y2K
                d = b'19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + b' UTC'
            e.date = dateutil.parsedate(
                d,
                [
                    b'%y/%m/%d %H:%M:%S',
                    b'%Y/%m/%d %H:%M:%S',
                    b'%Y-%m-%d %H:%M:%S',
                ],
            )
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == b'dead'

            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None

            if match.group(7):  # cvs 1.12 commitid
                e.commitid = match.group(8)

            if match.group(9):  # cvsnt mergepoint
                myrev = match.group(10).split(b'.')
                if len(myrev) == 2:  # head
                    e.mergepoint = b'HEAD'
                else:
                    # rebuild the magic branch number used in branchmap
                    myrev = b'.'.join(myrev[:-2] + [b'0', myrev[-2]])
                    branches = [b for b in branchmap if branchmap[b] == myrev]
                    # e.mergepoint is still None here, so report the
                    # branch number that could not be resolved instead
                    assert len(branches) == 1, (
                        b'unknown branch: %s' % myrev
                    )
                    e.mergepoint = branches[0]

            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [
                    tuple([int(y) for y in x.strip().split(b'.')])
                    for x in m.group(1).split(b';')
                ]
                state = 8
            elif re_31.match(line) and re_50.match(peek):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                cpeek = peek
                if cpeek.endswith(b'\n'):
                    cpeek = cpeek[:-1]
                if re_50.match(cpeek):
                    state = 5
                    store = True
                else:
                    # a dashed line that is part of the log message
                    e.comment.append(line)
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        # When a file is added on a branch B1, CVS creates a synthetic
        # dead trunk revision 1.1 so that the branch has a root.
        # Likewise, if you merge such a file to a later branch B2 (one
        # that already existed when the file was added on B1), CVS
        # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
        # these revisions now, but mark them synthetic so
        # createchangeset() can take care of them.
        if (
            store
            and e.dead
            and e.revision[-1] == 1
            and len(e.comment) == 1  # 1.1 or 1.1.x.1
            and file_added_re.match(e.comment[0])
        ):
            ui.debug(
                b'found synthetic revision in %s: %r\n' % (e.rcs, e.comment[0])
            )
            e.synthetic = True

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache(b'\n'.join(e.comment))

            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            # find the branches starting from this revision
            branchpoints = set()
            for branch, revision in pycompat.iteritems(branchmap):
                revparts = tuple([int(i) for i in revision.split(b'.')])
                if len(revparts) < 2:  # bad tags
                    continue
                if revparts[-2] == 0 and revparts[-1] % 2 == 0:
                    # normal branch
                    if revparts[:-2] == e.revision:
                        branchpoints.add(branch)
                elif revparts == (1, 1, 1):  # vendor branch
                    if revparts in e.branches:
                        branchpoints.add(branch)
            e.branchpoints = branchpoints

            log.append(e)

            # remember the current RCS path under its Attic-less form so
            # cached entries can be matched up with it below
            rcsmap[e.rcs.replace(b'/Attic/', b'/')] = e.rcs

            if len(log) % 100 == 0:
                ui.status(
                    stringutil.ellipsis(b'%d %s' % (len(log), e.file), 80)
                    + b'\n'
                )

    log.sort(key=lambda x: (x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
        rcs = e.rcs.replace(b'/Attic/', b'/')
        if rcs in rcsmap:
            e.rcs = rcsmap[rcs]
        branch = e.revision[:-1]
        versions[(e.rcs, branch)] = e.revision

    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            # first revision seen on this branch: the parent is the
            # revision the branch number is rooted at
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            log.sort(key=lambda x: x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror(
                    _(
                        b'log cache overlaps with new log entries,'
                        b' re-run without cache.'
                    )
                )

            log = oldlog + log

            # write the new cachefile
            ui.note(_(b'writing cvs log cache %s\n') % cachefile)
            # close (and thereby flush) the cache file deterministically
            with open(cachefile, b'wb') as fp:
                pickle.dump(log, fp)
        else:
            log = oldlog

    ui.status(_(b'%d log entries\n') % len(log))

    encodings = ui.configlist(b'convert', b'cvsps.logencoding')
    if encodings:

        def revstr(r):
            # this is needed, because logentry.revision is a tuple of "int"
            # (e.g. (1, 2) for "1.2")
            return b'.'.join(pycompat.maplist(pycompat.bytestr, r))

        for entry in log:
            comment = entry.comment
            # the first configured encoding that decodes the message wins
            for e in encodings:
                try:
                    entry.comment = comment.decode(pycompat.sysstr(e)).encode(
                        'utf-8'
                    )
                    if ui.debugflag:
                        ui.debug(
                            b"transcoding by %s: %s of %s\n"
                            % (e, revstr(entry.revision), entry.file)
                        )
                    break
                except UnicodeDecodeError:
                    pass  # try next encoding
                except LookupError as inst:  # unknown encoding, maybe
                    raise error.Abort(
                        pycompat.bytestr(inst),
                        hint=_(
                            b'check convert.cvsps.logencoding configuration'
                        ),
                    )
            else:
                raise error.Abort(
                    _(
                        b"no encoding can transcode"
                        b" CVS log message for %s of %s"
                    )
                    % (revstr(entry.revision), entry.file),
                    hint=_(b'check convert.cvsps.logencoding configuration'),
                )

    hook.hook(ui, None, b"cvslog", True, log=log)

    return log
580
581
class changeset(object):
    """A group of logentry objects that are treated as one commit.

    Instances are plain attribute bags: every keyword passed to the
    constructor becomes an attribute.  The attributes used by this module
    are:

    .id        - integer identifying this changeset (list index)
    .author    - author name as CVS knows it
    .branch    - name of branch this changeset is on, or None
    .comment   - commit message
    .commitid  - CVS commitid or None
    .date      - the commit date as a (time,tz) tuple
    .entries   - list of logentry objects in this changeset
    .parents   - list of one or two parent changesets
    .tags      - list of tags on this changeset
    .synthetic - from synthetic revision "file ... added on branch ..."
    .mergepoint- the branch that has been merged from or None
    .branchpoints- the branches that start at the current entry or empty
    """

    def __init__(self, **entries):
        # defaults first, so explicit keywords override them
        self.id = None
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        # __repr__ has to return a native str, and the attribute names and
        # the class name are native strs as well; formatting them into
        # bytes raises TypeError on Python 3.  Same form as
        # logentry.__repr__.
        items = ("%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__))
        return "%s(%s)" % (type(self).__name__, ", ".join(items))
608
609
610def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
611    '''Convert log into changesets.'''
612
613    ui.status(_(b'creating changesets\n'))
614
615    # try to order commitids by date
616    mindate = {}
617    for e in log:
618        if e.commitid:
619            if e.commitid not in mindate:
620                mindate[e.commitid] = e.date
621            else:
622                mindate[e.commitid] = min(e.date, mindate[e.commitid])
623
624    # Merge changesets
625    log.sort(
626        key=lambda x: (
627            mindate.get(x.commitid, (-1, 0)),
628            x.commitid or b'',
629            x.comment,
630            x.author,
631            x.branch or b'',
632            x.date,
633            x.branchpoints,
634        )
635    )
636
637    changesets = []
638    files = set()
639    c = None
640    for i, e in enumerate(log):
641
642        # Check if log entry belongs to the current changeset or not.
643
644        # Since CVS is file-centric, two different file revisions with
645        # different branchpoints should be treated as belonging to two
646        # different changesets (and the ordering is important and not
647        # honoured by cvsps at this point).
648        #
649        # Consider the following case:
650        # foo 1.1 branchpoints: [MYBRANCH]
651        # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
652        #
653        # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
654        # later version of foo may be in MYBRANCH2, so foo should be the
655        # first changeset and bar the next and MYBRANCH and MYBRANCH2
656        # should both start off of the bar changeset. No provisions are
657        # made to ensure that this is, in fact, what happens.
658        if not (
659            c
660            and e.branchpoints == c.branchpoints
661            and (  # cvs commitids
662                (e.commitid is not None and e.commitid == c.commitid)
663                or (  # no commitids, use fuzzy commit detection
664                    (e.commitid is None or c.commitid is None)
665                    and e.comment == c.comment
666                    and e.author == c.author
667                    and e.branch == c.branch
668                    and (
669                        (c.date[0] + c.date[1])
670                        <= (e.date[0] + e.date[1])
671                        <= (c.date[0] + c.date[1]) + fuzz
672                    )
673                    and e.file not in files
674                )
675            )
676        ):
677            c = changeset(
678                comment=e.comment,
679                author=e.author,
680                branch=e.branch,
681                date=e.date,
682                entries=[],
683                mergepoint=e.mergepoint,
684                branchpoints=e.branchpoints,
685                commitid=e.commitid,
686            )
687            changesets.append(c)
688
689            files = set()
690            if len(changesets) % 100 == 0:
691                t = b'%d %s' % (len(changesets), repr(e.comment)[1:-1])
692                ui.status(stringutil.ellipsis(t, 80) + b'\n')
693
694        c.entries.append(e)
695        files.add(e.file)
696        c.date = e.date  # changeset date is date of latest commit in it
697
698    # Mark synthetic changesets
699
700    for c in changesets:
701        # Synthetic revisions always get their own changeset, because
702        # the log message includes the filename.  E.g. if you add file3
703        # and file4 on a branch, you get four log entries and three
704        # changesets:
705        #   "File file3 was added on branch ..." (synthetic, 1 entry)
706        #   "File file4 was added on branch ..." (synthetic, 1 entry)
707        #   "Add file3 and file4 to fix ..."     (real, 2 entries)
708        # Hence the check for 1 entry here.
709        c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
710
711    # Sort files in each changeset
712
713    def entitycompare(l, r):
714        """Mimic cvsps sorting order"""
715        l = l.file.split(b'/')
716        r = r.file.split(b'/')
717        nl = len(l)
718        nr = len(r)
719        n = min(nl, nr)
720        for i in range(n):
721            if i + 1 == nl and nl < nr:
722                return -1
723            elif i + 1 == nr and nl > nr:
724                return +1
725            elif l[i] < r[i]:
726                return -1
727            elif l[i] > r[i]:
728                return +1
729        return 0
730
731    for c in changesets:
732        c.entries.sort(key=functools.cmp_to_key(entitycompare))
733
734    # Sort changesets by date
735
736    odd = set()
737
738    def cscmp(l, r):
739        d = sum(l.date) - sum(r.date)
740        if d:
741            return d
742
743        # detect vendor branches and initial commits on a branch
744        le = {}
745        for e in l.entries:
746            le[e.rcs] = e.revision
747        re = {}
748        for e in r.entries:
749            re[e.rcs] = e.revision
750
751        d = 0
752        for e in l.entries:
753            if re.get(e.rcs, None) == e.parent:
754                assert not d
755                d = 1
756                break
757
758        for e in r.entries:
759            if le.get(e.rcs, None) == e.parent:
760                if d:
761                    odd.add((l, r))
762                d = -1
763                break
764        # By this point, the changesets are sufficiently compared that
765        # we don't really care about ordering. However, this leaves
766        # some race conditions in the tests, so we compare on the
767        # number of files modified, the files contained in each
768        # changeset, and the branchpoints in the change to ensure test
769        # output remains stable.
770
771        # recommended replacement for cmp from
772        # https://docs.python.org/3.0/whatsnew/3.0.html
773        c = lambda x, y: (x > y) - (x < y)
774        # Sort bigger changes first.
775        if not d:
776            d = c(len(l.entries), len(r.entries))
777        # Try sorting by filename in the change.
778        if not d:
779            d = c([e.file for e in l.entries], [e.file for e in r.entries])
780        # Try and put changes without a branch point before ones with
781        # a branch point.
782        if not d:
783            d = c(len(l.branchpoints), len(r.branchpoints))
784        return d
785
786    changesets.sort(key=functools.cmp_to_key(cscmp))
787
788    # Collect tags
789
790    globaltags = {}
791    for c in changesets:
792        for e in c.entries:
793            for tag in e.tags:
794                # remember which is the latest changeset to have this tag
795                globaltags[tag] = c
796
797    for c in changesets:
798        tags = set()
799        for e in c.entries:
800            tags.update(e.tags)
801        # remember tags only if this is the latest changeset to have it
802        c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
803
804    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
805    # by inserting dummy changesets with two parents, and handle
806    # {{mergefrombranch BRANCHNAME}} by setting two parents.
807
808    if mergeto is None:
809        mergeto = br'{{mergetobranch ([-\w]+)}}'
810    if mergeto:
811        mergeto = re.compile(mergeto)
812
813    if mergefrom is None:
814        mergefrom = br'{{mergefrombranch ([-\w]+)}}'
815    if mergefrom:
816        mergefrom = re.compile(mergefrom)
817
818    versions = {}  # changeset index where we saw any particular file version
819    branches = {}  # changeset index where we saw a branch
820    n = len(changesets)
821    i = 0
822    while i < n:
823        c = changesets[i]
824
825        for f in c.entries:
826            versions[(f.rcs, f.revision)] = i
827
828        p = None
829        if c.branch in branches:
830            p = branches[c.branch]
831        else:
832            # first changeset on a new branch
833            # the parent is a changeset with the branch in its
834            # branchpoints such that it is the latest possible
835            # commit without any intervening, unrelated commits.
836
837            for candidate in pycompat.xrange(i):
838                if c.branch not in changesets[candidate].branchpoints:
839                    if p is not None:
840                        break
841                    continue
842                p = candidate
843
844        c.parents = []
845        if p is not None:
846            p = changesets[p]
847
848            # Ensure no changeset has a synthetic changeset as a parent.
849            while p.synthetic:
850                assert len(p.parents) <= 1, _(
851                    b'synthetic changeset cannot have multiple parents'
852                )
853                if p.parents:
854                    p = p.parents[0]
855                else:
856                    p = None
857                    break
858
859            if p is not None:
860                c.parents.append(p)
861
862        if c.mergepoint:
863            if c.mergepoint == b'HEAD':
864                c.mergepoint = None
865            c.parents.append(changesets[branches[c.mergepoint]])
866
867        if mergefrom:
868            m = mergefrom.search(c.comment)
869            if m:
870                m = m.group(1)
871                if m == b'HEAD':
872                    m = None
873                try:
874                    candidate = changesets[branches[m]]
875                except KeyError:
876                    ui.warn(
877                        _(
878                            b"warning: CVS commit message references "
879                            b"non-existent branch %r:\n%s\n"
880                        )
881                        % (pycompat.bytestr(m), c.comment)
882                    )
883                if m in branches and c.branch != m and not candidate.synthetic:
884                    c.parents.append(candidate)
885
886        if mergeto:
887            m = mergeto.search(c.comment)
888            if m:
889                if m.groups():
890                    m = m.group(1)
891                    if m == b'HEAD':
892                        m = None
893                else:
894                    m = None  # if no group found then merge to HEAD
895                if m in branches and c.branch != m:
896                    # insert empty changeset for merge
897                    cc = changeset(
898                        author=c.author,
899                        branch=m,
900                        date=c.date,
901                        comment=b'convert-repo: CVS merge from branch %s'
902                        % c.branch,
903                        entries=[],
904                        tags=[],
905                        parents=[changesets[branches[m]], c],
906                    )
907                    changesets.insert(i + 1, cc)
908                    branches[m] = i + 1
909
910                    # adjust our loop counters now we have inserted a new entry
911                    n += 1
912                    i += 2
913                    continue
914
915        branches[c.branch] = i
916        i += 1
917
918    # Drop synthetic changesets (safe now that we have ensured no other
919    # changesets can have them as parents).
920    i = 0
921    while i < len(changesets):
922        if changesets[i].synthetic:
923            del changesets[i]
924        else:
925            i += 1
926
927    # Number changesets
928
929    for i, c in enumerate(changesets):
930        c.id = i + 1
931
932    if odd:
933        for l, r in odd:
934            if l.id is not None and r.id is not None:
935                ui.warn(
936                    _(b'changeset %d is both before and after %d\n')
937                    % (l.id, r.id)
938                )
939
940    ui.status(_(b'%d changeset entries\n') % len(changesets))
941
942    hook.hook(ui, None, b"cvschangesets", True, changesets=changesets)
943
944    return changesets
945
946
def debugcvsps(ui, *args, **opts):
    """Print the changesets derived from a CVS rlog in cvsps layout.

    Read CVS rlog for current directory or for each named path in
    ``args``, convert the log to changesets based on matching commit
    log entries and dates, and write one "PatchSet" record per
    changeset to ``ui``.

    The following keys are read from ``opts``: ``new_cache``,
    ``update_cache``, ``root``, ``fuzz``, ``revisions``, ``ancestors``,
    ``branches``, ``parents`` and ``prefix``.

    When ``revisions`` is non-empty, printing starts with the changeset
    *after* the one whose id or tag equals ``revisions[0]``; when a
    second entry is given, the loop stops once a changeset whose id or
    tag equals ``revisions[1]`` has been handled.

    If reading the log raises ``logerror``, the error's repr is written
    to ``ui`` and the function returns without printing changesets.
    """
    opts = pycompat.byteskwargs(opts)

    # Cache mode handed to createlog(): write a new cache, update an
    # existing one, or (None) whatever createlog() does by default.
    cache = None
    if opts[b"new_cache"]:
        cache = b"write"
    elif opts[b"update_cache"]:
        cache = b"update"

    revisions = opts[b"revisions"]
    root = opts[b"root"]

    try:
        if args:
            log = []
            for path in args:
                log.extend(createlog(ui, path, root=root, cache=cache))
        else:
            log = createlog(ui, root=root, cache=cache)
    except logerror as err:
        ui.write(b"%r\n" % err)
        return

    changesets = createchangeset(ui, log, opts[b"fuzz"])
    del log

    branches = {}  # latest changeset id seen so far on each branch
    ancestors = {}  # branch -> (parent branch, id of parent changeset)

    def matches(rev, cs):
        """Tell whether rev names changeset cs by id or by tag."""
        return rev == (b"%d" % cs.id) or rev in cs.tags

    def show(cs):
        """Write a single changeset to ui."""
        # Note: the trailing spaces on several of these lines are needed
        #       to have bug-for-bug compatibility with cvsps.
        ui.write(b'---------------------\n')
        ui.write((b'PatchSet %d \n' % cs.id))
        stamp = dateutil.datestr(cs.date, b'%Y/%m/%d %H:%M:%S %1%2')
        ui.write((b'Date: %s\n' % stamp))
        ui.write((b'Author: %s\n' % cs.author))
        ui.write((b'Branch: %s\n' % (cs.branch or b'HEAD')))

        plural = b's' if len(cs.tags) > 1 else b''
        taglist = b','.join(cs.tags) or b'(none)'
        ui.write((b'Tag%s: %s \n' % (plural, taglist)))

        if cs.branchpoints:
            points = b', '.join(sorted(cs.branchpoints))
            ui.writenoi18n(b'Branchpoints: %s \n' % points)

        if opts[b"parents"] and cs.parents:
            if len(cs.parents) <= 1:
                ui.write((b'Parent: %d\n' % cs.parents[0].id))
            else:
                ids = b','.join([b"%d" % p.id for p in cs.parents])
                ui.write((b'Parents: %s\n' % ids))

        if opts[b"ancestors"]:
            # Walk from this changeset's branch up through the recorded
            # parent branches until a falsy branch (printed as HEAD).
            # NOTE(review): ancestors[branch] raises KeyError for a named
            # branch that never had a parent recorded -- confirm whether
            # that situation can occur.
            chain = []
            branch = cs.branch
            while branch:
                branch, parentid = ancestors[branch]
                chain.append(
                    b'%s:%d:%d'
                    % (branch or b"HEAD", parentid, branches[branch])
                )
            if chain:
                ui.write((b'Ancestors: %s\n' % b','.join(chain)))

        ui.writenoi18n(b'Log:\n')
        ui.write(b'%s\n\n' % cs.comment)
        ui.writenoi18n(b'Members: \n')
        for entry in cs.entries:
            name = entry.file
            prefix = opts[b"prefix"]
            if name.startswith(prefix):
                name = name[len(prefix) :]
            oldrev = b'.'.join([b"%d" % x for x in entry.parent]) or b'INITIAL'
            newrev = b'.'.join([b"%d" % x for x in entry.revision])
            deadmark = (b'', b'(DEAD)')[entry.dead]
            ui.write(b'\t%s:%s->%s%s \n' % (name, oldrev, newrev, deadmark))
        ui.write(b'\n')

    # Print changesets (optionally filtered)

    # While true, the start revision has not been seen yet and nothing is
    # printed; with no revisions given, printing starts immediately.
    skipping = len(revisions) > 0
    for cs in changesets:
        if opts[b"ancestors"]:
            if cs.branch not in branches and cs.parents:
                parentid = cs.parents[0].id
                if parentid:
                    # first changeset seen on this branch: remember where
                    # it forked from (ids are 1-based list positions)
                    ancestors[cs.branch] = (
                        changesets[parentid - 1].branch,
                        parentid,
                    )
            branches[cs.branch] = cs.id

        # limit by branches; a skipped changeset is also not considered
        # for the start/end revision checks below
        wanted = opts[b"branches"]
        if wanted and (cs.branch or b'HEAD') not in wanted:
            continue

        if not skipping:
            show(cs)

        # have we seen the start tag?
        if revisions and skipping and matches(revisions[0], cs):
            skipping = False

        # see if we reached the end tag
        if len(revisions) > 1 and not skipping and matches(revisions[1], cs):
            break
1071