1# hg.py - hg backend for convert extension
2#
3#  Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
4#
5# This software may be used and distributed according to the terms of the
6# GNU General Public License version 2 or any later version.
7
8# Notes for hg->hg conversion:
9#
10# * Old versions of Mercurial didn't trim the whitespace from the ends
11#   of commit messages, but new versions do.  Changesets created by
12#   those older versions, then converted, may thus have different
13#   hashes for changesets that are otherwise identical.
14#
# * Using "--config convert.hg.saverev=true" causes the source
#   identifier to be stored in the converted revision. As a result,
#   the converted revision will have a different identity than the
#   source.
19from __future__ import absolute_import
20
21import os
22import re
23import time
24
25from mercurial.i18n import _
26from mercurial.pycompat import open
27from mercurial.node import (
28    bin,
29    hex,
30    sha1nodeconstants,
31)
32from mercurial import (
33    bookmarks,
34    context,
35    error,
36    exchange,
37    hg,
38    lock as lockmod,
39    logcmdutil,
40    merge as mergemod,
41    phases,
42    pycompat,
43    util,
44)
45from mercurial.utils import dateutil
46
# Buffer type used below to assemble rewritten file contents
# (write() / getvalue()).
stringio = util.stringio

from . import common

# Short module-level aliases for helpers provided by the sibling "common"
# module.
mapfile = common.mapfile
NoRepo = common.NoRepo

# Matches a standalone run of 12 to 40 lowercase hexadecimal characters, i.e.
# text that looks like an abbreviated or full SHA-1 hash.
sha1re = re.compile(br'\b[0-9a-f]{12,40}\b')
55
56
57class mercurial_sink(common.converter_sink):
58    def __init__(self, ui, repotype, path):
59        common.converter_sink.__init__(self, ui, repotype, path)
60        self.branchnames = ui.configbool(b'convert', b'hg.usebranchnames')
61        self.clonebranches = ui.configbool(b'convert', b'hg.clonebranches')
62        self.tagsbranch = ui.config(b'convert', b'hg.tagsbranch')
63        self.lastbranch = None
64        if os.path.isdir(path) and len(os.listdir(path)) > 0:
65            try:
66                self.repo = hg.repository(self.ui, path)
67                if not self.repo.local():
68                    raise NoRepo(
69                        _(b'%s is not a local Mercurial repository') % path
70                    )
71            except error.RepoError as err:
72                ui.traceback()
73                raise NoRepo(err.args[0])
74        else:
75            try:
76                ui.status(_(b'initializing destination %s repository\n') % path)
77                self.repo = hg.repository(self.ui, path, create=True)
78                if not self.repo.local():
79                    raise NoRepo(
80                        _(b'%s is not a local Mercurial repository') % path
81                    )
82                self.created.append(path)
83            except error.RepoError:
84                ui.traceback()
85                raise NoRepo(
86                    _(b"could not create hg repository %s as sink") % path
87                )
88        self.lock = None
89        self.wlock = None
90        self.filemapmode = False
91        self.subrevmaps = {}
92
    def before(self):
        """Acquire the destination repository locks before converting.

        ``repo.wlock()`` is taken first, then ``repo.lock()``; both are stored
        on the instance so that after() can release them.
        """
        self.ui.debug(b'run hg sink pre-conversion action\n')
        self.wlock = self.repo.wlock()
        self.lock = self.repo.lock()
97
98    def after(self):
99        self.ui.debug(b'run hg sink post-conversion action\n')
100        if self.lock:
101            self.lock.release()
102        if self.wlock:
103            self.wlock.release()
104
    def revmapfile(self):
        """Return the path of the "shamap" file joined onto the repo's vfs."""
        return self.repo.vfs.join(b"shamap")
107
    def authorfile(self):
        """Return the path of the "authormap" file joined onto the repo's vfs."""
        return self.repo.vfs.join(b"authormap")
110
111    def setbranch(self, branch, pbranches):
112        if not self.clonebranches:
113            return
114
115        setbranch = branch != self.lastbranch
116        self.lastbranch = branch
117        if not branch:
118            branch = b'default'
119        pbranches = [(b[0], b[1] and b[1] or b'default') for b in pbranches]
120
121        branchpath = os.path.join(self.path, branch)
122        if setbranch:
123            self.after()
124            try:
125                self.repo = hg.repository(self.ui, branchpath)
126            except Exception:
127                self.repo = hg.repository(self.ui, branchpath, create=True)
128            self.before()
129
130        # pbranches may bring revisions from other branches (merge parents)
131        # Make sure we have them, or pull them.
132        missings = {}
133        for b in pbranches:
134            try:
135                self.repo.lookup(b[0])
136            except Exception:
137                missings.setdefault(b[1], []).append(b[0])
138
139        if missings:
140            self.after()
141            for pbranch, heads in sorted(pycompat.iteritems(missings)):
142                pbranchpath = os.path.join(self.path, pbranch)
143                prepo = hg.peer(self.ui, {}, pbranchpath)
144                self.ui.note(
145                    _(b'pulling from %s into %s\n') % (pbranch, branch)
146                )
147                exchange.pull(
148                    self.repo, prepo, heads=[prepo.lookup(h) for h in heads]
149                )
150            self.before()
151
152    def _rewritetags(self, source, revmap, data):
153        fp = stringio()
154        for line in data.splitlines():
155            s = line.split(b' ', 1)
156            if len(s) != 2:
157                self.ui.warn(_(b'invalid tag entry: "%s"\n') % line)
158                fp.write(b'%s\n' % line)  # Bogus, but keep for hash stability
159                continue
160            revid = revmap.get(source.lookuprev(s[0]))
161            if not revid:
162                if s[0] == sha1nodeconstants.nullhex:
163                    revid = s[0]
164                else:
165                    # missing, but keep for hash stability
166                    self.ui.warn(_(b'missing tag entry: "%s"\n') % line)
167                    fp.write(b'%s\n' % line)
168                    continue
169            fp.write(b'%s %s\n' % (revid, s[1]))
170        return fp.getvalue()
171
172    def _rewritesubstate(self, source, data):
173        fp = stringio()
174        for line in data.splitlines():
175            s = line.split(b' ', 1)
176            if len(s) != 2:
177                continue
178
179            revid = s[0]
180            subpath = s[1]
181            if revid != sha1nodeconstants.nullhex:
182                revmap = self.subrevmaps.get(subpath)
183                if revmap is None:
184                    revmap = mapfile(
185                        self.ui, self.repo.wjoin(subpath, b'.hg/shamap')
186                    )
187                    self.subrevmaps[subpath] = revmap
188
189                    # It is reasonable that one or more of the subrepos don't
190                    # need to be converted, in which case they can be cloned
191                    # into place instead of converted.  Therefore, only warn
192                    # once.
193                    msg = _(b'no ".hgsubstate" updates will be made for "%s"\n')
194                    if len(revmap) == 0:
195                        sub = self.repo.wvfs.reljoin(subpath, b'.hg')
196
197                        if self.repo.wvfs.exists(sub):
198                            self.ui.warn(msg % subpath)
199
200                newid = revmap.get(revid)
201                if not newid:
202                    if len(revmap) > 0:
203                        self.ui.warn(
204                            _(b"%s is missing from %s/.hg/shamap\n")
205                            % (revid, subpath)
206                        )
207                else:
208                    revid = newid
209
210            fp.write(b'%s %s\n' % (revid, subpath))
211
212        return fp.getvalue()
213
214    def _calculatemergedfiles(self, source, p1ctx, p2ctx):
215        """Calculates the files from p2 that we need to pull in when merging p1
216        and p2, given that the merge is coming from the given source.
217
218        This prevents us from losing files that only exist in the target p2 and
219        that don't come from the source repo (like if you're merging multiple
220        repositories together).
221        """
222        anc = [p1ctx.ancestor(p2ctx)]
223        # Calculate what files are coming from p2
224        # TODO: mresult.commitinfo might be able to get that info
225        mresult = mergemod.calculateupdates(
226            self.repo,
227            p1ctx,
228            p2ctx,
229            anc,
230            branchmerge=True,
231            force=True,
232            acceptremote=False,
233            followcopies=False,
234        )
235
236        for file, (action, info, msg) in mresult.filemap():
237            if source.targetfilebelongstosource(file):
238                # If the file belongs to the source repo, ignore the p2
239                # since it will be covered by the existing fileset.
240                continue
241
242            # If the file requires actual merging, abort. We don't have enough
243            # context to resolve merges correctly.
244            if action in [b'm', b'dm', b'cd', b'dc']:
245                raise error.Abort(
246                    _(
247                        b"unable to convert merge commit "
248                        b"since target parents do not merge cleanly (file "
249                        b"%s, parents %s and %s)"
250                    )
251                    % (file, p1ctx, p2ctx)
252                )
253            elif action == b'k':
254                # 'keep' means nothing changed from p1
255                continue
256            else:
257                # Any other change means we want to take the p2 version
258                yield file
259
    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Commit one converted changeset to the destination repository.

        files: (name, version) pairs; turned into a dict whose values are
            handed to ``source.getfile()`` when a file's content is needed.
        copies: mapping consulted with ``copies.get(f)`` for the copy source
            of each committed file.
        parents: parent identifiers for the new commit; duplicates are
            dropped and the list is padded with the null id.
        commit: object providing ``desc``, ``extra``, ``branch``, ``rev``,
            ``saverev``, ``author``, ``date``, ``phase`` and ``ctx``.
        source: converter source used for ``getfile()``, ``lookuprev()`` and
            ``targetfilebelongstosource()``.
        revmap: mapping (``.get``) from source revision ids to converted ids.
        full: when true, every file of both parents is added to the file set.
        cleanp2: files whose content is reused from the second parent.

        Returns the hex node of the last commit created, or the single
        original parent when filemap mode filters the revision out as empty.
        """
        files = dict(files)

        # Callback given to memctx: returns the file context for ``f`` or
        # None when the file is absent.  It reads p2ctx and p2files, which
        # are (re)assigned by the commit loop further down.
        def getfilectx(repo, memctx, f):
            if p2ctx and f in p2files and f not in copies:
                self.ui.debug(b'reusing %s from p2\n' % f)
                try:
                    return p2ctx[f]
                except error.ManifestLookupError:
                    # If the file doesn't exist in p2, then we're syncing a
                    # delete, so just return None.
                    return None
            try:
                v = files[f]
            except KeyError:
                return None
            data, mode = source.getfile(f, v)
            if data is None:
                return None
            # These two files embed revision ids, which must be rewritten to
            # the converted ids.
            if f == b'.hgtags':
                data = self._rewritetags(source, revmap, data)
            if f == b'.hgsubstate':
                data = self._rewritesubstate(source, data)
            return context.memfilectx(
                self.repo,
                memctx,
                f,
                data,
                b'l' in mode,
                b'x' in mode,
                copies.get(f),
            )

        # Drop duplicate parents while preserving their order.
        pl = []
        for p in parents:
            if p not in pl:
                pl.append(p)
        parents = pl
        nparents = len(parents)
        # In filemap mode, remember the single parent and its manifest node
        # so an empty revision can be detected after the commit.
        if self.filemapmode and nparents == 1:
            m1node = self.repo.changelog.read(bin(parents[0]))[0]
            parent = parents[0]

        # The check is intentionally done twice: a commit without parents
        # needs two null ids, a commit with one parent needs one.
        if len(parents) < 2:
            parents.append(self.repo.nullid)
        if len(parents) < 2:
            parents.append(self.repo.nullid)
        # The first parent starts out as "p2" and is shifted into p1 at the
        # top of the commit loop below.
        p2 = parents.pop(0)

        text = commit.desc

        # Replace hash-like strings in the description that resolve to a
        # converted revision, keeping the length of the original abbreviation.
        sha1s = re.findall(sha1re, text)
        for sha1 in sha1s:
            oldrev = source.lookuprev(sha1)
            newrev = revmap.get(oldrev)
            if newrev is not None:
                text = text.replace(sha1, newrev[: len(sha1)])

        extra = commit.extra.copy()

        sourcename = self.repo.ui.config(b'convert', b'hg.sourcename')
        if sourcename:
            extra[b'convert_source'] = sourcename

        # Remap revision references stored in well-known extra fields.
        for label in (
            b'source',
            b'transplant_source',
            b'rebase_source',
            b'intermediate-source',
        ):
            node = extra.get(label)

            if node is None:
                continue

            # Only transplant stores its reference in binary
            if label == b'transplant_source':
                node = hex(node)

            newrev = revmap.get(node)
            if newrev is not None:
                if label == b'transplant_source':
                    newrev = bin(newrev)

                extra[label] = newrev

        if self.branchnames and commit.branch:
            extra[b'branch'] = commit.branch
        if commit.rev and commit.saverev:
            extra[b'convert_revision'] = commit.rev

        # One commit is created per remaining parent: each iteration merges
        # the previous result (p1) with the next parent (p2).  Commits after
        # the first use the "(octopus merge fixup)" description.
        while parents:
            p1 = p2
            p2 = parents.pop(0)
            p1ctx = self.repo[p1]
            p2ctx = None
            if p2 != self.repo.nullid:
                p2ctx = self.repo[p2]
            fileset = set(files)
            if full:
                fileset.update(self.repo[p1])
                fileset.update(self.repo[p2])

            if p2ctx:
                # Files to take from p2: the caller's clean-in-p2 set plus
                # whatever the merge calculation says comes from p2.
                p2files = set(cleanp2)
                for file in self._calculatemergedfiles(source, p1ctx, p2ctx):
                    p2files.add(file)
                    fileset.add(file)

            ctx = context.memctx(
                self.repo,
                (p1, p2),
                text,
                fileset,
                getfilectx,
                commit.author,
                commit.date,
                extra,
            )

            # We won't know if the conversion changes the node until after the
            # commit, so copy the source's phase for now.
            self.repo.ui.setconfig(
                b'phases',
                b'new-commit',
                phases.phasenames[commit.phase],
                b'convert',
            )

            with self.repo.transaction(b"convert") as tr:
                if self.repo.ui.config(b'convert', b'hg.preserve-hash'):
                    origctx = commit.ctx
                else:
                    origctx = None
                node = hex(self.repo.commitctx(ctx, origctx=origctx))

                # If the node value has changed, but the phase is lower than
                # draft, set it back to draft since it hasn't been exposed
                # anywhere.
                if commit.rev != node:
                    ctx = self.repo[node]
                    if ctx.phase() < phases.draft:
                        phases.registernew(
                            self.repo, tr, phases.draft, [ctx.rev()]
                        )

            text = b"(octopus merge fixup)\n"
            p2 = node

        # In filemap mode, a single-parent commit whose manifest compares
        # equal to its parent's is rolled back, unless it closes a branch.
        if self.filemapmode and nparents == 1:
            man = self.repo.manifestlog.getstorage(b'')
            mnode = self.repo.changelog.read(bin(p2))[0]
            closed = b'close' in commit.extra
            if not closed and not man.cmp(m1node, man.revision(mnode)):
                self.ui.status(_(b"filtering out empty revision\n"))
                self.repo.rollback(force=True)
                return parent
        return p2
420
421    def puttags(self, tags):
422        tagparent = self.repo.branchtip(self.tagsbranch, ignoremissing=True)
423        tagparent = tagparent or self.repo.nullid
424
425        oldlines = set()
426        for branch, heads in pycompat.iteritems(self.repo.branchmap()):
427            for h in heads:
428                if b'.hgtags' in self.repo[h]:
429                    oldlines.update(
430                        set(self.repo[h][b'.hgtags'].data().splitlines(True))
431                    )
432        oldlines = sorted(list(oldlines))
433
434        newlines = sorted([(b"%s %s\n" % (tags[tag], tag)) for tag in tags])
435        if newlines == oldlines:
436            return None, None
437
438        # if the old and new tags match, then there is nothing to update
439        oldtags = set()
440        newtags = set()
441        for line in oldlines:
442            s = line.strip().split(b' ', 1)
443            if len(s) != 2:
444                continue
445            oldtags.add(s[1])
446        for line in newlines:
447            s = line.strip().split(b' ', 1)
448            if len(s) != 2:
449                continue
450            if s[1] not in oldtags:
451                newtags.add(s[1].strip())
452
453        if not newtags:
454            return None, None
455
456        data = b"".join(newlines)
457
458        def getfilectx(repo, memctx, f):
459            return context.memfilectx(repo, memctx, f, data, False, False, None)
460
461        self.ui.status(_(b"updating tags\n"))
462        date = b"%d 0" % int(time.mktime(time.gmtime()))
463        extra = {b'branch': self.tagsbranch}
464        ctx = context.memctx(
465            self.repo,
466            (tagparent, None),
467            b"update tags",
468            [b".hgtags"],
469            getfilectx,
470            b"convert-repo",
471            date,
472            extra,
473        )
474        node = self.repo.commitctx(ctx)
475        return hex(node), hex(tagparent)
476
    def setfilemapmode(self, active):
        """Record whether filemap mode is active (read by putcommit())."""
        self.filemapmode = active
479
480    def putbookmarks(self, updatedbookmark):
481        if not len(updatedbookmark):
482            return
483        wlock = lock = tr = None
484        try:
485            wlock = self.repo.wlock()
486            lock = self.repo.lock()
487            tr = self.repo.transaction(b'bookmark')
488            self.ui.status(_(b"updating bookmarks\n"))
489            destmarks = self.repo._bookmarks
490            changes = [
491                (bookmark, bin(updatedbookmark[bookmark]))
492                for bookmark in updatedbookmark
493            ]
494            destmarks.applychanges(self.repo, tr, changes)
495            tr.close()
496        finally:
497            lockmod.release(lock, wlock, tr)
498
    def hascommitfrommap(self, rev):
        """Tell whether ``rev`` may be treated as present in the destination.

        True when ``rev`` is in the destination repository; otherwise the
        value of the clonebranches option is returned, so the answer is
        never a definite "no" while that option is enabled.
        """
        # the exact semantics of clonebranches is unclear so we can't say no
        return rev in self.repo or self.clonebranches
502
503    def hascommitforsplicemap(self, rev):
504        if rev not in self.repo and self.clonebranches:
505            raise error.Abort(
506                _(
507                    b'revision %s not found in destination '
508                    b'repository (lookups with clonebranches=true '
509                    b'are not implemented)'
510                )
511                % rev
512            )
513        return rev in self.repo
514
515
516class mercurial_source(common.converter_source):
517    def __init__(self, ui, repotype, path, revs=None):
518        common.converter_source.__init__(self, ui, repotype, path, revs)
519        self.ignoreerrors = ui.configbool(b'convert', b'hg.ignoreerrors')
520        self.ignored = set()
521        self.saverev = ui.configbool(b'convert', b'hg.saverev')
522        try:
523            self.repo = hg.repository(self.ui, path)
524            # try to provoke an exception if this isn't really a hg
525            # repo, but some other bogus compatible-looking url
526            if not self.repo.local():
527                raise error.RepoError
528        except error.RepoError:
529            ui.traceback()
530            raise NoRepo(_(b"%s is not a local Mercurial repository") % path)
531        self.lastrev = None
532        self.lastctx = None
533        self._changescache = None, None
534        self.convertfp = None
535        # Restrict converted revisions to startrev descendants
536        startnode = ui.config(b'convert', b'hg.startrev')
537        hgrevs = ui.config(b'convert', b'hg.revs')
538        if hgrevs is None:
539            if startnode is not None:
540                try:
541                    startnode = self.repo.lookup(startnode)
542                except error.RepoError:
543                    raise error.Abort(
544                        _(b'%s is not a valid start revision') % startnode
545                    )
546                startrev = self.repo.changelog.rev(startnode)
547                children = {startnode: 1}
548                for r in self.repo.changelog.descendants([startrev]):
549                    children[self.repo.changelog.node(r)] = 1
550                self.keep = children.__contains__
551            else:
552                self.keep = util.always
553            if revs:
554                self._heads = [self.repo.lookup(r) for r in revs]
555            else:
556                self._heads = self.repo.heads()
557        else:
558            if revs or startnode is not None:
559                raise error.Abort(
560                    _(
561                        b'hg.revs cannot be combined with '
562                        b'hg.startrev or --rev'
563                    )
564                )
565            nodes = set()
566            parents = set()
567            for r in logcmdutil.revrange(self.repo, [hgrevs]):
568                ctx = self.repo[r]
569                nodes.add(ctx.node())
570                parents.update(p.node() for p in ctx.parents())
571            self.keep = nodes.__contains__
572            self._heads = nodes - parents
573
574    def _changectx(self, rev):
575        if self.lastrev != rev:
576            self.lastctx = self.repo[rev]
577            self.lastrev = rev
578        return self.lastctx
579
580    def _parents(self, ctx):
581        return [p for p in ctx.parents() if p and self.keep(p.node())]
582
583    def getheads(self):
584        return [hex(h) for h in self._heads if self.keep(h)]
585
586    def getfile(self, name, rev):
587        try:
588            fctx = self._changectx(rev)[name]
589            return fctx.data(), fctx.flags()
590        except error.LookupError:
591            return None, None
592
593    def _changedfiles(self, ctx1, ctx2):
594        ma, r = [], []
595        maappend = ma.append
596        rappend = r.append
597        d = ctx1.manifest().diff(ctx2.manifest())
598        for f, ((node1, flag1), (node2, flag2)) in pycompat.iteritems(d):
599            if node2 is None:
600                rappend(f)
601            else:
602                maappend(f)
603        return ma, r
604
605    def getchanges(self, rev, full):
606        ctx = self._changectx(rev)
607        parents = self._parents(ctx)
608        if full or not parents:
609            files = copyfiles = ctx.manifest()
610        if parents:
611            if self._changescache[0] == rev:
612                ma, r = self._changescache[1]
613            else:
614                ma, r = self._changedfiles(parents[0], ctx)
615            if not full:
616                files = ma + r
617            copyfiles = ma
618        # _getcopies() is also run for roots and before filtering so missing
619        # revlogs are detected early
620        copies = self._getcopies(ctx, parents, copyfiles)
621        cleanp2 = set()
622        if len(parents) == 2:
623            d = parents[1].manifest().diff(ctx.manifest(), clean=True)
624            for f, value in pycompat.iteritems(d):
625                if value is None:
626                    cleanp2.add(f)
627        changes = [(f, rev) for f in files if f not in self.ignored]
628        changes.sort()
629        return changes, copies, cleanp2
630
631    def _getcopies(self, ctx, parents, files):
632        copies = {}
633        for name in files:
634            if name in self.ignored:
635                continue
636            try:
637                copysource = ctx.filectx(name).copysource()
638                if copysource in self.ignored:
639                    continue
640                # Ignore copy sources not in parent revisions
641                if not any(copysource in p for p in parents):
642                    continue
643                copies[name] = copysource
644            except TypeError:
645                pass
646            except error.LookupError as e:
647                if not self.ignoreerrors:
648                    raise
649                self.ignored.add(name)
650                self.ui.warn(_(b'ignoring: %s\n') % e)
651        return copies
652
653    def getcommit(self, rev):
654        ctx = self._changectx(rev)
655        _parents = self._parents(ctx)
656        parents = [p.hex() for p in _parents]
657        optparents = [p.hex() for p in ctx.parents() if p and p not in _parents]
658        crev = rev
659
660        return common.commit(
661            author=ctx.user(),
662            date=dateutil.datestr(ctx.date(), b'%Y-%m-%d %H:%M:%S %1%2'),
663            desc=ctx.description(),
664            rev=crev,
665            parents=parents,
666            optparents=optparents,
667            branch=ctx.branch(),
668            extra=ctx.extra(),
669            sortkey=ctx.rev(),
670            saverev=self.saverev,
671            phase=ctx.phase(),
672            ctx=ctx,
673        )
674
    def numcommits(self):
        """Return ``len()`` of the source repository."""
        return len(self.repo)
677
678    def gettags(self):
679        # This will get written to .hgtags, filter non global tags out.
680        tags = [
681            t
682            for t in self.repo.tagslist()
683            if self.repo.tagtype(t[0]) == b'global'
684        ]
685        return {name: hex(node) for name, node in tags if self.keep(node)}
686
687    def getchangedfiles(self, rev, i):
688        ctx = self._changectx(rev)
689        parents = self._parents(ctx)
690        if not parents and i is None:
691            i = 0
692            ma, r = ctx.manifest().keys(), []
693        else:
694            i = i or 0
695            ma, r = self._changedfiles(parents[i], ctx)
696        ma, r = [[f for f in l if f not in self.ignored] for l in (ma, r)]
697
698        if i == 0:
699            self._changescache = (rev, (ma, r))
700
701        return ma + r
702
703    def converted(self, rev, destrev):
704        if self.convertfp is None:
705            self.convertfp = open(self.repo.vfs.join(b'shamap'), b'ab')
706        self.convertfp.write(util.tonativeeol(b'%s %s\n' % (destrev, rev)))
707        self.convertfp.flush()
708
    def before(self):
        """Pre-conversion hook for the source; only emits a debug message."""
        self.ui.debug(b'run hg source pre-conversion action\n')
711
712    def after(self):
713        self.ui.debug(b'run hg source post-conversion action\n')
714
    def hasnativeorder(self):
        """Always returns True for the Mercurial source."""
        return True
717
    def hasnativeclose(self):
        """Always returns True for the Mercurial source."""
        return True
720
721    def lookuprev(self, rev):
722        try:
723            return hex(self.repo.lookup(rev))
724        except (error.RepoError, error.LookupError):
725            return None
726
    def getbookmarks(self):
        """Return the source repository's bookmarks via bookmarks.listbookmarks()."""
        return bookmarks.listbookmarks(self.repo)
729
    def checkrevformat(self, revstr, mapname=b'splicemap'):
        """Validate a revision string read from the map file ``mapname``.

        A Mercurial revision string is a 40 byte hex value; the actual check
        is delegated to ``self.checkhexformat()``.
        """
        self.checkhexformat(revstr, mapname)
733