# hg.py - hg backend for convert extension
#
#  Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# Notes for hg->hg conversion:
#
# * Old versions of Mercurial didn't trim the whitespace from the ends
#   of commit messages, but new versions do.  Changesets created by
#   those older versions, then converted, may thus have different
#   hashes for changesets that are otherwise identical.
#
# * Using "--config convert.hg.saverev=true" will cause the source
#   identifier to be stored in the converted revision.  This will cause
#   the converted revision to have a different identity than the
#   source.
from __future__ import absolute_import

import os
import re
import time

from mercurial.i18n import _
from mercurial.pycompat import open
from mercurial.node import (
    bin,
    hex,
    sha1nodeconstants,
)
from mercurial import (
    bookmarks,
    context,
    error,
    exchange,
    hg,
    lock as lockmod,
    logcmdutil,
    merge as mergemod,
    phases,
    pycompat,
    util,
)
from mercurial.utils import dateutil

stringio = util.stringio

from . import common

mapfile = common.mapfile
NoRepo = common.NoRepo

# Matches anything that looks like an abbreviated or full hex changeset id
# (12 to 40 lowercase hex digits) inside a commit message.
sha1re = re.compile(br'\b[0-9a-f]{12,40}\b')


class mercurial_sink(common.converter_sink):
    """Conversion sink that writes converted changesets into a local
    Mercurial repository."""

    def __init__(self, ui, repotype, path):
        """Open the repository at ``path``, creating it when ``path`` is
        missing or is an empty directory.

        Raises NoRepo when the repository cannot be opened or created, or
        when it is not a local repository.
        """
        common.converter_sink.__init__(self, ui, repotype, path)
        self.branchnames = ui.configbool(b'convert', b'hg.usebranchnames')
        self.clonebranches = ui.configbool(b'convert', b'hg.clonebranches')
        self.tagsbranch = ui.config(b'convert', b'hg.tagsbranch')
        self.lastbranch = None
        if os.path.isdir(path) and os.listdir(path):
            # Non-empty directory: it has to be an existing repository.
            try:
                self.repo = hg.repository(self.ui, path)
                if not self.repo.local():
                    raise NoRepo(
                        _(b'%s is not a local Mercurial repository') % path
                    )
            except error.RepoError as err:
                ui.traceback()
                raise NoRepo(err.args[0])
        else:
            try:
                ui.status(_(b'initializing destination %s repository\n') % path)
                self.repo = hg.repository(self.ui, path, create=True)
                if not self.repo.local():
                    raise NoRepo(
                        _(b'%s is not a local Mercurial repository') % path
                    )
                self.created.append(path)
            except error.RepoError:
                ui.traceback()
                raise NoRepo(
                    _(b"could not create hg repository %s as sink") % path
                )
        self.lock = None
        self.wlock = None
        self.filemapmode = False
        # subrepo path -> mapfile of that subrepo's shamap, loaded lazily by
        # _rewritesubstate()
        self.subrevmaps = {}

    def before(self):
        """Take the working-copy lock and the store lock of the sink."""
        self.ui.debug(b'run hg sink pre-conversion action\n')
        self.wlock = self.repo.wlock()
        self.lock = self.repo.lock()

    def after(self):
        """Release the locks taken by before(), store lock first."""
        self.ui.debug(b'run hg sink post-conversion action\n')
        if self.lock:
            self.lock.release()
        if self.wlock:
            self.wlock.release()

    def revmapfile(self):
        """Return the path of the ``shamap`` file in the sink's vfs."""
        return self.repo.vfs.join(b"shamap")

    def authorfile(self):
        """Return the path of the ``authormap`` file in the sink's vfs."""
        return self.repo.vfs.join(b"authormap")

    def setbranch(self, branch, pbranches):
        """Switch the sink to the per-branch clone for ``branch``.

        Does nothing unless ``convert.hg.clonebranches`` is enabled.
        ``pbranches`` is a sequence of ``(revision, branch)`` pairs for the
        parents; parent revisions missing from the branch repository are
        pulled from the repository of the branch they belong to.
        """
        if not self.clonebranches:
            return

        setbranch = branch != self.lastbranch
        self.lastbranch = branch
        if not branch:
            branch = b'default'
        pbranches = [(b[0], b[1] or b'default') for b in pbranches]

        branchpath = os.path.join(self.path, branch)
        if setbranch:
            # Drop the locks on the old repository before switching.
            self.after()
            try:
                self.repo = hg.repository(self.ui, branchpath)
            except Exception:
                # Best effort: any failure to open is treated as "the branch
                # clone does not exist yet".
                self.repo = hg.repository(self.ui, branchpath, create=True)
            self.before()

        # pbranches may bring revisions from other branches (merge parents)
        # Make sure we have them, or pull them.
        missings = {}
        for b in pbranches:
            try:
                self.repo.lookup(b[0])
            except Exception:
                missings.setdefault(b[1], []).append(b[0])

        if missings:
            self.after()
            for pbranch, heads in sorted(pycompat.iteritems(missings)):
                pbranchpath = os.path.join(self.path, pbranch)
                prepo = hg.peer(self.ui, {}, pbranchpath)
                self.ui.note(
                    _(b'pulling from %s into %s\n') % (pbranch, branch)
                )
                exchange.pull(
                    self.repo, prepo, heads=[prepo.lookup(h) for h in heads]
                )
            self.before()

    def _rewritetags(self, source, revmap, data):
        """Return ``.hgtags`` content ``data`` with each node replaced by
        its converted counterpart from ``revmap``.

        Malformed lines and lines whose node has no mapping are warned about
        and written back unchanged.
        """
        fp = stringio()
        for line in data.splitlines():
            s = line.split(b' ', 1)
            if len(s) != 2:
                self.ui.warn(_(b'invalid tag entry: "%s"\n') % line)
                fp.write(b'%s\n' % line)  # Bogus, but keep for hash stability
                continue
            revid = revmap.get(source.lookuprev(s[0]))
            if not revid:
                if s[0] == sha1nodeconstants.nullhex:
                    revid = s[0]
                else:
                    # missing, but keep for hash stability
                    self.ui.warn(_(b'missing tag entry: "%s"\n') % line)
                    fp.write(b'%s\n' % line)
                    continue
            fp.write(b'%s %s\n' % (revid, s[1]))
        return fp.getvalue()

    def _rewritesubstate(self, source, data):
        """Return ``.hgsubstate`` content ``data`` with each subrepo
        revision replaced by its converted counterpart.

        The mapping for a subrepo is read from ``<subpath>/.hg/shamap`` in
        the sink working directory.  Lines that do not split into two fields
        are dropped; revisions without a mapping are kept as they are.
        """
        fp = stringio()
        for line in data.splitlines():
            s = line.split(b' ', 1)
            if len(s) != 2:
                continue

            revid = s[0]
            subpath = s[1]
            if revid != sha1nodeconstants.nullhex:
                revmap = self.subrevmaps.get(subpath)
                if revmap is None:
                    revmap = mapfile(
                        self.ui, self.repo.wjoin(subpath, b'.hg/shamap')
                    )
                    self.subrevmaps[subpath] = revmap

                    # It is reasonable that one or more of the subrepos don't
                    # need to be converted, in which case they can be cloned
                    # into place instead of converted.  Therefore, only warn
                    # once.
                    msg = _(b'no ".hgsubstate" updates will be made for "%s"\n')
                    if len(revmap) == 0:
                        sub = self.repo.wvfs.reljoin(subpath, b'.hg')

                        if self.repo.wvfs.exists(sub):
                            self.ui.warn(msg % subpath)

                newid = revmap.get(revid)
                if not newid:
                    if len(revmap) > 0:
                        self.ui.warn(
                            _(b"%s is missing from %s/.hg/shamap\n")
                            % (revid, subpath)
                        )
                else:
                    revid = newid

            fp.write(b'%s %s\n' % (revid, subpath))

        return fp.getvalue()

    def _calculatemergedfiles(self, source, p1ctx, p2ctx):
        """Calculates the files from p2 that we need to pull in when merging p1
        and p2, given that the merge is coming from the given source.

        This prevents us from losing files that only exist in the target p2 and
        that don't come from the source repo (like if you're merging multiple
        repositories together).

        This is a generator yielding file names.  It raises error.Abort when
        a file that does not belong to the source needs a real merge.
        """
        anc = [p1ctx.ancestor(p2ctx)]
        # Calculate what files are coming from p2
        # TODO: mresult.commitinfo might be able to get that info
        mresult = mergemod.calculateupdates(
            self.repo,
            p1ctx,
            p2ctx,
            anc,
            branchmerge=True,
            force=True,
            acceptremote=False,
            followcopies=False,
        )

        for file, (action, info, msg) in mresult.filemap():
            if source.targetfilebelongstosource(file):
                # If the file belongs to the source repo, ignore the p2
                # since it will be covered by the existing fileset.
                continue

            # If the file requires actual merging, abort. We don't have enough
            # context to resolve merges correctly.
            if action in [b'm', b'dm', b'cd', b'dc']:
                raise error.Abort(
                    _(
                        b"unable to convert merge commit "
                        b"since target parents do not merge cleanly (file "
                        b"%s, parents %s and %s)"
                    )
                    % (file, p1ctx, p2ctx)
                )
            elif action == b'k':
                # 'keep' means nothing changed from p1
                continue
            else:
                # Any other change means we want to take the p2 version
                yield file

    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Commit one converted changeset to the sink and return its hex
        node.

        ``files`` is an iterable of ``(name, version)`` pairs, ``copies``
        maps destination names to copy sources, ``parents`` lists the hex
        ids of the already-converted parents and ``cleanp2`` names files
        that can be reused unchanged from the second parent.  With more than
        two parents a chain of two-parent commits is created ("octopus merge
        fixup").  In filemap mode, a single-parent commit whose manifest
        equals its parent's is rolled back and the parent id is returned.
        """
        files = dict(files)

        def getfilectx(repo, memctx, f):
            # p2ctx and p2files are bound by the commit loop further down;
            # this callback only runs from within that loop.
            if p2ctx and f in p2files and f not in copies:
                self.ui.debug(b'reusing %s from p2\n' % f)
                try:
                    return p2ctx[f]
                except error.ManifestLookupError:
                    # If the file doesn't exist in p2, then we're syncing a
                    # delete, so just return None.
                    return None
            try:
                v = files[f]
            except KeyError:
                return None
            data, mode = source.getfile(f, v)
            if data is None:
                return None
            if f == b'.hgtags':
                data = self._rewritetags(source, revmap, data)
            if f == b'.hgsubstate':
                data = self._rewritesubstate(source, data)
            return context.memfilectx(
                self.repo,
                memctx,
                f,
                data,
                b'l' in mode,
                b'x' in mode,
                copies.get(f),
            )

        # Drop duplicate parents while keeping their order.
        pl = []
        for p in parents:
            if p not in pl:
                pl.append(p)
        parents = pl
        nparents = len(parents)
        if self.filemapmode and nparents == 1:
            m1node = self.repo.changelog.read(bin(parents[0]))[0]
            parent = parents[0]

        # Pad with the null id so there are always at least two parents.
        while len(parents) < 2:
            parents.append(self.repo.nullid)
        p2 = parents.pop(0)

        text = commit.desc

        # Rewrite changeset ids mentioned in the description so they point
        # at the converted revisions.
        for sha1 in sha1re.findall(text):
            oldrev = source.lookuprev(sha1)
            newrev = revmap.get(oldrev)
            if newrev is not None:
                text = text.replace(sha1, newrev[: len(sha1)])

        extra = commit.extra.copy()

        sourcename = self.repo.ui.config(b'convert', b'hg.sourcename')
        if sourcename:
            extra[b'convert_source'] = sourcename

        for label in (
            b'source',
            b'transplant_source',
            b'rebase_source',
            b'intermediate-source',
        ):
            node = extra.get(label)

            if node is None:
                continue

            # Only transplant stores its reference in binary
            if label == b'transplant_source':
                node = hex(node)

            newrev = revmap.get(node)
            if newrev is not None:
                if label == b'transplant_source':
                    newrev = bin(newrev)

                extra[label] = newrev

        if self.branchnames and commit.branch:
            extra[b'branch'] = commit.branch
        if commit.rev and commit.saverev:
            extra[b'convert_revision'] = commit.rev

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            p1ctx = self.repo[p1]
            p2ctx = None
            if p2 != self.repo.nullid:
                p2ctx = self.repo[p2]
            fileset = set(files)
            if full:
                fileset.update(self.repo[p1])
                fileset.update(self.repo[p2])

            if p2ctx:
                p2files = set(cleanp2)
                for file in self._calculatemergedfiles(source, p1ctx, p2ctx):
                    p2files.add(file)
                    fileset.add(file)

            ctx = context.memctx(
                self.repo,
                (p1, p2),
                text,
                fileset,
                getfilectx,
                commit.author,
                commit.date,
                extra,
            )

            # We won't know if the conversion changes the node until after the
            # commit, so copy the source's phase for now.
            self.repo.ui.setconfig(
                b'phases',
                b'new-commit',
                phases.phasenames[commit.phase],
                b'convert',
            )

            with self.repo.transaction(b"convert") as tr:
                if self.repo.ui.config(b'convert', b'hg.preserve-hash'):
                    origctx = commit.ctx
                else:
                    origctx = None
                node = hex(self.repo.commitctx(ctx, origctx=origctx))

                # If the node value has changed, but the phase is lower than
                # draft, set it back to draft since it hasn't been exposed
                # anywhere.
                if commit.rev != node:
                    ctx = self.repo[node]
                    if ctx.phase() < phases.draft:
                        phases.registernew(
                            self.repo, tr, phases.draft, [ctx.rev()]
                        )

            text = b"(octopus merge fixup)\n"
            p2 = node

        if self.filemapmode and nparents == 1:
            man = self.repo.manifestlog.getstorage(b'')
            mnode = self.repo.changelog.read(bin(p2))[0]
            closed = b'close' in commit.extra
            if not closed and not man.cmp(m1node, man.revision(mnode)):
                self.ui.status(_(b"filtering out empty revision\n"))
                self.repo.rollback(force=True)
                return parent
        return p2

    def puttags(self, tags):
        """Commit an updated ``.hgtags`` on the tags branch.

        ``tags`` maps tag names to hex nodes.  Returns ``(node, parent)`` as
        hex strings for the new tag commit, or ``(None, None)`` when the
        existing ``.hgtags`` content already covers every tag.
        """
        tagparent = self.repo.branchtip(self.tagsbranch, ignoremissing=True)
        tagparent = tagparent or self.repo.nullid

        # Collect the .hgtags lines of every branch head.
        oldlines = set()
        for _branch, heads in pycompat.iteritems(self.repo.branchmap()):
            for h in heads:
                if b'.hgtags' in self.repo[h]:
                    oldlines.update(
                        self.repo[h][b'.hgtags'].data().splitlines(True)
                    )
        oldlines = sorted(oldlines)

        newlines = sorted([(b"%s %s\n" % (tags[tag], tag)) for tag in tags])
        # if the old and new tags match, then there is nothing to update
        if newlines == oldlines:
            return None, None

        # Likewise when the new lines introduce no tag name that is not
        # already present in the old lines.
        oldtags = set()
        newtags = set()
        for line in oldlines:
            s = line.strip().split(b' ', 1)
            if len(s) != 2:
                continue
            oldtags.add(s[1])
        for line in newlines:
            s = line.strip().split(b' ', 1)
            if len(s) != 2:
                continue
            if s[1] not in oldtags:
                newtags.add(s[1].strip())

        if not newtags:
            return None, None

        data = b"".join(newlines)

        def getfilectx(repo, memctx, f):
            return context.memfilectx(repo, memctx, f, data, False, False, None)

        self.ui.status(_(b"updating tags\n"))
        # The date is recorded as "<unixtime> 0", i.e. with a zero UTC
        # offset, so the first field has to be the real epoch time.
        # time.mktime(time.gmtime()) is not: mktime() is the inverse of
        # localtime(), so feeding it a UTC struct skews the result by the
        # machine's UTC offset.
        date = b"%d 0" % int(time.time())
        extra = {b'branch': self.tagsbranch}
        ctx = context.memctx(
            self.repo,
            (tagparent, None),
            b"update tags",
            [b".hgtags"],
            getfilectx,
            b"convert-repo",
            date,
            extra,
        )
        node = self.repo.commitctx(ctx)
        return hex(node), hex(tagparent)

    def setfilemapmode(self, active):
        """Record whether a filemap is in effect (see putcommit())."""
        self.filemapmode = active

    def putbookmarks(self, updatedbookmark):
        """Apply ``updatedbookmark`` (bookmark name -> hex node) to the
        sink inside a ``bookmark`` transaction.  No-op when it is empty."""
        if not updatedbookmark:
            return
        wlock = lock = tr = None
        try:
            wlock = self.repo.wlock()
            lock = self.repo.lock()
            tr = self.repo.transaction(b'bookmark')
            self.ui.status(_(b"updating bookmarks\n"))
            destmarks = self.repo._bookmarks
            changes = [
                (bookmark, bin(updatedbookmark[bookmark]))
                for bookmark in updatedbookmark
            ]
            destmarks.applychanges(self.repo, tr, changes)
            tr.close()
        finally:
            lockmod.release(lock, wlock, tr)

    def hascommitfrommap(self, rev):
        """Tell whether ``rev`` may be treated as present in the sink."""
        # the exact semantics of clonebranches is unclear so we can't say no
        return rev in self.repo or self.clonebranches

    def hascommitforsplicemap(self, rev):
        """Tell whether ``rev`` exists in the sink.

        Raises error.Abort when the revision is missing and clonebranches is
        enabled, since that lookup is not implemented.
        """
        found = rev in self.repo
        if not found and self.clonebranches:
            raise error.Abort(
                _(
                    b'revision %s not found in destination '
                    b'repository (lookups with clonebranches=true '
                    b'are not implemented)'
                )
                % rev
            )
        return found


class mercurial_source(common.converter_source):
    """Conversion source that reads changesets from a local Mercurial
    repository."""

    def __init__(self, ui, repotype, path, revs=None):
        """Open the repository at ``path`` and work out which revisions to
        convert.

        The selection comes from ``convert.hg.startrev``, ``convert.hg.revs``
        and ``revs``; ``hg.revs`` cannot be combined with the other two.
        Raises NoRepo when ``path`` is not a local Mercurial repository and
        error.Abort on an invalid start revision or option combination.
        """
        common.converter_source.__init__(self, ui, repotype, path, revs)
        self.ignoreerrors = ui.configbool(b'convert', b'hg.ignoreerrors')
        self.ignored = set()
        self.saverev = ui.configbool(b'convert', b'hg.saverev')
        try:
            self.repo = hg.repository(self.ui, path)
            # try to provoke an exception if this isn't really a hg
            # repo, but some other bogus compatible-looking url
            if not self.repo.local():
                raise error.RepoError
        except error.RepoError:
            ui.traceback()
            raise NoRepo(_(b"%s is not a local Mercurial repository") % path)
        self.lastrev = None
        self.lastctx = None
        # (rev, (modified-or-added, removed)) cached by getchangedfiles()
        # for reuse by getchanges()
        self._changescache = None, None
        self.convertfp = None
        # Restrict converted revisions to startrev descendants
        startnode = ui.config(b'convert', b'hg.startrev')
        hgrevs = ui.config(b'convert', b'hg.revs')
        if hgrevs is None:
            if startnode is not None:
                try:
                    startnode = self.repo.lookup(startnode)
                except error.RepoError:
                    raise error.Abort(
                        _(b'%s is not a valid start revision') % startnode
                    )
                startrev = self.repo.changelog.rev(startnode)
                children = {startnode}
                for r in self.repo.changelog.descendants([startrev]):
                    children.add(self.repo.changelog.node(r))
                self.keep = children.__contains__
            else:
                self.keep = util.always
            if revs:
                self._heads = [self.repo.lookup(r) for r in revs]
            else:
                self._heads = self.repo.heads()
        else:
            if revs or startnode is not None:
                raise error.Abort(
                    _(
                        b'hg.revs cannot be combined with '
                        b'hg.startrev or --rev'
                    )
                )
            nodes = set()
            parents = set()
            for r in logcmdutil.revrange(self.repo, [hgrevs]):
                ctx = self.repo[r]
                nodes.add(ctx.node())
                parents.update(p.node() for p in ctx.parents())
            self.keep = nodes.__contains__
            # Heads are the selected nodes that are nobody's parent.
            self._heads = nodes - parents

    def _changectx(self, rev):
        """Return the changectx for ``rev``, caching the last one used."""
        if self.lastrev != rev:
            self.lastctx = self.repo[rev]
            self.lastrev = rev
        return self.lastctx

    def _parents(self, ctx):
        """Return the parents of ``ctx`` that are selected for conversion."""
        return [p for p in ctx.parents() if p and self.keep(p.node())]

    def getheads(self):
        """Return the hex ids of the heads selected for conversion."""
        return [hex(h) for h in self._heads if self.keep(h)]

    def getfile(self, name, rev):
        """Return ``(data, flags)`` of file ``name`` at ``rev``, or
        ``(None, None)`` when the lookup fails."""
        try:
            fctx = self._changectx(rev)[name]
            return fctx.data(), fctx.flags()
        except error.LookupError:
            return None, None

    def _changedfiles(self, ctx1, ctx2):
        """Diff the manifests of ``ctx1`` and ``ctx2``.

        Returns ``(ma, r)``: files present on the ``ctx2`` side of the diff
        (modified or added) and files absent from it (removed).
        """
        ma, r = [], []
        maappend = ma.append
        rappend = r.append
        d = ctx1.manifest().diff(ctx2.manifest())
        for f, ((node1, flag1), (node2, flag2)) in pycompat.iteritems(d):
            if node2 is None:
                rappend(f)
            else:
                maappend(f)
        return ma, r

    def getchanges(self, rev, full):
        """Return ``(changes, copies, cleanp2)`` for ``rev``.

        ``changes`` is a sorted list of ``(file, rev)`` pairs, ``copies``
        maps copied files to their source and ``cleanp2`` is the set of
        files identical to the second parent's version.  With ``full`` (or
        for a revision without selected parents) every file in the manifest
        is reported.
        """
        ctx = self._changectx(rev)
        parents = self._parents(ctx)
        if full or not parents:
            files = copyfiles = ctx.manifest()
        if parents:
            if self._changescache[0] == rev:
                ma, r = self._changescache[1]
            else:
                ma, r = self._changedfiles(parents[0], ctx)
            if not full:
                files = ma + r
            copyfiles = ma
        # _getcopies() is also run for roots and before filtering so missing
        # revlogs are detected early
        copies = self._getcopies(ctx, parents, copyfiles)
        cleanp2 = set()
        if len(parents) == 2:
            d = parents[1].manifest().diff(ctx.manifest(), clean=True)
            for f, value in pycompat.iteritems(d):
                # with clean=True, unchanged files are reported as None
                if value is None:
                    cleanp2.add(f)
        changes = sorted((f, rev) for f in files if f not in self.ignored)
        return changes, copies, cleanp2

    def _getcopies(self, ctx, parents, files):
        """Return a ``{destination: source}`` dict of the copies recorded
        in ``ctx`` for ``files``.

        Copies whose source is ignored or not present in any of ``parents``
        are skipped.  On a LookupError the file is added to ``self.ignored``
        when ``convert.hg.ignoreerrors`` is set; otherwise the error
        propagates.
        """
        copies = {}
        for name in files:
            if name in self.ignored:
                continue
            try:
                copysource = ctx.filectx(name).copysource()
                if copysource in self.ignored:
                    continue
                # Ignore copy sources not in parent revisions
                if not any(copysource in p for p in parents):
                    continue
                copies[name] = copysource
            except TypeError:
                pass
            except error.LookupError as e:
                if not self.ignoreerrors:
                    raise
                self.ignored.add(name)
                self.ui.warn(_(b'ignoring: %s\n') % e)
        return copies

    def getcommit(self, rev):
        """Return a common.commit describing changeset ``rev``.

        Parents outside the conversion selection are reported as
        ``optparents`` rather than ``parents``.
        """
        ctx = self._changectx(rev)
        _parents = self._parents(ctx)
        parents = [p.hex() for p in _parents]
        optparents = [p.hex() for p in ctx.parents() if p and p not in _parents]
        crev = rev

        return common.commit(
            author=ctx.user(),
            date=dateutil.datestr(ctx.date(), b'%Y-%m-%d %H:%M:%S %1%2'),
            desc=ctx.description(),
            rev=crev,
            parents=parents,
            optparents=optparents,
            branch=ctx.branch(),
            extra=ctx.extra(),
            sortkey=ctx.rev(),
            saverev=self.saverev,
            phase=ctx.phase(),
            ctx=ctx,
        )

    def numcommits(self):
        """Return the number of revisions in the source repository."""
        return len(self.repo)

    def gettags(self):
        """Return a ``{name: hex node}`` dict of the global tags that point
        at revisions selected for conversion."""
        # This will get written to .hgtags, filter non global tags out.
        tags = [
            t
            for t in self.repo.tagslist()
            if self.repo.tagtype(t[0]) == b'global'
        ]
        return {name: hex(node) for name, node in tags if self.keep(node)}

    def getchangedfiles(self, rev, i):
        """Return the files changed in ``rev`` relative to its ``i``-th
        selected parent (``None`` is treated as the first parent).

        For a revision without selected parents and ``i`` of ``None``, every
        file in the manifest is returned.  The first-parent result is cached
        for getchanges().
        """
        ctx = self._changectx(rev)
        parents = self._parents(ctx)
        if not parents and i is None:
            i = 0
            ma, r = ctx.manifest().keys(), []
        else:
            i = i or 0
            ma, r = self._changedfiles(parents[i], ctx)
        ma, r = [
            [f for f in names if f not in self.ignored] for names in (ma, r)
        ]

        if i == 0:
            self._changescache = (rev, (ma, r))

        return ma + r

    def converted(self, rev, destrev):
        """Append a ``<destrev> <rev>`` line to the ``shamap`` file in the
        source repository's vfs."""
        if self.convertfp is None:
            self.convertfp = open(self.repo.vfs.join(b'shamap'), b'ab')
        self.convertfp.write(util.tonativeeol(b'%s %s\n' % (destrev, rev)))
        self.convertfp.flush()

    def before(self):
        """Pre-conversion hook; only emits a debug message."""
        self.ui.debug(b'run hg source pre-conversion action\n')

    def after(self):
        """Post-conversion hook; closes the shamap handle, if any."""
        self.ui.debug(b'run hg source post-conversion action\n')
        # converted() opens the shamap lazily and keeps it open; close it
        # here so the descriptor is not leaked.  A later converted() call
        # simply reopens the file in append mode.
        if self.convertfp is not None:
            self.convertfp.close()
            self.convertfp = None

    def hasnativeorder(self):
        """This source always reports True here."""
        return True

    def hasnativeclose(self):
        """This source always reports True here."""
        return True

    def lookuprev(self, rev):
        """Return the full hex id for ``rev``, or None when the repository
        lookup fails."""
        try:
            return hex(self.repo.lookup(rev))
        except (error.RepoError, error.LookupError):
            return None

    def getbookmarks(self):
        """Return the bookmarks of the source repository."""
        return bookmarks.listbookmarks(self.repo)

    def checkrevformat(self, revstr, mapname=b'splicemap'):
        """Mercurial, revision string is a 40 byte hex"""
        self.checkhexformat(revstr, mapname)