from __future__ import absolute_import, division, unicode_literals
from binascii import hexlify, unhexlify
try:
    from itertools import izip as zip
except ImportError:
    pass
import io
import os
import shutil
import subprocess
import sys
try:
    from urllib.parse import quote_from_bytes, unquote_to_bytes
except ImportError:
    from urllib import quote as quote_from_bytes
    from urllib import unquote as unquote_to_bytes
from collections import (
    OrderedDict,
    defaultdict,
)
try:
    from collections.abc import Sequence
except ImportError:
    from collections import Sequence
try:
    from urllib2 import URLError
except ImportError:
    from urllib.error import URLError
try:
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse
from .exceptions import (
    Abort,
    AmbiguousGraftAbort,
    NothingToGraftException,
    OldUpgradeAbort,
    UpgradeAbort,
)
from .util import (
    HTTPReader,
    Seekable,
    byte_diff,
    check_enabled,
    one,
    VersionedDict,
)
from .git import (
    EMPTY_BLOB,
    EMPTY_TREE,
    Git,
    GitProcess,
    NULL_NODE_ID,
)
from .hg.changegroup import (
    RawRevChunk,
    RevDiff,
)
from .hg.objects import (
    Authorship,
    Changeset,
    File,
    Manifest,
)
from .helper import GitHgHelper
from .util import progress_iter
from cinnabar import util
from cinnabar.util import fsdecode

import logging


# An empty mercurial file with no parent has a fixed sha1 which is that of
# "\0" * 40 (incidentally, this is the same as for an empty manifest with
# no parent).
HG_EMPTY_FILE = b'b80de5d138758541c5f05265ad144ab9fa86d1db'


revchunk_log = logging.getLogger('revchunks')


class FileFindParents(object):
    """Helpers finding the parents under which a file's sha1 matches its node.

    All methods are static and operate on a file object exposing `node`,
    `sha1`, `parents`, `metadata` and `content`.
    """
    logger = logging.getLogger('generated_file')

    @staticmethod
    def _invalid_if_new(file):
        """Raise if `file` has no node yet (i.e. node is NULL_NODE_ID)."""
        if file.node == NULL_NODE_ID:
            raise Exception('Trying to create an invalid file. '
                            'Please open an issue with details.')

    @staticmethod
    def set_parents(file, parent1=NULL_NODE_ID, parent2=NULL_NODE_ID):
        """Set `file.parents` from the given parents.

        The parents are first normalized; when the file already has a node
        that doesn't match the resulting sha1, other parent combinations
        are tried, and an exception is raised if none matches.
        """
        # Remove null nodes
        parents = tuple(p for p in (parent1, parent2) if p != NULL_NODE_ID)
        orig_parents = parents

        # On merges, a file with copy metadata has either no parent, or only
        # one. In that latter case, the parent is always set as second parent.
        # On non-merges, a file with copy metadata doesn't have a parent.
        if file.metadata or file.content.startswith(b'\1\n'):
            if len(parents) == 2:
                FileFindParents._invalid_if_new(file)
            elif len(parents) == 1:
                parents = (NULL_NODE_ID, parents[0])
        elif len(parents) == 2:
            if parents[0] == parents[1]:
                parents = parents[:1]

        file.parents = parents
        if file.node != NULL_NODE_ID and file.node != file.sha1:
            if parents != orig_parents:
                if FileFindParents._try_parents(file, *orig_parents):
                    FileFindParents.logger.debug(
                        'Right parents given for %s, but they don\'t match '
                        'what modern mercurial normally would do', file.node)
                    return
            FileFindParents._set_parents_fallback(file, parent1, parent2)

    @staticmethod
    def _set_parents_fallback(file, parent1=NULL_NODE_ID,
                              parent2=NULL_NODE_ID):
        """Try alternative parent combinations until the sha1 matches.

        Raises an Exception when no combination matches.
        """
        result = (
            # In some cases, only one parent is stored in a merge,
            # because the other parent is actually an ancestor of the
            # first one, but checking that is likely more expensive
            # than to check if the sha1 matches with either parent.
            FileFindParents._try_parents(file, parent1) or
            FileFindParents._try_parents(file, parent2) or
            # Some mercurial versions store the first parent twice in
            # merges.
            FileFindParents._try_parents(file, parent1, parent1) or
            # As last resort, try without any parents.
            FileFindParents._try_parents(file))

        FileFindParents.logger.debug('Wrong parents given for %s', file.node)
        FileFindParents.logger.debug(' Got: %s %s', parent1, parent2)
        if result:
            FileFindParents.logger.debug(' Expected: %s %s', file.parent1,
                                         file.parent2)

        # If none of the above worked, we failed big time
        if not result:
            raise Exception('Failed to create file. '
                            'Please open an issue with details.')

    @staticmethod
    def _try_parents(file, *parents):
        """Assign `parents` to `file` and tell whether its sha1 matches."""
        file.parents = parents
        return file.node == file.sha1


class ChangesetPatcher(bytes):
    """Bytes payload describing how to turn one changeset into another.

    The payload is a newline-separated list of `key value` entries
    (`changeset`, `manifest`, `author`, `extra`, `files`, `patch`), as
    produced by `from_diff` and consumed by `apply`.
    """

    class ChangesetPatch(RawRevChunk):
        __slots__ = ('patch', '_changeset')

        class Patch(RevDiff):
            class Part(object):
                __slots__ = ('start', 'end', 'text_data')

            def __init__(self, buf):
                self._buf = buf

            def __str__(self):
                raise RuntimeError('Use to_str()')

            def to_str(self):
                return self._buf

            def __iter__(self):
                # Parts are separated by NUL bytes, each one being
                # `start,end,url-quoted-data`.
                for line in self._buf.split(b'\0'):
                    if line:
                        part = self.Part()
                        start, end, text_data = line.split(b',')
                        part.start = int(start)
                        part.end = int(end)
                        part.text_data = unquote_to_bytes(text_data)
                        yield part

            @classmethod
            def from_items(cls, items):
                """Build a Patch from (start, end, text_data) tuples."""
                return cls(b'\0'.join(
                    b','.join((b'%d,%d' % (start, end),
                               quote_from_bytes(text_data).encode('ascii')))
                    for start, end, text_data in items))

        def __init__(self, changeset, patch_data):
            self._changeset = changeset
            self.patch = self.Patch(patch_data)

        def __getattr__(self, name):
            # Anything not set on the chunk itself is read from the wrapped
            # changeset, `delta_node` being an alias for its `node`.
            if name == 'delta_node':
                name = 'node'
            return getattr(self._changeset, name)

    def apply(self, changeset):
        """Return a patched copy of `changeset`, following our entries."""
        # Sneaky way to create a copy of the changeset
        chunk = self.ChangesetPatch(changeset, b'')
        changeset = Changeset.from_chunk(chunk, changeset)

        for k, v in (l.split(b' ', 1) for l in self.splitlines()):
            if k == b'changeset':
                changeset.node = v
            elif k == b'manifest':
                changeset.manifest = v
            elif k == b'author':
                changeset.author = v
            elif k == b'extra':
                extra = changeset.extra
                changeset.extra = v
                if extra is not None:
                    # Keep pre-existing extra entries the patch doesn't
                    # override.
                    changeset.extra.update(
                        (k, v) for k, v in extra.items()
                        if k not in changeset.extra)
            elif k == b'files':
                changeset.files = v.split(b'\0')
            elif k == b'patch':
                chunk = self.ChangesetPatch(changeset, v)
                changeset = Changeset.from_chunk(chunk, changeset)

        # This should not occur in normal changeset bodies. If it occurs,
        # it likely comes from our handling of conflicting commits.
        # So in that case, adjust until we have the right sha1.
        while changeset.body.endswith(b'\0') and \
                changeset.sha1 != changeset.node:
            changeset.body = changeset.body[:-1]

        return changeset

    @classmethod
    def from_diff(cls, changeset1, changeset2):
        """Create the patcher that turns `changeset1` into `changeset2`."""
        items = []
        if changeset1.node != changeset2.node:
            items.append(b'changeset %s' % changeset2.node)
        if changeset1.manifest != changeset2.manifest:
            items.append(b'manifest %s' % changeset2.manifest)
        if changeset1.author != changeset2.author:
            items.append(b'author %s' % changeset2.author)
        if changeset1.extra != changeset2.extra:
            if changeset2.extra is not None:
                items.append(b'extra %s' % Changeset.ExtraData({
                    k: v
                    for k, v in changeset2.extra.items()
                    if not changeset1.extra or changeset1.extra.get(k) != v
                }).to_str())
        if changeset1.files != changeset2.files:
            items.append(b'files %s' % b'\0'.join(changeset2.files))

        this = cls(b'\n'.join(items))
        new = this.apply(changeset1)
        # When the entries above are not enough to reproduce the raw data,
        # add a byte-level patch on top of them.
        if new.raw_data != changeset2.raw_data:
            items.append(b'patch %s' % cls.ChangesetPatch.Patch.from_items(
                byte_diff(new.raw_data, changeset2.raw_data)).to_str())
            this = cls(b'\n'.join(items))

        return this


class Changeset(Changeset):
    """Changeset that can additionally be created from a git commit."""

    @classmethod
    def from_git_commit(cls, git_commit):
        """Create a changeset from a GitCommit, or from a git commit sha1."""
        if not isinstance(git_commit, GitCommit):
            git_commit = GitCommit(git_commit)

        changeset = cls()

        (changeset.author, changeset.timestamp, changeset.utcoffset) = \
            Authorship.from_git_str(git_commit.author).to_hg()

        if git_commit.committer != git_commit.author:
            changeset.committer = Authorship.from_git_str(
                git_commit.committer).to_hg_str()

        changeset.body = git_commit.body

        return changeset


class GeneratedManifestInfo(Manifest):
    """Manifest that additionally tracks removed and modified paths."""
    __slots__ = ('delta_node', 'removed', 'modified')

    def __init__(self, node):
        super(GeneratedManifestInfo, self).__init__(node)
        self.removed = set()
        self.modified = {}

    def add(self, path, sha1=None, attr=b'', modified=False):
        super(GeneratedManifestInfo, self).add(path, sha1, attr)
        if modified:
            self.modified[path] = (sha1, attr)


class TagSet(object):
    """Ordered tag -> node mapping remembering previous nodes of each tag."""

    def __init__(self):
        self._tags = OrderedDict()
        self._taghist = defaultdict(set)

    def __setitem__(self, key, value):
        old = self._tags.get(key)
        if old:
            self._taghist[key].add(old)
        self._tags[key] = value

    def __getitem__(self, key):
        return self._tags[key]

    def update(self, other):
        """Merge the tags and tag histories of `other` into this set."""
        if not other:
            return
        assert isinstance(other, TagSet)
        for key, anode in util.iteritems(other._tags):
            # derived from mercurial's _updatetags
            ahist = other._taghist[key]
            if key not in self._tags:
                self._tags[key] = anode
                self._taghist[key] = set(ahist)
                continue
            bnode = self._tags[key]
            bhist = self._taghist[key]
            if (bnode != anode and anode in bhist and
                    (bnode not in ahist or len(bhist) > len(ahist))):
                anode = bnode
            self._tags[key] = anode
            self._taghist[key] = ahist | set(
                n for n in bhist if n not in ahist)

    def __iter__(self):
        return util.iteritems(self._tags)

    def hist(self, key):
        """Iterate over the sorted former nodes of the tag `key`."""
        return iter(sorted(self._taghist[key]))


class GitCommit(object):
    """Parsed git commit, read through GitHgHelper.cat_file."""
    __slots__ = ('sha1', 'body', 'parents', 'tree', 'author', 'committer')

    def __init__(self, sha1):
        self.sha1 = sha1
        commit = GitHgHelper.cat_file(b'commit', sha1)
        header, self.body = commit.split(b'\n\n', 1)
        parents = []
        for line in header.splitlines():
            if line == b'\n':
                break
            typ, data = line.split(b' ', 1)
            typ = typ.decode('ascii')
            if typ == 'parent':
                parents.append(data.strip())
            elif typ in self.__slots__:
                # Header fields we have a slot for are stored as attributes,
                # and are expected to appear only once.
                assert not hasattr(self, typ)
                setattr(self, typ, data)
        self.parents = tuple(parents)


class PseudoGitCommit(GitCommit):
    """GitCommit whose fields are filled by the caller, not read from git."""

    def __init__(self, sha1):
        self.sha1 = sha1


def autohexlify(h):
    """Return the 40-character hexadecimal form of the sha1 `h`.

    `h` is returned as is when it is 40 bytes long, and hexlified when it
    is 20 bytes long. Any other length raises an AssertionError.
    """
    if len(h) == 40:
        return h
    elif len(h) == 20:
        return hexlify(h)
    # Raised explicitly so that the check survives `python -O`.
    raise AssertionError('Unexpected sha1 length: %d' % len(h))


class BranchMap(object):
    """Heads and tips of remote branches, and the git commits they map to."""
    __slots__ = "_heads", "_all_heads", "_tips", "_git_sha1s", "_unknown_heads"

    def __init__(self, store, remote_branchmap, remote_heads):
        self._heads = {}
        self._all_heads = tuple(autohexlify(h) for h in remote_heads)
        self._tips = {}
        self._git_sha1s = {}
        self._unknown_heads = set()
        for branch, heads in util.iteritems(remote_branchmap):
            # We can't keep track of tips if the list of heads is not sequenced
            sequenced = isinstance(heads, Sequence) or len(heads) == 1
            branch_heads = []
            for head in heads:
                head = autohexlify(head)
                branch_heads.append(head)
                sha1 = store.changeset_ref(head)
                if not sha1:
                    self._unknown_heads.add(head)
                    continue
                assert head not in self._git_sha1s
                self._git_sha1s[head] = sha1
            # Use last non-closed head as tip if there's more than one head.
            # Caveat: we don't know a head is closed until we've pulled it.
            if branch and heads and sequenced:
                for head in reversed(branch_heads):
                    self._tips[branch] = head
                    if head in self._git_sha1s:
                        changeset = store.changeset(head)
                        if changeset.close:
                            continue
                    break
            if branch:
                self._heads[branch] = tuple(branch_heads)

    def names(self):
        return self._heads.keys()

    def heads(self, branch=None):
        """Return the heads of `branch`, or all remote heads without one."""
        if branch:
            return self._heads.get(branch, ())
        return self._all_heads

    def unknown_heads(self):
        return self._unknown_heads

    def git_sha1(self, head):
        """Return the git sha1 for `head`, or b'?' when it is not known."""
        return self._git_sha1s.get(head, b'?')

    def tip(self, branch):
        return self._tips.get(branch, None)


class Grafter(object):
    """Matches mercurial changesets with pre-existing git commits."""
    __slots__ = "_store", "_early_history", "_graft_trees", "_grafted"

    def __init__(self, store):
        self._store = store
        self._early_history = set()
        self._graft_trees = defaultdict(list)
        self._grafted = False
        refs = [
            b'--exclude=refs/cinnabar/*',
            b'--exclude=refs/notes/cinnabar',
            b'--exclude=refs/original/*',
            b'--all',
        ]
        if store._has_metadata:
            refs += [b'--not', b'refs/cinnabar/metadata^']
        # Index candidate commits by their tree.
        for node, tree, parents in progress_iter(
                'Reading {} graft candidates',
                GitHgHelper.rev_list(b'--full-history', *refs)):
            self._graft_trees[tree].append(node)

    def _is_cinnabar_commit(self, commit):
        data = self._store.read_changeset_data(commit)
        return b'\npatch' not in data if data else False

    def _graft(self, changeset, parents):
        """Find the git commit to graft `changeset` onto.

        Returns the matching GitCommit, or None when there is none.
        Raises AmbiguousGraftAbort when several candidates remain.
        """
        store = self._store
        tree = store.git_tree(changeset.manifest, *changeset.parents[:1])
        do_graft = tree and tree in self._graft_trees
        if not do_graft:
            return None

        commits = {}

        def graftable(c):
            commit = commits.get(c)
            if not commit:
                commit = commits[c] = GitCommit(c)
            if (Authorship.from_git_str(commit.author).timestamp !=
                    int(changeset.timestamp)):
                return False

            if all(store._replace.get(p1, p1) == store._replace.get(p2, p2)
                   for p1, p2 in zip(commit.parents, parents)):
                return True

            # Allow to graft if one of the parents is from early history
            return any(p in self._early_history for p in parents)

        nodes = tuple(c for c in self._graft_trees[tree] if graftable(c))

        if len(nodes) > 1:
            # Ideally, this should all be tried with fuzziness, and
            # independently of the number of nodes we got, but the
            # following is enough to graft github.com/mozilla/gecko-dev
            # to mozilla-central and related repositories.
            # Try with commits with the same subject line
            subject = changeset.body.split(b'\n', 1)[0]
            possible_nodes = tuple(
                n for n in nodes
                if commits[n].body.split(b'\n', 1)[0] == subject
            )
            if len(possible_nodes) > 1:
                # Try with commits with the same author ; this is attempted
                # separately from checking timestamps because author may
                # have been munged.
                possible_nodes = tuple(
                    n for n in possible_nodes
                    if (Authorship.from_git_str(commits[n].author)
                        .to_hg()[0] == changeset.author)
                )
            if len(possible_nodes) == 1:
                nodes = possible_nodes

        # If we still have multiple nodes, check if one of them is one that
        # cinnabar would have created. If it is, we prefer other commits on
        # the premise that it means we've been asked to reclone with a graft
        # on a repo that was already handled by cinnabar.
        if len(nodes) > 1:
            possible_nodes = []
            for node in nodes:
                commit = commits[node]
                cs = Changeset.from_git_commit(commit)
                patcher = ChangesetPatcher.from_diff(cs, changeset)
                if b'\npatch' in patcher:
                    possible_nodes.append(node)
            nodes = possible_nodes

        if len(nodes) > 1:
            raise AmbiguousGraftAbort(
                'Cannot graft changeset %s. Candidates: %s'
                % (changeset.node.decode('ascii'),
                   ', '.join(n.decode('ascii') for n in nodes)))

        if nodes:
            node = nodes[0]
            self._graft_trees[tree].remove(node)
            return commits[node]
        return None

    def graft(self, changeset):
        """Store `changeset`, grafted onto an existing commit if possible.

        Raises NothingToGraftException when the changeset can neither be
        grafted nor considered part of the early history.
        """
        # TODO: clarify this function because it's hard to follow.
        store = self._store
        parents = tuple(store.changeset_ref(p) for p in changeset.parents)
        if None in parents:
            result = None
        else:
            result = self._graft(changeset, parents)
        if parents:
            is_early_history = all(p in self._early_history for p in parents)
        else:
            is_early_history = not result
        if not (is_early_history or result):
            raise NothingToGraftException()
        if is_early_history or not result:
            commit = store.changeset_ref(changeset.node)
        else:
            commit = result
        store.store_changeset(changeset, commit or False)
        commit = store.changeset_ref(changeset.node)
        if is_early_history:
            if result and result.sha1 != commit:
                store._replace[result.sha1] = commit
            else:
                self._early_history.add(commit)
        elif not parents:
            if result:
                commit = result.sha1
            if self._is_cinnabar_commit(commit):
                self._early_history.add(commit)

        if result:
            self._grafted = True

    def close(self):
        """Raise NothingToGraftException if only early history was seen."""
        if not self._grafted and self._early_history:
            raise NothingToGraftException()


class GitHgStore(object):
    FLAGS = [
        b'files-meta',
        b'unified-manifests-v2',
    ]

    METADATA_REFS = (
        b'refs/cinnabar/changesets',
        b'refs/cinnabar/manifests',
        b'refs/cinnabar/hg2git',
        b'refs/notes/cinnabar',
        b'refs/cinnabar/files-meta',
    )

    def _metadata(self):
        """Read the metadata commit, when there is one.

        Returns a (commit, refs) tuple, where refs maps the metadata ref
        names to the parents of the metadata commit, or None when no
        metadata sha1 is known. Also sets `self._flags` from the commit
        body.
        """
        if self._metadata_sha1:
            metadata = GitCommit(self._metadata_sha1)
            self._flags = set(metadata.body.split())
            refs = self.METADATA_REFS
            if b'files-meta' not in self._flags:
                refs = list(refs)
                refs.remove(b'refs/cinnabar/files-meta')
            return metadata, dict(zip(refs, metadata.parents))

    def metadata(self):
        """Same as `_metadata`, but raise UpgradeAbort on unexpected flags."""
        metadata = self._metadata()
        if metadata:
            if len(self._flags) > len(self.FLAGS):
                raise UpgradeAbort(
                    'It looks like this repository was used with a newer '
                    'version of git-cinnabar. Cannot use this version.')
            if set(self._flags) != set(self.FLAGS):
                raise UpgradeAbort()
        return metadata

    def __init__(self):
        self._flags = set()
        self._closed = False
        self._graft = None

        self._hgheads = VersionedDict()
        self._branches = {}

        self._replace = Git._replace
        self._tagcache_ref = None
        self._metadata_sha1 = None
        # While doing a for_each_ref, ensure refs/notes/cinnabar is in the
        # cache.
        for sha1, ref in Git.for_each_ref('refs/cinnabar',
                                          'refs/notes/cinnabar'):
            if ref.startswith(b'refs/cinnabar/replace/'):
                # 22 is the length of b'refs/cinnabar/replace/'.
                self._replace[ref[22:]] = sha1
            elif ref.startswith(b'refs/cinnabar/branches/'):
                raise OldUpgradeAbort()
            elif ref == b'refs/cinnabar/metadata':
                self._metadata_sha1 = sha1
            elif ref == b'refs/cinnabar/tag_cache':
                self._tagcache_ref = sha1
        self._replace = VersionedDict(self._replace)

        self._tagcache = {}
        self._tagfiles = {}
        self._tags = {NULL_NODE_ID: {}}
        self._cached_changeset_ref = {}
        self._tagcache_items = set()
        if self._tagcache_ref:
            for line in Git.ls_tree(self._tagcache_ref):
                mode, typ, sha1, path = line
                if typ == b'blob':
                    if self.ATTR[mode] == b'x':
                        self._tagfiles[path] = sha1
                    else:
                        self._tagcache[path] = sha1
                elif typ == b'commit':
                    assert sha1 == NULL_NODE_ID
                    self._tagcache[path] = sha1
                self._tagcache_items.add(path)

        self.tag_changes = False

        metadata = self.metadata()
        if metadata:
            metadata, refs = metadata
        self._has_metadata = bool(metadata)
        self._metadata_refs = refs if metadata else {}
        self._manifest_heads_orig = set()
        self._generation = 0
        if metadata:
            changesets_ref = self._metadata_refs.get(
                b'refs/cinnabar/changesets')
            if changesets_ref:
                commit = GitCommit(changesets_ref)
                # Each line of the commit body is `hghead branch`; the line
                # number is kept as the generation of the head.
                for n, head in enumerate(commit.body.splitlines()):
                    hghead, branch = head.split(b' ', 1)
                    self._hgheads._previous[hghead] = (branch, n)
                    self._generation = n + 1

            self._manifest_heads_orig = set(GitHgHelper.heads(b'manifests'))

            replace = {}
            for line in Git.ls_tree(metadata.tree):
                mode, typ, sha1, path = line
                replace[path] = sha1

            if self._replace and not replace:
                raise OldUpgradeAbort()

            # Delete old tag-cache, which may contain incomplete data.
            Git.delete_ref(b'refs/cinnabar/tag-cache')

    def prepare_graft(self):
        self._graft = Grafter(self)

    @staticmethod
    def _try_merge_branches(repo_url):
        """List candidate metadata branch names for `repo_url`.

        Candidates are built from the url path components, from the last
        one to the full path, then prefixed by the host name, and finally
        b'metadata'.
        """
        parsed_url = urlparse(repo_url)
        branches = []
        path = parsed_url.path.strip(b'/')
        if path:
            parts = list(reversed(path.split(b'/')))
        else:
            parts = []
        host = parsed_url.netloc.split(b':', 1)[0]
        if host:
            parts.append(host)
        last_path = b''
        for part in parts:
            if last_path:
                last_path = b'%s/%s' % (part, last_path)
            else:
                last_path = part
            branches.append(last_path)
        branches.append(b'metadata')
        return branches

    @staticmethod
    def _find_branch(branches, remote_refs):
        """Return the first ref in `remote_refs` matching one of `branches`.

        Each branch is tried as is, then under refs/cinnabar/ and
        refs/heads/. Returns None when nothing matches.
        """
        for branch in branches:
            if branch in remote_refs:
                return branch
            if b'refs/cinnabar/%s' % branch in remote_refs:
                return b'refs/cinnabar/%s' % branch
            if b'refs/heads/%s' % branch in remote_refs:
                return b'refs/heads/%s' % branch

    def merge(self, git_repo_url, hg_repo_url, branch=None):
        """Fetch cinnabar metadata from `git_repo_url` and start using it.

        The metadata is looked for in `branch` when given, or in branches
        derived from `hg_repo_url` otherwise. `git_repo_url` may also point
        to a git bundle served over http(s).

        Returns True on success, False (after logging an error) otherwise.
        """
        # Eventually we'll want to handle a full merge, but for now, we only
        # handle the case where we don't have metadata to begin with.
        # The caller should avoid calling this function otherwise.
        assert not self._has_metadata
        remote_refs = OrderedDict()
        with open(os.devnull, 'wb') as devnull:
            for line in Git.iter('ls-remote', fsdecode(git_repo_url),
                                 stderr=devnull):
                sha1, ref = line.split(None, 1)
                remote_refs[ref] = sha1
        bundle = None
        if not remote_refs and urlparse(git_repo_url).scheme in (b'http',
                                                                 b'https'):
            try:
                bundle = HTTPReader(git_repo_url)
            except URLError as e:
                logging.error(e.reason)
                return False
            if bundle.fh.headers.get('Content-Encoding', 'identity') == 'gzip':
                from gzip import GzipFile
                bundle = Seekable(bundle, bundle.length)
                bundle = GzipFile(mode='rb', fileobj=bundle)
            BUNDLE_SIGNATURE = b'# v2 git bundle\n'
            signature = bundle.read(len(BUNDLE_SIGNATURE))
            if signature != BUNDLE_SIGNATURE:
                logging.error('Could not find cinnabar metadata')
                return False
            bundle = io.BufferedReader(bundle)
            # The bundle header lists `sha1 ref` lines, up to an empty line.
            while True:
                line = bundle.readline().rstrip()
                if not line:
                    break
                sha1, ref = line.split(b' ', 1)
                remote_refs[ref] = sha1
        if branch:
            branches = [branch]
        else:
            branches = self._try_merge_branches(hg_repo_url)

        ref = self._find_branch(branches, remote_refs)
        if ref is None:
            logging.error('Could not find cinnabar metadata')
            return False

        if bundle:
            args = ('-v',) if util.progress else ()
            with open(os.devnull, 'wb') as devnull:
                proc = GitProcess(
                    'index-pack', '--stdin', '--fix-thin', *args,
                    stdin=subprocess.PIPE, stdout=devnull)
                shutil.copyfileobj(bundle, proc.stdin)
                failed = proc.wait()
        else:
            fetch = ['fetch', '--no-tags', '--no-recurse-submodules', '-q']
            fetch.append('--progress' if util.progress else '--no-progress')
            fetch.append(fsdecode(git_repo_url))
            cmd = fetch + [fsdecode(ref) + ':refs/cinnabar/fetch']
            proc = GitProcess(*cmd, stdout=sys.stdout)
            failed = proc.wait()
        if failed:
            logging.error('Failed to fetch cinnabar metadata.')
            return False

        # Do some basic validation on the metadata we just got.
        commit = GitCommit(remote_refs[ref])
        if b'cinnabar@git' not in commit.author:
            logging.error('Invalid cinnabar metadata.')
            return False

        flags = set(commit.body.split())
        if b'files-meta' not in flags or b'unified-manifests-v2' not in flags \
                or len(commit.parents) != len(self.METADATA_REFS):
            logging.error('Invalid cinnabar metadata.')
            return False

        # At this point, we'll just assume this is good enough.

        # Get replace refs.
        if commit.tree != EMPTY_TREE:
            errors = False
            by_sha1 = {}
            for k, v in util.iteritems(remote_refs):
                if v not in by_sha1:
                    by_sha1[v] = k
            needed = []
            for line in Git.ls_tree(commit.tree):
                mode, typ, sha1, path = line
                if sha1 in by_sha1:
                    ref = b'refs/cinnabar/replace/%s' % path
                    if bundle:
                        Git.update_ref(ref, sha1)
                    else:
                        needed.append(
                            fsdecode(b':'.join((by_sha1[sha1], ref))))
                else:
                    logging.error('Missing commit: %s', sha1)
                    errors = True
            if errors:
                return False

            if not bundle:
                cmd = fetch + needed
                proc = GitProcess(*cmd, stdout=sys.stdout)
                if proc.wait():
                    logging.error('Failed to fetch cinnabar metadata.')
                    return False

        Git.update_ref(b'refs/cinnabar/metadata', commit.sha1)
        self._metadata_sha1 = commit.sha1
        GitHgHelper.reload()
        Git.delete_ref(b'refs/cinnabar/fetch')

        # TODO: avoid the duplication of code with __init__
        metadata = self.metadata()

        if not metadata:
            # This should never happen, but just in case.
            logging.warning('Could not find cinnabar metadata')
            Git.delete_ref(b'refs/cinnabar/metadata')
            GitHgHelper.reload()
            return False

        metadata, refs = metadata
        self._has_metadata = True
        self._metadata_refs = refs if metadata else {}
        changesets_ref = self._metadata_refs.get(b'refs/cinnabar/changesets')
        self._generation = 0
        if changesets_ref:
            commit = GitCommit(changesets_ref)
            for n, head in enumerate(commit.body.splitlines()):
                hghead, branch = head.split(b' ', 1)
                # Same as in __init__: the line number is the generation of
                # the head, which tags() relies on for ordering.
                self._hgheads._previous[hghead] = (branch, n)
                self._generation = n + 1

        self._manifest_heads_orig = set(GitHgHelper.heads(b'manifests'))

        for line in Git.ls_tree(metadata.tree):
            mode, typ, sha1, path = line
            self._replace[path] = sha1

        return True

    def tags(self):
        """Yield (tag, node) for the tags of all heads, in generation order.

        Tags pointing to NULL_NODE_ID are skipped.
        """
        tags = TagSet()
        heads = sorted((n, h) for h, (b, n) in util.iteritems(self._hgheads))
        for _, h in heads:
            h = self.changeset_ref(h)
            tags.update(self._get_hgtags(h))
        for tag, node in tags:
            if node != NULL_NODE_ID:
                yield tag, node

    def _get_hgtags(self, head):
        """Return the tags defined by the .hgtags file of the commit `head`.

        Results are cached per .hgtags blob in `self._tags`.
        """
        tags = TagSet()
        if not self._tagcache.get(head):
            ls = one(Git.ls_tree(head, b'.hgtags'))
            if not ls:
                self._tagcache[head] = NULL_NODE_ID
                return tags
            mode, typ, self._tagcache[head], path = ls
        tagfile = self._tagcache[head]
        if tagfile not in self._tags:
            if tagfile in self._tagfiles:
                data = GitHgHelper.cat_file(b'blob', self._tagfiles[tagfile])
                for line in data.splitlines():
                    tag, nodes = line.split(b'\0', 1)
                    nodes = nodes.split(b' ')
                    for node in reversed(nodes):
                        tags[tag] = node
            else:
                data = GitHgHelper.cat_file(b'blob', tagfile) or b''
                for line in data.splitlines():
                    if not line:
                        continue
                    try:
                        node, tag = line.split(b' ', 1)
                    except ValueError:
                        continue
                    tag = tag.strip()
                    # Skip lines whose node is not valid hexadecimal.
                    # unhexlify raises TypeError on python 2 and
                    # binascii.Error (a ValueError) on python 3.
                    try:
                        unhexlify(node)
                    except (TypeError, ValueError):
                        continue
                    if node != NULL_NODE_ID:
                        node = self.cached_changeset_ref(node)
                    if node:
                        tags[tag] = node
            self._tags[tagfile] = tags
        return self._tags[tagfile]

    def heads(self, branches={}):
        """Return the set of known heads, optionally limited to `branches`."""
        if not isinstance(branches, (dict, set)):
            branches = set(branches)
        return set(h for h, (b, _) in util.iteritems(self._hgheads)
                   if not branches or b in branches)

    def _head_branch(self, head):
        """Return (branch, head) for `head`, caching looked up branches."""
        if head in self._hgheads:
            return self._hgheads[head][0], head
        if head in self._branches:
            return self._branches[head], head
        branch = self.changeset(head).branch or b'default'
        self._branches[head] = branch
        return branch, head

    def add_head(self, head, parent1=NULL_NODE_ID, parent2=NULL_NODE_ID):
        """Record `head` as a head, replacing its parents on the same branch.
        """
        branch, head = self._head_branch(head)
        for p in (parent1, parent2):
            if p == NULL_NODE_ID:
                continue
            parent_branch, parent_head = self._head_branch(p)
            if parent_branch == branch:
                if parent_head in self._hgheads:
                    assert parent_branch == self._hgheads[parent_head][0]
                    del self._hgheads[parent_head]

        generation = self._generation
        self._generation += 1
        self._hgheads[head] = (branch, generation)

    def read_changeset_data(self, obj):
        """Return the ChangesetPatcher for the git commit `obj`, or None."""
        assert obj is not None
        obj = bytes(obj)
        data = GitHgHelper.git2hg(obj)
        if data is None:
            return None
        ret = ChangesetPatcher(data)
        return ret

    def hg_changeset(self, sha1):
        """Return the changeset node for the git commit `sha1`, or None."""
        data = self.read_changeset_data(sha1)
        if data:
            assert data.startswith(b'changeset ')
            # The 40 characters following b'changeset '.
            return data[10:50]
        return None

    def hg_manifest(self, sha1):
        git_commit = GitCommit(sha1)
        assert len(git_commit.body) == 40
        return git_commit.body

    def _hg2git(self, sha1):
        if not self._has_metadata and not GitHgHelper._helper:
            return None
        gitsha1 = GitHgHelper.hg2git(sha1)
        if gitsha1 == NULL_NODE_ID:
            gitsha1 = None
        return gitsha1

    def changeset(self, sha1, include_parents=False):
        gitsha1 = self.changeset_ref(sha1)
        assert gitsha1
        return self._changeset(gitsha1, include_parents)

    def _changeset(self, git_commit, include_parents=False):
        """Return the changeset for `git_commit`, or None without metadata.
        """
        if not isinstance(git_commit, GitCommit):
            git_commit = GitCommit(git_commit)

        metadata = self.read_changeset_data(git_commit.sha1)
        if not metadata:
            return None
        changeset = Changeset.from_git_commit(git_commit)
        changeset = metadata.apply(changeset)

        if include_parents:
            assert len(git_commit.parents) <= 2
            changeset.parents = tuple(
                self.hg_changeset(self._replace.get(p, p))
                for p in git_commit.parents)

        return changeset

    ATTR = {
        b'100644': b'',
        b'100755': b'x',
        b'120000': b'l',
    }

    @staticmethod
    def manifest_metadata_path(path):
        """Prefix each component of `path` with an underscore."""
        return b'_' + path.replace(b'/', b'/_')

    @staticmethod
    def manifest_path(path):
        """Reverse of `manifest_metadata_path`."""
        return path[1:].replace(b'/_', b'/')

    def manifest(self, sha1, include_parents=False):
        manifest = GeneratedManifestInfo(sha1)
        manifest.raw_data = GitHgHelper.manifest(sha1)
        if include_parents:
            git_sha1 = self.manifest_ref(sha1)
            commit = GitCommit(git_sha1)
            parents = (self.hg_manifest(p) for p in commit.parents)
            manifest.parents = tuple(parents)
        return manifest

    def manifest_ref(self, sha1):
        return self._hg2git(sha1)

    def changeset_ref(self, sha1):
        return self._hg2git(sha1)

    def cached_changeset_ref(self, sha1):
        """Same as `changeset_ref`, with results kept in a cache."""
        try:
            return self._cached_changeset_ref[sha1]
        except KeyError:
            res = self._cached_changeset_ref[sha1] = self.changeset_ref(sha1)
            return res

    def file_meta(self, sha1):
        return GitHgHelper.file_meta(sha1)

    def file(self, sha1, file_parents=None):
        """Return the File for `sha1`, with its parents set when given."""
        if sha1 == HG_EMPTY_FILE:
            content = b''
        else:
            content = GitHgHelper.cat_blob(b':h%s' % sha1)

        file = File(sha1)
        meta = self.file_meta(sha1)
        if meta:
            file.metadata = meta
        file.content = content
        if file_parents is not None:
            FileFindParents.set_parents(file, *file_parents)
        return file

    def git_file_ref(self, sha1):
        # Because an empty file and an empty manifest, both with no parents,
        # have the same sha1, we can't store both in the hg2git tree. So, we
        # choose to never store the file version, and make it forcibly resolve
        # to the empty blob. Which means we won't be storing an empty blob and
        # getting a mark for it, and will attempt to use it directly even if
        # it doesn't exist. The FastImport code works around this.
        # Theoretically, it is possible to have a non-modified child of the
        # empty file, and a non-modified child of the empty manifest, which
        # both would also have the same sha1, but, TTBOMK, it is only possible
        # to achieve with commands like hg debugparents.
        if sha1 == HG_EMPTY_FILE:
            return EMPTY_BLOB
        return self._hg2git(sha1)

    def git_tree(self, manifest_sha1, ref_changeset=None):
        if manifest_sha1 == NULL_NODE_ID:
            return EMPTY_TREE
        return GitHgHelper.create_git_tree(manifest_sha1, ref_changeset)

    def store_changeset(self, instance, commit=None):
        """Store the changeset `instance`.

        `commit` is the git commit (GitCommit or sha1) to associate with
        the changeset. When it is None and grafting was prepared, the work
        is delegated to the grafter. When it is otherwise false, a new git
        commit is created.
        """
        if commit and not isinstance(commit, GitCommit):
            commit = GitCommit(commit)
        if commit is None and self._graft:
            return self._graft.graft(instance)

        if not commit:
            author = Authorship.from_hg(instance.author, instance.timestamp,
                                        instance.utcoffset)
            extra = instance.extra
            if extra and extra.get(b'committer'):
                committer = extra[b'committer']
                if committer[-1:] == b'>':
                    committer = Authorship.from_hg(
                        committer, instance.timestamp, instance.utcoffset)
                else:
                    committer = Authorship.from_hg_str(
                        committer, maybe_git_utcoffset=True)
                    if committer.to_hg() == committer:
                        extra = dict(instance.extra)
                        del extra[b'committer']
                        if not extra:
                            extra = None
            else:
                committer = author

            parents = tuple(b':h%s' % p for p in instance.parents)

            body = instance.body

            # There are cases where two changesets would map to the same
            # git commit because their differences are not in information
            # stored in the git commit (different manifest node, but
            # identical tree ; different branches ; etc.)
            # In that case, add invisible characters to the commit
            # message until we find a commit that doesn't map to another
            # changeset.
            committer = committer.to_git_str()
            author = author.to_git_str()
            with GitHgHelper.commit(
                ref=b'refs/cinnabar/tip',
                message=body,
                committer=committer,
                author=author,
                parents=parents,
                pseudo_mark=b':h%s' % instance.node,
            ) as c:
                c.filemodify(b'', self.git_tree(instance.manifest,
                                                *instance.parents[:1]),
                             typ=b'tree')

            commit = PseudoGitCommit(b':1')
            commit.author = author
            commit.committer = committer
            commit.body = body

        GitHgHelper.set(b'changeset', instance.node, commit.sha1)
        changeset = Changeset.from_git_commit(commit)
        GitHgHelper.put_blob(
            ChangesetPatcher.from_diff(changeset, instance), want_sha1=False)
        GitHgHelper.set(b'changeset-metadata', instance.node, b':1')

        self._branches[instance.node] = instance.branch or b'default'
        self.add_head(instance.node, instance.parent1, instance.parent2)

    MODE = {
        b'': b'160644',
        b'l': b'160000',
        b'x': b'160755',
    }

    def store_manifest(self, instance):
        """Store the manifest `instance` as a commit on the manifests ref.

        When manifest checks are enabled, raises an Exception if the stored
        manifest doesn't match its node.
        """
        delta_node = getattr(instance, 'delta_node', NULL_NODE_ID)
        if delta_node != NULL_NODE_ID:
            previous = b':h%s' % delta_node
        else:
            previous = None
        parents = tuple(b':h%s' % p for p in instance.parents)
        with GitHgHelper.commit(
            ref=b'refs/cinnabar/manifests',
            from_commit=previous,
            parents=parents,
            message=instance.node,
            pseudo_mark=b':h%s' % instance.node,
        ) as commit:
            if hasattr(instance, 'delta_node'):
                for name in instance.removed:
                    commit.filedelete(self.manifest_metadata_path(name))
                modified = instance.modified.items()
            else:
                # slow
                modified = ((line.path, (line.sha1, line.attr))
                            for line in instance)
            for name, (node, attr) in modified:
                node = bytes(node)
                commit.filemodify(self.manifest_metadata_path(name), node,
                                  self.MODE[attr])

        GitHgHelper.set(b'manifest', instance.node, b':1')

        if check_enabled('manifests'):
            if not GitHgHelper.check_manifest(instance.node):
                # Use the guarded delta_node: the instance may not have the
                # attribute, and that must not mask the mismatch error.
                raise Exception(
                    'sha1 mismatch for node %s with parents %s %s and '
                    'previous %s' %
                    (instance.node.decode('ascii'),
                     instance.parent1.decode('ascii'),
                     instance.parent2.decode('ascii'),
                     delta_node.decode('ascii'))
                )

    def close(self, refresh=()):
        if self._closed:
            return
        if self._graft:
            self._graft.close()
        self._closed = True
        # If the helper is not running, we don't have anything to update.
1164 if not GitHgHelper._helper: 1165 return 1166 update_metadata = {} 1167 tree = GitHgHelper.store(b'metadata', b'hg2git') 1168 if tree != NULL_NODE_ID: 1169 hg2git = self._metadata_refs.get(b'refs/cinnabar/hg2git') 1170 with GitHgHelper.commit( 1171 ref=b'refs/cinnabar/hg2git', 1172 ) as commit: 1173 commit.write(b'M 040000 %s \n' % tree) 1174 if commit.sha1 != hg2git: 1175 update_metadata[b'refs/cinnabar/hg2git'] = commit.sha1 1176 1177 tree = GitHgHelper.store(b'metadata', b'git2hg') 1178 if tree != NULL_NODE_ID: 1179 notes = self._metadata_refs.get(b'refs/notes/cinnabar') 1180 with GitHgHelper.commit( 1181 ref=b'refs/notes/cinnabar', 1182 ) as commit: 1183 commit.write(b'M 040000 %s \n' % tree) 1184 if commit.sha1 != notes: 1185 update_metadata[b'refs/notes/cinnabar'] = commit.sha1 1186 1187 hg_changeset_heads = list(self._hgheads) 1188 changeset_heads = list(self.changeset_ref(h) 1189 for h in hg_changeset_heads) 1190 if (any(self._hgheads.iterchanges()) or 1191 b'refs/cinnabar/changesets' in refresh): 1192 heads = sorted((self._hgheads[h][1], self._hgheads[h][0], h, g) 1193 for h, g in zip(hg_changeset_heads, 1194 changeset_heads)) 1195 with GitHgHelper.commit( 1196 ref=b'refs/cinnabar/changesets', 1197 parents=list(h for _, __, ___, h in heads), 1198 message=b'\n'.join(b'%s %s' % (h, b) for _, b, h, __ in heads), 1199 ) as commit: 1200 pass 1201 update_metadata[b'refs/cinnabar/changesets'] = commit.sha1 1202 1203 changeset_heads = set(changeset_heads) 1204 1205 manifest_heads = GitHgHelper.heads(b'manifests') 1206 if (set(manifest_heads) != self._manifest_heads_orig or 1207 (b'refs/cinnabar/changesets' in update_metadata and 1208 not manifest_heads) or b'refs/cinnabar/manifests' in refresh): 1209 with GitHgHelper.commit( 1210 ref=b'refs/cinnabar/manifests', 1211 parents=sorted(manifest_heads), 1212 ) as commit: 1213 pass 1214 update_metadata[b'refs/cinnabar/manifests'] = commit.sha1 1215 1216 tree = GitHgHelper.store(b'metadata', b'files-meta') 1217 
files_meta_ref = self._metadata_refs.get(b'refs/cinnabar/files-meta') 1218 if update_metadata and (tree != NULL_NODE_ID or not files_meta_ref): 1219 with GitHgHelper.commit( 1220 ref=b'refs/cinnabar/files-meta', 1221 ) as commit: 1222 if tree != NULL_NODE_ID: 1223 commit.write(b'M 040000 %s \n' % tree) 1224 if commit.sha1 != files_meta_ref: 1225 update_metadata[b'refs/cinnabar/files-meta'] = commit.sha1 1226 1227 replace_changed = False 1228 for status, ref, sha1 in self._replace.iterchanges(): 1229 if status == VersionedDict.REMOVED: 1230 Git.delete_ref(b'refs/cinnabar/replace/%s' % ref) 1231 else: 1232 Git.update_ref(b'refs/cinnabar/replace/%s' % ref, sha1) 1233 replace_changed = True 1234 1235 if update_metadata or replace_changed: 1236 parents = list(update_metadata.get(r) or self._metadata_refs[r] 1237 for r in self.METADATA_REFS) 1238 metadata_sha1 = (Git.config('cinnabar.previous-metadata') or 1239 self._metadata_sha1) 1240 if metadata_sha1: 1241 parents.append(metadata_sha1) 1242 with GitHgHelper.commit( 1243 ref=b'refs/cinnabar/metadata', 1244 parents=parents, 1245 message=b' '.join(sorted(self.FLAGS)), 1246 ) as commit: 1247 for sha1, target in util.iteritems(self._replace): 1248 commit.filemodify(sha1, target, b'commit') 1249 1250 for c in self._tagcache: 1251 if c not in changeset_heads: 1252 self._tagcache[c] = False 1253 1254 for c in changeset_heads: 1255 if c not in self._tagcache: 1256 tags = self._get_hgtags(c) 1257 1258 files = set(util.itervalues(self._tagcache)) 1259 deleted = set() 1260 created = {} 1261 for f in self._tagcache_items: 1262 if (f not in self._tagcache and f not in self._tagfiles or 1263 f not in files and f in self._tagfiles): 1264 deleted.add(f) 1265 1266 def tagset_lines(tags): 1267 for tag, value in tags: 1268 yield b'%s\0%s %s\n' % (tag, value, 1269 b' '.join(tags.hist(tag))) 1270 1271 for f, tags in util.iteritems(self._tags): 1272 if f not in self._tagfiles and f != NULL_NODE_ID: 1273 data = b''.join(tagset_lines(tags)) 
1274 mark = GitHgHelper.put_blob(data=data) 1275 created[f] = (mark, b'exec') 1276 1277 if created or deleted: 1278 self.tag_changes = True 1279 1280 for c, f in util.iteritems(self._tagcache): 1281 if (f and c not in self._tagcache_items): 1282 if f == NULL_NODE_ID: 1283 created[c] = (f, b'commit') 1284 else: 1285 created[c] = (f, b'regular') 1286 elif f is False and c in self._tagcache_items: 1287 deleted.add(c) 1288 1289 if created or deleted: 1290 with GitHgHelper.commit( 1291 ref=b'refs/cinnabar/tag_cache', 1292 from_commit=self._tagcache_ref, 1293 ) as commit: 1294 for f in deleted: 1295 commit.filedelete(f) 1296 1297 for f, (filesha1, typ) in util.iteritems(created): 1298 commit.filemodify(f, filesha1, typ) 1299 1300 # refs/notes/cinnabar is kept for convenience 1301 for ref in update_metadata: 1302 if ref not in (b'refs/notes/cinnabar',): 1303 Git.delete_ref(ref) 1304 1305 GitHgHelper.close(rollback=False) 1306 1307 # Try to detect issue #207 as early as possible. 1308 GitHgHelper._helper = False 1309 busted = False 1310 from .hg.repo import getbundle_params, stored_files 1311 for (node, (parent1, parent2)) in progress_iter( 1312 "Checking {} imported file root and head revisions", 1313 util.iteritems(stored_files)): 1314 if not GitHgHelper.check_file(node, parent1, parent2): 1315 busted = True 1316 logging.error("Error in file %s" % node) 1317 if busted: 1318 import json 1319 extra = "" 1320 if getbundle_params: 1321 extra = \ 1322 "If it failed, please also copy/paste the following:\n" 1323 extra += json.dumps(getbundle_params, sort_keys=True, indent=4) 1324 raise Abort( 1325 "It seems you have hit a known, rare, and difficult to " 1326 "reproduce issue.\n" 1327 "Your help would be appreciated.\n" 1328 "Please try either `git cinnabar rollback` followed by the " 1329 "same command that just\n" 1330 "failed, or `git cinnabar reclone`.\n" 1331 "Please open a new issue " 1332 "(https://github.com/glandium/git-cinnabar/issues/new)\n" 1333 "mentioning issue 
#207 and reporting whether the second " 1334 "attempt succeeded.\n" + extra + "\n" 1335 "Please keep a copy of this repository." 1336 ) 1337