# Copyright (C) 2008-2018 Jelmer Vernooij <jelmer@jelmer.uk>
# Copyright (C) 2007 Canonical Ltd
# Copyright (C) 2008 John Carr
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Converters, etc for going between Bazaar and Git ids."""

import base64
import stat

from .. import (
    bencode,
    errors,
    foreign,
    trace,
    urlutils,
    )
from ..foreign import (
    ForeignVcs,
    VcsMappingRegistry,
    ForeignRevision,
    )
from ..revision import (
    NULL_REVISION,
    Revision,
    )
from .errors import (
    NoPushSupport,
    )
from .hg import (
    format_hg_metadata,
    extract_hg_metadata,
    )
from .roundtrip import (
    extract_bzr_metadata,
    inject_bzr_metadata,
    CommitSupplement,
    )


DEFAULT_FILE_MODE = stat.S_IFREG | 0o644
HG_RENAME_SOURCE = b"HG:rename-source"
HG_EXTRA = b"HG:extra"

# This HG extra is used to indicate the commit that this commit was based on.
HG_EXTRA_AMEND_SOURCE = b"amend_source"

FILE_ID_PREFIX = b'git:'

# Always the same.
ROOT_ID = b"TREE_ROOT"


class UnknownCommitExtra(errors.BzrError):
    """Raised when a git commit carries extra header fields we don't know."""

    _fmt = "Unknown extra fields in %(object)r: %(fields)r."

    def __init__(self, object, fields):
        """Record the offending object and its field names.

        :param object: The commit the fields were found on.
        :param fields: Iterable of text field names.
        """
        errors.BzrError.__init__(self)
        self.object = object
        self.fields = ",".join(fields)


class UnknownMercurialCommitExtra(errors.BzrError):
    """Raised when a commit carries an unrecognised ``HG:extra`` key."""

    _fmt = "Unknown mercurial extra fields in %(object)r: %(fields)r."

    def __init__(self, object, fields):
        """Record the offending object and its field names.

        :param object: The commit the fields were found on.
        :param fields: Iterable of bytes field names.
        """
        errors.BzrError.__init__(self)
        self.object = object
        self.fields = b",".join(fields)


def escape_file_id(file_id):
    """Escape ``_``, space and form feed in a bytes file id.

    ``_`` becomes ``__``, a space becomes ``_s`` and ``\\x0c`` becomes
    ``_c``.  The underscore must be doubled first so that the escape
    sequences introduced afterwards stay unambiguous.
    """
    file_id = file_id.replace(b'_', b'__')
    file_id = file_id.replace(b' ', b'_s')
    file_id = file_id.replace(b'\x0c', b'_c')
    return file_id


def unescape_file_id(file_id):
    """Reverse `escape_file_id`.

    :param file_id: Escaped file id (bytes).
    :return: The unescaped bytes.
    :raises ValueError: If ``_`` is followed by anything other than ``_``,
        ``s`` or ``c`` (including the end of the input).
    """
    ret = bytearray()
    i = 0
    while i < len(file_id):
        # One-byte slices are used for comparisons so that the result is
        # bytes rather than an integer.
        if file_id[i:i + 1] != b'_':
            ret.extend(file_id[i:i + 1])
        else:
            if file_id[i + 1:i + 2] == b'_':
                ret.extend(b"_")
            elif file_id[i + 1:i + 2] == b's':
                ret.extend(b" ")
            elif file_id[i + 1:i + 2] == b'c':
                ret.extend(b"\x0c")
            else:
                raise ValueError("unknown escape character %s" %
                                 file_id[i + 1:i + 2])
            # Skip the escape code that followed the underscore.
            i += 1
        i += 1
    return bytes(ret)


def fix_person_identifier(text):
    """Normalise a committer/author identifier to ``name <email>`` form.

    :param text: Identifier as bytes.
    :return: Bytes of the form ``b"name <email>"``.  When ``text`` has
        neither ``<`` nor ``>``, it is used as both name and e-mail.  When
        only the closing ``>`` is missing it is appended.
    :raises ValueError: If the last ``>`` precedes the last ``<``, or if
        there is a ``>`` but no ``<``.
    """
    if b"<" not in text and b">" not in text:
        username = text
        email = text
    elif b">" not in text:
        return text + b">"
    else:
        if text.rindex(b">") < text.rindex(b"<"):
            raise ValueError(text)
        username, email = text.split(b"<", 2)[-2:]
        email = email.split(b">", 1)[0]
        if username.endswith(b" "):
            username = username[:-1]
    return b"%s <%s>" % (username, email)


def decode_git_path(path):
    """Take a git path and decode it.

    Bytes that are not valid UTF-8 are kept as lone surrogates
    (``surrogateescape``) so that `encode_git_path` can restore them.
    """
    # For valid UTF-8 this is identical to a strict decode; the error
    # handler only kicks in for undecodable bytes.
    return path.decode('utf-8', 'surrogateescape')


def encode_git_path(path):
    """Take a regular path and encode it for git.

    Lone surrogates produced by `decode_git_path` are turned back into the
    original bytes.
    """
    return path.encode('utf-8', 'surrogateescape')


def warn_escaped(commit, num_escaped):
    """Warn that XML-invalid characters in ``commit`` were escaped."""
    trace.warning("Escaped %d XML-invalid characters in %s. Will be unable "
                  "to regenerate the SHA map.", num_escaped, commit)


def warn_unusual_mode(commit, path, mode):
    """Log (at mutter level) an unusual file mode found in ``commit``."""
    trace.mutter("Unusual file mode %o for %s in %s. Storing as revision "
                 "property. ", mode, path, commit)


class BzrGitMapping(foreign.VcsMapping):
    """Class that maps between Git and Bazaar semantics."""
    experimental = False

    BZR_DUMMY_FILE = None  # type: Optional[str]

    def is_special_file(self, filename):
        """Return True if ``filename`` is this mapping's dummy file."""
        return (filename in (self.BZR_DUMMY_FILE, ))

    def __init__(self):
        super(BzrGitMapping, self).__init__(foreign_vcs_git)

    def __eq__(self, other):
        return (type(self) == type(other)
                and self.revid_prefix == other.revid_prefix)

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash on
        # the same fields that __eq__ compares.
        return hash((type(self), self.revid_prefix))

    @classmethod
    def revision_id_foreign_to_bzr(cls, git_rev_id):
        """Convert a git revision id handle to a Bazaar revision id."""
        from dulwich.protocol import ZERO_SHA
        if git_rev_id == ZERO_SHA:
            return NULL_REVISION
        return b"%s:%s" % (cls.revid_prefix, git_rev_id)

    @classmethod
    def revision_id_bzr_to_foreign(cls, bzr_rev_id):
        """Convert a Bazaar revision id to a git revision id handle.

        :return: Tuple of (git sha, mapping instance).
        :raises InvalidRevisionId: If the id lacks this mapping's prefix.
        """
        if not bzr_rev_id.startswith(b"%s:" % cls.revid_prefix):
            raise errors.InvalidRevisionId(bzr_rev_id, cls)
        return bzr_rev_id[len(cls.revid_prefix) + 1:], cls()

    def generate_file_id(self, path):
        """Return the file id for a git path (text or bytes)."""
        # Git paths are just bytestrings
        # We must just hope they are valid UTF-8..
        if isinstance(path, str):
            path = path.encode("utf-8")
        if path == b"":
            return ROOT_ID
        return FILE_ID_PREFIX + escape_file_id(path)

    def parse_file_id(self, file_id):
        """Return the path encoded in a file id made by `generate_file_id`.

        :raises ValueError: If ``file_id`` is neither the root id nor
            prefixed with ``FILE_ID_PREFIX``.
        """
        if file_id == ROOT_ID:
            return u""
        if not file_id.startswith(FILE_ID_PREFIX):
            raise ValueError("not a git file id: %r" % (file_id,))
        return decode_git_path(unescape_file_id(file_id[len(FILE_ID_PREFIX):]))

    def import_unusual_file_modes(self, rev, unusual_file_modes):
        """Store a path -> mode dict in the ``file-modes`` property.

        Nothing is stored when ``unusual_file_modes`` is empty.
        """
        if unusual_file_modes:
            ret = [(path, unusual_file_modes[path])
                   for path in sorted(unusual_file_modes.keys())]
            rev.properties[u'file-modes'] = bencode.bencode(ret)

    def export_unusual_file_modes(self, rev):
        """Return the path -> mode dict stored in ``file-modes``.

        :return: The decoded dict, or ``{}`` if the property is absent.
        """
        try:
            file_modes = rev.properties[u'file-modes']
        except KeyError:
            return {}
        # import_unusual_file_modes stores the bencoded bytes directly, so
        # only encode when the property was handed back as text.
        if not isinstance(file_modes, bytes):
            file_modes = file_modes.encode("utf-8")
        return dict(bencode.bdecode(file_modes))

    def _generate_git_svn_metadata(self, rev, encoding):
        """Return the ``git-svn-id`` message trailer for ``rev`` as bytes.

        :return: ``b""`` if the revision has no ``git-svn-id`` property.
        """
        # The result is appended to an already-encoded commit message, so
        # it has to be bytes.
        try:
            git_svn_id = rev.properties[u"git-svn-id"]
        except KeyError:
            return b""
        else:
            return b"\ngit-svn-id: %s\n" % git_svn_id.encode(encoding)

    def _generate_hg_message_tail(self, rev):
        """Build the Mercurial metadata tail from ``hg:*`` properties.

        :raises TypeError: If `format_hg_metadata` doesn't return bytes.
        """
        extra = {}
        renames = []
        branch = 'default'
        for name in rev.properties:
            if name == u'hg:extra:branch':
                branch = rev.properties[u'hg:extra:branch']
            elif name.startswith(u'hg:extra'):
                extra[name[len(u'hg:extra:'):]] = base64.b64decode(
                    rev.properties[name])
            elif name == u'hg:renames':
                renames = bencode.bdecode(base64.b64decode(
                    rev.properties[u'hg:renames']))
            # TODO: Export other properties as 'bzr:' extras?
        ret = format_hg_metadata(renames, branch, extra)
        if not isinstance(ret, bytes):
            raise TypeError(ret)
        return ret

    def _extract_git_svn_metadata(self, rev, message):
        """Strip a trailing ``git-svn-id:`` line from ``message``.

        The id is stored as text in ``rev.properties['git-svn-id']``.

        :param message: Commit message, bytes or text.
        :return: The message without the trailer, in the same type as the
            input; unchanged if there is no trailer.
        :raises ValueError: If the id can't be parsed by `parse_git_svn_id`.
        """
        # Callers in this file pass the still-encoded message, so pick
        # separators that match the input type.
        if isinstance(message, bytes):
            newline, marker, separator = b"\n", b"git-svn-id:", b": "
        else:
            newline, marker, separator = "\n", "git-svn-id:", ": "
        lines = message.split(newline)
        if not (len(lines) >= 2 and not lines[-1] and
                lines[-2].startswith(marker)):
            return message
        git_svn_id = lines[-2].split(separator, 1)[1]
        if isinstance(git_svn_id, bytes):
            # NOTE(review): the commit encoding isn't available here;
            # UTF-8 is assumed for the id -- confirm.
            git_svn_id = git_svn_id.decode('utf-8')
        rev.properties[u'git-svn-id'] = git_svn_id
        # Only called for validation at the moment; the result is unused.
        # FIXME: Convert this to converted-from property somehow..
        parse_git_svn_id(git_svn_id)
        return newline.join(lines[:-2])

    def _extract_hg_metadata(self, rev, message):
        """Move Mercurial metadata from ``message`` into ``hg:*`` properties.

        :return: The message as returned by `extract_hg_metadata`.
        """
        (message, renames, branch, extra) = extract_hg_metadata(message)
        if branch is not None:
            rev.properties[u'hg:extra:branch'] = branch
        for name, value in extra.items():
            rev.properties[u'hg:extra:' + name] = base64.b64encode(value)
        if renames:
            rev.properties[u'hg:renames'] = base64.b64encode(bencode.bencode(
                [(new, old) for (old, new) in renames.items()]))
        return message

    def _extract_bzr_metadata(self, rev, message):
        """Return (message, metadata) as given by `extract_bzr_metadata`."""
        (message, metadata) = extract_bzr_metadata(message)
        return message, metadata

    def _decode_commit_message(self, rev, message, encoding):
        """Decode ``message``; return it with an empty CommitSupplement."""
        return message.decode(encoding), CommitSupplement()

    def _encode_commit_message(self, rev, message, encoding):
        """Encode the revision message for storage in a git commit."""
        return message.encode(encoding)

    def export_commit(self, rev, tree_sha, parent_lookup, lossy,
                      verifiers):
        """Turn a Bazaar revision in to a Git commit

        :param rev: Revision to export
        :param tree_sha: Tree sha for the commit
        :param parent_lookup: Function for looking up the GIT sha equiv of a
            bzr revision; raises KeyError for unknown revisions
        :param lossy: If true, no roundtripping information is stored.
        :param verifiers: Verifiers info
        :return: dulwich.objects.Commit representing the revision
        :raises NoPushSupport: If roundtrip metadata is needed but this
            mapping doesn't support roundtripping.
        """
        from dulwich.objects import Commit, Tag
        commit = Commit()
        commit.tree = tree_sha
        if not lossy:
            metadata = CommitSupplement()
            metadata.verifiers = verifiers
        else:
            metadata = None
        parents = []
        for p in rev.parent_ids:
            try:
                git_p = parent_lookup(p)
            except KeyError:
                git_p = None
                # A parent that can't be expressed in git: remember the
                # full bzr parent list in the roundtrip metadata.
                if metadata is not None:
                    metadata.explicit_parent_ids = rev.parent_ids
            if git_p is not None:
                if len(git_p) != 40:
                    raise AssertionError("unexpected length for %r" % git_p)
                parents.append(git_p)
        commit.parents = parents
        try:
            encoding = rev.properties[u'git-explicit-encoding']
        except KeyError:
            encoding = rev.properties.get(u'git-implicit-encoding', 'utf-8')
        # Only an explicit encoding is recorded in the commit header.
        try:
            commit.encoding = rev.properties[u'git-explicit-encoding'].encode(
                'ascii')
        except KeyError:
            pass
        commit.committer = fix_person_identifier(rev.committer.encode(
            encoding))
        commit.author = fix_person_identifier(
            rev.get_apparent_authors()[0].encode(encoding))
        commit.commit_time = int(rev.timestamp)
        if u'author-timestamp' in rev.properties:
            commit.author_time = int(rev.properties[u'author-timestamp'])
        else:
            commit.author_time = commit.commit_time
        commit._commit_timezone_neg_utc = (
            u"commit-timezone-neg-utc" in rev.properties)
        commit.commit_timezone = rev.timezone
        commit._author_timezone_neg_utc = (
            u"author-timezone-neg-utc" in rev.properties)
        if u'author-timezone' in rev.properties:
            commit.author_timezone = int(rev.properties[u'author-timezone'])
        else:
            commit.author_timezone = commit.commit_timezone
        if u'git-gpg-signature' in rev.properties:
            commit.gpgsig = rev.properties[u'git-gpg-signature'].encode(
                'utf-8', 'surrogateescape')
        commit.message = self._encode_commit_message(rev, rev.message,
                                                     encoding)
        if not isinstance(commit.message, bytes):
            raise TypeError(commit.message)
        if metadata is not None:
            # Revision ids that don't parse as git revision ids have to be
            # stored explicitly to survive a roundtrip.
            try:
                mapping_registry.parse_revision_id(rev.revision_id)
            except errors.InvalidRevisionId:
                metadata.revision_id = rev.revision_id
            # Properties that are represented natively in the git commit
            # and therefore don't need to go in the metadata.
            mapping_properties = {
                u'author', u'author-timezone', u'author-timezone-neg-utc',
                u'commit-timezone-neg-utc', u'git-implicit-encoding',
                u'git-gpg-signature', u'git-explicit-encoding',
                u'author-timestamp', u'file-modes'}
            for k, v in rev.properties.items():
                if k not in mapping_properties:
                    metadata.properties[k] = v
        if not lossy and metadata:
            if self.roundtripping:
                commit.message = inject_bzr_metadata(commit.message, metadata,
                                                     encoding)
            else:
                raise NoPushSupport(
                    None, None, self, revision_id=rev.revision_id)
        if not isinstance(commit.message, bytes):
            raise TypeError(commit.message)
        # Merge tags are stored as git-mergetag-0, git-mergetag-1, ...
        i = 0
        propname = u'git-mergetag-0'
        while propname in rev.properties:
            commit.mergetag.append(Tag.from_string(rev.properties[propname]))
            i += 1
            propname = u'git-mergetag-%d' % i
        if u'git-extra' in rev.properties:
            commit.extra.extend(
                [line.split(b' ', 1)
                 for line in rev.properties[u'git-extra'].splitlines()])
        return commit

    def get_revision_id(self, commit):
        """Return the Bazaar revision id for a git commit.

        The id from the commit's roundtrip metadata is used if there is
        one; otherwise it is derived from the commit sha.
        """
        if commit.encoding:
            encoding = commit.encoding.decode('ascii')
        else:
            encoding = 'utf-8'
        try:
            message, metadata = self._decode_commit_message(
                None, commit.message, encoding)
        except UnicodeDecodeError:
            # Undecodable message: fall back to the sha-derived id below.
            pass
        else:
            if metadata.revision_id:
                return metadata.revision_id
        return self.revision_id_foreign_to_bzr(commit.id)

    def import_commit(self, commit, lookup_parent_revid, strict=True):
        """Convert a git commit to a bzr revision.

        :param commit: The git commit object to convert.
        :param lookup_parent_revid: Function mapping a parent git sha to a
            revision id; KeyError makes the id be derived from the sha.
        :param strict: If true, unknown extra commit fields are an error.
        :return: Tuple of (`ForeignRevision`, roundtrip revision id or
            None, verifiers dict)
        :raises UnknownCommitExtra: In strict mode, for unknown extra
            fields.
        :raises UnknownMercurialCommitExtra: In strict mode, for unknown
            ``HG:extra`` keys.
        """
        if commit is None:
            raise AssertionError("Commit object can't be None")
        rev = ForeignRevision(commit.id, self,
                              self.revision_id_foreign_to_bzr(commit.id))
        rev.git_metadata = None

        def decode_using_encoding(rev, commit, encoding):
            rev.committer = commit.committer.decode(encoding)
            if commit.committer != commit.author:
                rev.properties[u'author'] = commit.author.decode(encoding)
            rev.message, rev.git_metadata = self._decode_commit_message(
                rev, commit.message, encoding)

        if commit.encoding is not None:
            rev.properties[u'git-explicit-encoding'] = commit.encoding.decode(
                'ascii')
        if commit.encoding is not None and commit.encoding != b'false':
            decode_using_encoding(rev, commit, commit.encoding.decode('ascii'))
        else:
            # No usable declared encoding: try UTF-8 first, then latin1,
            # recording the fallback if it was needed.
            for encoding in ('utf-8', 'latin1'):
                try:
                    decode_using_encoding(rev, commit, encoding)
                except UnicodeDecodeError:
                    pass
                else:
                    if encoding != 'utf-8':
                        rev.properties[u'git-implicit-encoding'] = encoding
                    break
        if commit.commit_time != commit.author_time:
            rev.properties[u'author-timestamp'] = str(commit.author_time)
        if commit.commit_timezone != commit.author_timezone:
            rev.properties[u'author-timezone'] = "%d" % commit.author_timezone
        if commit._author_timezone_neg_utc:
            rev.properties[u'author-timezone-neg-utc'] = ""
        if commit._commit_timezone_neg_utc:
            rev.properties[u'commit-timezone-neg-utc'] = ""
        if commit.gpgsig:
            rev.properties[u'git-gpg-signature'] = commit.gpgsig.decode(
                'utf-8', 'surrogateescape')
        if commit.mergetag:
            for i, tag in enumerate(commit.mergetag):
                rev.properties[u'git-mergetag-%d' % i] = tag.as_raw_string()
        rev.timestamp = commit.commit_time
        rev.timezone = commit.commit_timezone
        rev.parent_ids = None
        if rev.git_metadata is not None:
            md = rev.git_metadata
            roundtrip_revid = md.revision_id
            if md.explicit_parent_ids:
                rev.parent_ids = md.explicit_parent_ids
            rev.properties.update(md.properties)
            verifiers = md.verifiers
        else:
            roundtrip_revid = None
            verifiers = {}
        if rev.parent_ids is None:
            parents = []
            for p in commit.parents:
                try:
                    parents.append(lookup_parent_revid(p))
                except KeyError:
                    parents.append(self.revision_id_foreign_to_bzr(p))
            rev.parent_ids = list(parents)
        unknown_extra_fields = []
        extra_lines = []
        for k, v in commit.extra:
            if k == HG_RENAME_SOURCE:
                extra_lines.append(k + b' ' + v + b'\n')
            elif k == HG_EXTRA:
                hgk, hgv = v.split(b':', 1)
                if hgk not in (HG_EXTRA_AMEND_SOURCE, ) and strict:
                    raise UnknownMercurialCommitExtra(commit, [hgk])
                extra_lines.append(k + b' ' + v + b'\n')
            else:
                unknown_extra_fields.append(k)
        if unknown_extra_fields and strict:
            raise UnknownCommitExtra(
                commit,
                [f.decode('ascii', 'replace') for f in unknown_extra_fields])
        if extra_lines:
            rev.properties[u'git-extra'] = b''.join(extra_lines)
        return rev, roundtrip_revid, verifiers


class BzrGitMappingv1(BzrGitMapping):
    """Version 1 of the git mapping (revision ids prefixed ``git-v1``)."""

    revid_prefix = b'git-v1'
    experimental = False

    def __str__(self):
        # __str__ must return text; the prefix itself is bytes.
        return self.revid_prefix.decode('ascii')

class BzrGitMappingExperimental(BzrGitMappingv1):
    """Experimental mapping that stores extra metadata in commit messages."""

    revid_prefix = b'git-experimental'
    experimental = True
    roundtripping = False

    BZR_DUMMY_FILE = '.bzrdummy'

    def _decode_commit_message(self, rev, message, encoding):
        """Strip hg, git-svn and bzr metadata, then decode the message.

        :param rev: Revision that receives the extracted properties; a
            scratch `Revision` is used when None.
        :param message: Encoded commit message (bytes).
        :param encoding: Encoding used to decode what remains.
        :return: Tuple of (decoded message, bzr metadata).
        """
        if rev is None:
            rev = Revision()
        message = self._extract_hg_metadata(rev, message)
        message = self._extract_git_svn_metadata(rev, message)
        message, metadata = self._extract_bzr_metadata(rev, message)
        return message.decode(encoding), metadata

    def _encode_commit_message(self, rev, message, encoding):
        """Encode the message and append hg and git-svn metadata."""
        ret = message.encode(encoding)
        ret += self._generate_hg_message_tail(rev)
        ret += self._generate_git_svn_metadata(rev, encoding)
        return ret

    def import_commit(self, commit, lookup_parent_revid, strict=True):
        """Import as the base class does and tag the originating commit.

        Adds a ``converted_revision`` property of the form
        ``"git <sha>\\n"``.
        """
        rev, roundtrip_revid, verifiers = super(
            BzrGitMappingExperimental, self).import_commit(
                commit, lookup_parent_revid, strict)
        # commit.id is a bytes hex sha; decode it so the property does not
        # end up containing a bytes repr ("git b'...'").
        git_sha = commit.id
        if isinstance(git_sha, bytes):
            git_sha = git_sha.decode('ascii')
        rev.properties[u'converted_revision'] = "git %s\n" % git_sha
        return rev, roundtrip_revid, verifiers


class GitMappingRegistry(VcsMappingRegistry):
    """Registry with available git mappings."""

    def revision_id_bzr_to_foreign(self, bzr_revid):
        """Split a bzr revision id into (git sha, mapping).

        :return: ``(ZERO_SHA, None)`` for the null revision, otherwise
            whatever the matching mapping's ``revision_id_bzr_to_foreign``
            returns.
        :raises InvalidRevisionId: If the id doesn't start with ``git-``
            or has no ``:`` separating the mapping name from the sha.
        """
        if bzr_revid == NULL_REVISION:
            from dulwich.protocol import ZERO_SHA
            return ZERO_SHA, None
        if not bzr_revid.startswith(b"git-"):
            raise errors.InvalidRevisionId(bzr_revid, None)
        mapping_version, separator, _ = bzr_revid.partition(b":")
        if not separator:
            # "git-..." without a sha part is not one of our ids.
            raise errors.InvalidRevisionId(bzr_revid, None)
        mapping = self.get(mapping_version)
        return mapping.revision_id_bzr_to_foreign(bzr_revid)

    parse_revision_id = revision_id_bzr_to_foreign


mapping_registry = GitMappingRegistry()
mapping_registry.register_lazy(b'git-v1', __name__,
                               "BzrGitMappingv1")
mapping_registry.register_lazy(b'git-experimental',
                               __name__, "BzrGitMappingExperimental")
# Uncomment the next line to enable the experimental bzr-git mappings.
# This will make sure all bzr metadata is pushed into git, allowing for
# full roundtripping later.
# NOTE: THIS IS EXPERIMENTAL. IT MAY EAT YOUR DATA OR CORRUPT
# YOUR BZR OR GIT REPOSITORIES. USE WITH CARE.
# mapping_registry.set_default(b'git-experimental')
mapping_registry.set_default(b'git-v1')


class ForeignGit(ForeignVcs):
    """The Git Stupid Content Tracker"""

    @property
    def branch_format(self):
        """A `LocalGitBranchFormat` instance (imported lazily)."""
        from .branch import LocalGitBranchFormat
        return LocalGitBranchFormat()

    @property
    def repository_format(self):
        """A `GitRepositoryFormat` instance (imported lazily)."""
        from .repository import GitRepositoryFormat
        return GitRepositoryFormat()

    def __init__(self):
        super(ForeignGit, self).__init__(mapping_registry)
        self.abbreviation = "git"

    @classmethod
    def serialize_foreign_revid(cls, foreign_revid):
        """Return the foreign revid unchanged."""
        return foreign_revid

    @classmethod
    def show_foreign_revid(cls, foreign_revid):
        """Return a dict describing the git commit for display."""
        return {"git commit": foreign_revid.decode('utf-8')}


foreign_vcs_git = ForeignGit()
default_mapping = mapping_registry.get_default()()


def symlink_to_blob(symlink_target):
    """Create a dulwich Blob holding a symlink target.

    :param symlink_target: Target path; text is encoded with
        `encode_git_path`, anything else is stored as given.
    """
    from dulwich.objects import Blob
    blob = Blob()
    if isinstance(symlink_target, str):
        symlink_target = encode_git_path(symlink_target)
    blob.data = symlink_target
    return blob


def mode_is_executable(mode):
    """Check if mode should be considered executable."""
    return bool(mode & 0o111)


def mode_kind(mode):
    """Determine the Bazaar inventory kind based on Unix file mode.

    :param mode: Unix file mode, or None.
    :return: ``'directory'``, ``'file'``, ``'symlink'``,
        ``'tree-reference'``, or None if ``mode`` is None.
    :raises AssertionError: For modes that match none of the above.
    """
    if mode is None:
        return None
    # Integer division: these are octal digit extractions, not ratios.
    entry_kind = (mode & 0o700000) // 0o100000
    if entry_kind == 0:
        return 'directory'
    elif entry_kind == 1:
        file_kind = (mode & 0o70000) // 0o10000
        if file_kind == 0:
            return 'file'
        elif file_kind == 2:
            return 'symlink'
        elif file_kind == 6:
            return 'tree-reference'
        else:
            raise AssertionError(
                "Unknown file kind %d, perms=%o." % (file_kind, mode,))
    else:
        raise AssertionError(
            "Unknown kind, perms=%r." % (mode,))


def object_mode(kind, executable):
    """Return the git file mode for an inventory kind.

    :param kind: ``'directory'``, ``'symlink'``, ``'file'`` or
        ``'tree-reference'``.
    :param executable: Whether to add the execute bits (applied to files
        and symlinks only).
    :raises AssertionError: For any other kind.
    """
    if kind == 'directory':
        return stat.S_IFDIR
    elif kind == 'symlink':
        mode = stat.S_IFLNK
        if executable:
            mode |= 0o111
        return mode
    elif kind == 'file':
        mode = stat.S_IFREG | 0o644
        if executable:
            mode |= 0o111
        return mode
    elif kind == 'tree-reference':
        from dulwich.objects import S_IFGITLINK
        return S_IFGITLINK
    else:
        raise AssertionError("Unknown kind %r" % (kind,))


def entry_mode(entry):
    """Determine the git file mode for an inventory entry."""
    return object_mode(entry.kind, getattr(entry, 'executable', False))


def extract_unusual_modes(rev):
    """Return the unusual file modes stored on a revision.

    :return: Dict as returned by the mapping's
        ``export_unusual_file_modes``, or ``{}`` when the revision id is
        not a git revision id.
    """
    try:
        foreign_revid, mapping = mapping_registry.parse_revision_id(
            rev.revision_id)
    except errors.InvalidRevisionId:
        return {}
    else:
        return mapping.export_unusual_file_modes(rev)


def parse_git_svn_id(text):
    """Parse a git-svn id of the form ``<url>@<rev> <uuid>``.

    :return: Tuple of (url, integer revision, uuid).
    :raises ValueError: If ``text`` doesn't have that shape.
    """
    (head, uuid) = text.rsplit(" ", 1)
    (full_url, rev) = head.rsplit("@", 1)
    return (full_url, int(rev), uuid)


def needs_roundtripping(repo, revid):
    """Return True if ``revid`` is not a git revision id.

    ``repo`` is accepted but not used.
    """
    try:
        mapping_registry.parse_revision_id(revid)
    except errors.InvalidRevisionId:
        return True
    else:
        return False