1# repo.py -- For dealing with git repositories. 2# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net> 3# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 4# 5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 6# General Public License as public by the Free Software Foundation; version 2.0 7# or (at your option) any later version. You can redistribute it and/or 8# modify it under the terms of either of these two licenses. 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15# 16# You should have received a copy of the licenses; if not, see 17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 19# License, Version 2.0. 20# 21 22 23"""Repository access. 24 25This module contains the base class for git repositories 26(BaseRepo) and an implementation which uses a repository on 27local disk (Repo). 28 29""" 30 31from io import BytesIO 32import errno 33import os 34import sys 35import stat 36import time 37 38from dulwich.errors import ( 39 NoIndexPresent, 40 NotBlobError, 41 NotCommitError, 42 NotGitRepository, 43 NotTreeError, 44 NotTagError, 45 CommitError, 46 RefFormatError, 47 HookError, 48 ) 49from dulwich.file import ( 50 GitFile, 51 ) 52from dulwich.object_store import ( 53 DiskObjectStore, 54 MemoryObjectStore, 55 ObjectStoreGraphWalker, 56 ) 57from dulwich.objects import ( 58 check_hexsha, 59 Blob, 60 Commit, 61 ShaFile, 62 Tag, 63 Tree, 64 ) 65from dulwich.pack import ( 66 pack_objects_to_data, 67 ) 68 69from dulwich.hooks import ( 70 PreCommitShellHook, 71 PostCommitShellHook, 72 CommitMsgShellHook, 73 PostReceiveShellHook, 74 ) 75 76from dulwich.line_ending import BlobNormalizer 77 78from dulwich.refs import ( # noqa: F401 79 ANNOTATED_TAG_SUFFIX, 80 check_ref_format, 81 RefsContainer, 82 DictRefsContainer, 83 InfoRefsContainer, 84 DiskRefsContainer, 85 read_packed_refs, 86 read_packed_refs_with_peeled, 87 write_packed_refs, 88 SYMREF, 89 ) 90 91 92import warnings 93 94 95CONTROLDIR = '.git' 96OBJECTDIR = 'objects' 97REFSDIR = 'refs' 98REFSDIR_TAGS = 'tags' 99REFSDIR_HEADS = 'heads' 100INDEX_FILENAME = "index" 101COMMONDIR = 'commondir' 102GITDIR = 'gitdir' 103WORKTREES = 'worktrees' 104 105BASE_DIRECTORIES = [ 106 ["branches"], 107 [REFSDIR], 108 [REFSDIR, REFSDIR_TAGS], 109 [REFSDIR, REFSDIR_HEADS], 110 ["hooks"], 111 ["info"] 112 ] 113 114DEFAULT_REF = b'refs/heads/master' 115 116 117class InvalidUserIdentity(Exception): 118 """User identity is not of the format 'user <email>'""" 119 120 def __init__(self, identity): 121 self.identity = identity 122 123 124def _get_default_identity(): 125 import getpass 126 import socket 127 username = getpass.getuser() 128 try: 129 import pwd 130 except ImportError: 131 fullname = None 132 else: 133 try: 134 gecos = pwd.getpwnam(username).pw_gecos 135 except KeyError: 136 fullname = None 137 else: 138 fullname = gecos.split(',')[0] 139 if not fullname: 140 fullname = username 141 email = os.environ.get('EMAIL') 142 if email is None: 143 email = "{}@{}".format(username, socket.gethostname()) 144 return (fullname, email) 145 146 147def get_user_identity(config, kind=None): 148 """Determine the identity to use for new commits. 149 """ 150 if kind: 151 user = os.environ.get("GIT_" + kind + "_NAME") 152 if user is not None: 153 user = user.encode('utf-8') 154 email = os.environ.get("GIT_" + kind + "_EMAIL") 155 if email is not None: 156 email = email.encode('utf-8') 157 else: 158 user = None 159 email = None 160 if user is None: 161 try: 162 user = config.get(("user", ), "name") 163 except KeyError: 164 user = None 165 if email is None: 166 try: 167 email = config.get(("user", ), "email") 168 except KeyError: 169 email = None 170 default_user, default_email = _get_default_identity() 171 if user is None: 172 user = default_user 173 if not isinstance(user, bytes): 174 user = user.encode('utf-8') 175 if email is None: 176 email = default_email 177 if not isinstance(email, bytes): 178 email = email.encode('utf-8') 179 if email.startswith(b'<') and email.endswith(b'>'): 180 email = email[1:-1] 181 return (user + b" <" + email + b">") 182 183 184def check_user_identity(identity): 185 """Verify that a user identity is formatted correctly. 186 187 Args: 188 identity: User identity bytestring 189 Raises: 190 InvalidUserIdentity: Raised when identity is invalid 191 """ 192 try: 193 fst, snd = identity.split(b' <', 1) 194 except ValueError: 195 raise InvalidUserIdentity(identity) 196 if b'>' not in snd: 197 raise InvalidUserIdentity(identity) 198 199 200def parse_graftpoints(graftpoints): 201 """Convert a list of graftpoints into a dict 202 203 Args: 204 graftpoints: Iterator of graftpoint lines 205 206 Each line is formatted as: 207 <commit sha1> <parent sha1> [<parent sha1>]* 208 209 Resulting dictionary is: 210 <commit sha1>: [<parent sha1>*] 211 212 https://git.wiki.kernel.org/index.php/GraftPoint 213 """ 214 grafts = {} 215 for l in graftpoints: 216 raw_graft = l.split(None, 1) 217 218 commit = raw_graft[0] 219 if len(raw_graft) == 2: 220 parents = raw_graft[1].split() 221 else: 222 parents = [] 223 224 for sha in [commit] + parents: 225 check_hexsha(sha, 'Invalid graftpoint') 226 227 grafts[commit] = parents 228 return grafts 229 230 231def serialize_graftpoints(graftpoints): 232 """Convert a dictionary of grafts into string 233 234 The graft dictionary is: 235 <commit sha1>: [<parent sha1>*] 236 237 Each line is formatted as: 238 <commit sha1> <parent sha1> [<parent sha1>]* 239 240 https://git.wiki.kernel.org/index.php/GraftPoint 241 242 """ 243 graft_lines = [] 244 for commit, parents in graftpoints.items(): 245 if parents: 246 graft_lines.append(commit + b' ' + b' '.join(parents)) 247 else: 248 graft_lines.append(commit) 249 return b'\n'.join(graft_lines) 250 251 252def _set_filesystem_hidden(path): 253 """Mark path as to be hidden if supported by platform and filesystem. 254 255 On win32 uses SetFileAttributesW api: 256 <https://docs.microsoft.com/windows/desktop/api/fileapi/nf-fileapi-setfileattributesw> 257 """ 258 if sys.platform == 'win32': 259 import ctypes 260 from ctypes.wintypes import BOOL, DWORD, LPCWSTR 261 262 FILE_ATTRIBUTE_HIDDEN = 2 263 SetFileAttributesW = ctypes.WINFUNCTYPE(BOOL, LPCWSTR, DWORD)( 264 ("SetFileAttributesW", ctypes.windll.kernel32)) 265 266 if isinstance(path, bytes): 267 path = path.decode(sys.getfilesystemencoding()) 268 if not SetFileAttributesW(path, FILE_ATTRIBUTE_HIDDEN): 269 pass # Could raise or log `ctypes.WinError()` here 270 271 # Could implement other platform specific filesytem hiding here 272 273 274class BaseRepo(object): 275 """Base class for a git repository. 276 277 :ivar object_store: Dictionary-like object for accessing 278 the objects 279 :ivar refs: Dictionary-like object with the refs in this 280 repository 281 """ 282 283 def __init__(self, object_store, refs): 284 """Open a repository. 285 286 This shouldn't be called directly, but rather through one of the 287 base classes, such as MemoryRepo or Repo. 288 289 Args: 290 object_store: Object store to use 291 refs: Refs container to use 292 """ 293 self.object_store = object_store 294 self.refs = refs 295 296 self._graftpoints = {} 297 self.hooks = {} 298 299 def _determine_file_mode(self): 300 """Probe the file-system to determine whether permissions can be trusted. 301 302 Returns: True if permissions can be trusted, False otherwise. 303 """ 304 raise NotImplementedError(self._determine_file_mode) 305 306 def _init_files(self, bare): 307 """Initialize a default set of named files.""" 308 from dulwich.config import ConfigFile 309 self._put_named_file('description', b"Unnamed repository") 310 f = BytesIO() 311 cf = ConfigFile() 312 cf.set("core", "repositoryformatversion", "0") 313 if self._determine_file_mode(): 314 cf.set("core", "filemode", True) 315 else: 316 cf.set("core", "filemode", False) 317 318 cf.set("core", "bare", bare) 319 cf.set("core", "logallrefupdates", True) 320 cf.write_to_file(f) 321 self._put_named_file('config', f.getvalue()) 322 self._put_named_file(os.path.join('info', 'exclude'), b'') 323 324 def get_named_file(self, path): 325 """Get a file from the control dir with a specific name. 326 327 Although the filename should be interpreted as a filename relative to 328 the control dir in a disk-based Repo, the object returned need not be 329 pointing to a file in that location. 330 331 Args: 332 path: The path to the file, relative to the control dir. 333 Returns: An open file object, or None if the file does not exist. 334 """ 335 raise NotImplementedError(self.get_named_file) 336 337 def _put_named_file(self, path, contents): 338 """Write a file to the control dir with the given name and contents. 339 340 Args: 341 path: The path to the file, relative to the control dir. 342 contents: A string to write to the file. 343 """ 344 raise NotImplementedError(self._put_named_file) 345 346 def _del_named_file(self, path): 347 """Delete a file in the contrl directory with the given name.""" 348 raise NotImplementedError(self._del_named_file) 349 350 def open_index(self): 351 """Open the index for this repository. 352 353 Raises: 354 NoIndexPresent: If no index is present 355 Returns: The matching `Index` 356 """ 357 raise NotImplementedError(self.open_index) 358 359 def fetch(self, target, determine_wants=None, progress=None, depth=None): 360 """Fetch objects into another repository. 361 362 Args: 363 target: The target repository 364 determine_wants: Optional function to determine what refs to 365 fetch. 366 progress: Optional progress function 367 depth: Optional shallow fetch depth 368 Returns: The local refs 369 """ 370 if determine_wants is None: 371 determine_wants = target.object_store.determine_wants_all 372 count, pack_data = self.fetch_pack_data( 373 determine_wants, target.get_graph_walker(), progress=progress, 374 depth=depth) 375 target.object_store.add_pack_data(count, pack_data, progress) 376 return self.get_refs() 377 378 def fetch_pack_data(self, determine_wants, graph_walker, progress, 379 get_tagged=None, depth=None): 380 """Fetch the pack data required for a set of revisions. 381 382 Args: 383 determine_wants: Function that takes a dictionary with heads 384 and returns the list of heads to fetch. 385 graph_walker: Object that can iterate over the list of revisions 386 to fetch and has an "ack" method that will be called to acknowledge 387 that a revision is present. 388 progress: Simple progress function that will be called with 389 updated progress strings. 390 get_tagged: Function that returns a dict of pointed-to sha -> 391 tag sha for including tags. 392 depth: Shallow fetch depth 393 Returns: count and iterator over pack data 394 """ 395 # TODO(jelmer): Fetch pack data directly, don't create objects first. 396 objects = self.fetch_objects(determine_wants, graph_walker, progress, 397 get_tagged, depth=depth) 398 return pack_objects_to_data(objects) 399 400 def fetch_objects(self, determine_wants, graph_walker, progress, 401 get_tagged=None, depth=None): 402 """Fetch the missing objects required for a set of revisions. 403 404 Args: 405 determine_wants: Function that takes a dictionary with heads 406 and returns the list of heads to fetch. 407 graph_walker: Object that can iterate over the list of revisions 408 to fetch and has an "ack" method that will be called to acknowledge 409 that a revision is present. 410 progress: Simple progress function that will be called with 411 updated progress strings. 412 get_tagged: Function that returns a dict of pointed-to sha -> 413 tag sha for including tags. 414 depth: Shallow fetch depth 415 Returns: iterator over objects, with __len__ implemented 416 """ 417 if depth not in (None, 0): 418 raise NotImplementedError("depth not supported yet") 419 420 refs = {} 421 for ref, sha in self.get_refs().items(): 422 try: 423 obj = self.object_store[sha] 424 except KeyError: 425 warnings.warn( 426 'ref %s points at non-present sha %s' % ( 427 ref.decode('utf-8', 'replace'), sha.decode('ascii')), 428 UserWarning) 429 continue 430 else: 431 if isinstance(obj, Tag): 432 refs[ref + ANNOTATED_TAG_SUFFIX] = obj.object[1] 433 refs[ref] = sha 434 435 wants = determine_wants(refs) 436 if not isinstance(wants, list): 437 raise TypeError("determine_wants() did not return a list") 438 439 shallows = getattr(graph_walker, 'shallow', frozenset()) 440 unshallows = getattr(graph_walker, 'unshallow', frozenset()) 441 442 if wants == []: 443 # TODO(dborowitz): find a way to short-circuit that doesn't change 444 # this interface. 445 446 if shallows or unshallows: 447 # Do not send a pack in shallow short-circuit path 448 return None 449 450 return [] 451 452 # If the graph walker is set up with an implementation that can 453 # ACK/NAK to the wire, it will write data to the client through 454 # this call as a side-effect. 455 haves = self.object_store.find_common_revisions(graph_walker) 456 457 # Deal with shallow requests separately because the haves do 458 # not reflect what objects are missing 459 if shallows or unshallows: 460 # TODO: filter the haves commits from iter_shas. the specific 461 # commits aren't missing. 462 haves = [] 463 464 def get_parents(commit): 465 if commit.id in shallows: 466 return [] 467 return self.get_parents(commit.id, commit) 468 469 return self.object_store.iter_shas( 470 self.object_store.find_missing_objects( 471 haves, wants, progress, 472 get_tagged, 473 get_parents=get_parents)) 474 475 def get_graph_walker(self, heads=None): 476 """Retrieve a graph walker. 477 478 A graph walker is used by a remote repository (or proxy) 479 to find out which objects are present in this repository. 480 481 Args: 482 heads: Repository heads to use (optional) 483 Returns: A graph walker object 484 """ 485 if heads is None: 486 heads = [ 487 sha for sha in self.refs.as_dict(b'refs/heads').values() 488 if sha in self.object_store] 489 return ObjectStoreGraphWalker( 490 heads, self.get_parents, shallow=self.get_shallow()) 491 492 def get_refs(self): 493 """Get dictionary with all refs. 494 495 Returns: A ``dict`` mapping ref names to SHA1s 496 """ 497 return self.refs.as_dict() 498 499 def head(self): 500 """Return the SHA1 pointed at by HEAD.""" 501 return self.refs[b'HEAD'] 502 503 def _get_object(self, sha, cls): 504 assert len(sha) in (20, 40) 505 ret = self.get_object(sha) 506 if not isinstance(ret, cls): 507 if cls is Commit: 508 raise NotCommitError(ret) 509 elif cls is Blob: 510 raise NotBlobError(ret) 511 elif cls is Tree: 512 raise NotTreeError(ret) 513 elif cls is Tag: 514 raise NotTagError(ret) 515 else: 516 raise Exception("Type invalid: %r != %r" % ( 517 ret.type_name, cls.type_name)) 518 return ret 519 520 def get_object(self, sha): 521 """Retrieve the object with the specified SHA. 522 523 Args: 524 sha: SHA to retrieve 525 Returns: A ShaFile object 526 Raises: 527 KeyError: when the object can not be found 528 """ 529 return self.object_store[sha] 530 531 def get_parents(self, sha, commit=None): 532 """Retrieve the parents of a specific commit. 533 534 If the specific commit is a graftpoint, the graft parents 535 will be returned instead. 536 537 Args: 538 sha: SHA of the commit for which to retrieve the parents 539 commit: Optional commit matching the sha 540 Returns: List of parents 541 """ 542 543 try: 544 return self._graftpoints[sha] 545 except KeyError: 546 if commit is None: 547 commit = self[sha] 548 return commit.parents 549 550 def get_config(self): 551 """Retrieve the config object. 552 553 Returns: `ConfigFile` object for the ``.git/config`` file. 554 """ 555 raise NotImplementedError(self.get_config) 556 557 def get_description(self): 558 """Retrieve the description for this repository. 559 560 Returns: String with the description of the repository 561 as set by the user. 562 """ 563 raise NotImplementedError(self.get_description) 564 565 def set_description(self, description): 566 """Set the description for this repository. 567 568 Args: 569 description: Text to set as description for this repository. 570 """ 571 raise NotImplementedError(self.set_description) 572 573 def get_config_stack(self): 574 """Return a config stack for this repository. 575 576 This stack accesses the configuration for both this repository 577 itself (.git/config) and the global configuration, which usually 578 lives in ~/.gitconfig. 579 580 Returns: `Config` instance for this repository 581 """ 582 from dulwich.config import StackedConfig 583 backends = [self.get_config()] + StackedConfig.default_backends() 584 return StackedConfig(backends, writable=backends[0]) 585 586 def get_shallow(self): 587 """Get the set of shallow commits. 588 589 Returns: Set of shallow commits. 590 """ 591 f = self.get_named_file('shallow') 592 if f is None: 593 return set() 594 with f: 595 return set(l.strip() for l in f) 596 597 def update_shallow(self, new_shallow, new_unshallow): 598 """Update the list of shallow objects. 599 600 Args: 601 new_shallow: Newly shallow objects 602 new_unshallow: Newly no longer shallow objects 603 """ 604 shallow = self.get_shallow() 605 if new_shallow: 606 shallow.update(new_shallow) 607 if new_unshallow: 608 shallow.difference_update(new_unshallow) 609 self._put_named_file( 610 'shallow', 611 b''.join([sha + b'\n' for sha in shallow])) 612 613 def get_peeled(self, ref): 614 """Get the peeled value of a ref. 615 616 Args: 617 ref: The refname to peel. 618 Returns: The fully-peeled SHA1 of a tag object, after peeling all 619 intermediate tags; if the original ref does not point to a tag, 620 this will equal the original SHA1. 621 """ 622 cached = self.refs.get_peeled(ref) 623 if cached is not None: 624 return cached 625 return self.object_store.peel_sha(self.refs[ref]).id 626 627 def get_walker(self, include=None, *args, **kwargs): 628 """Obtain a walker for this repository. 629 630 Args: 631 include: Iterable of SHAs of commits to include along with their 632 ancestors. Defaults to [HEAD] 633 exclude: Iterable of SHAs of commits to exclude along with their 634 ancestors, overriding includes. 635 order: ORDER_* constant specifying the order of results. 636 Anything other than ORDER_DATE may result in O(n) memory usage. 637 reverse: If True, reverse the order of output, requiring O(n) 638 memory. 639 max_entries: The maximum number of entries to yield, or None for 640 no limit. 641 paths: Iterable of file or subtree paths to show entries for. 642 rename_detector: diff.RenameDetector object for detecting 643 renames. 644 follow: If True, follow path across renames/copies. Forces a 645 default rename_detector. 646 since: Timestamp to list commits after. 647 until: Timestamp to list commits before. 648 queue_cls: A class to use for a queue of commits, supporting the 649 iterator protocol. The constructor takes a single argument, the 650 Walker. 651 Returns: A `Walker` object 652 """ 653 from dulwich.walk import Walker 654 if include is None: 655 include = [self.head()] 656 if isinstance(include, str): 657 include = [include] 658 659 kwargs['get_parents'] = lambda commit: self.get_parents( 660 commit.id, commit) 661 662 return Walker(self.object_store, include, *args, **kwargs) 663 664 def __getitem__(self, name): 665 """Retrieve a Git object by SHA1 or ref. 666 667 Args: 668 name: A Git object SHA1 or a ref name 669 Returns: A `ShaFile` object, such as a Commit or Blob 670 Raises: 671 KeyError: when the specified ref or object does not exist 672 """ 673 if not isinstance(name, bytes): 674 raise TypeError("'name' must be bytestring, not %.80s" % 675 type(name).__name__) 676 if len(name) in (20, 40): 677 try: 678 return self.object_store[name] 679 except (KeyError, ValueError): 680 pass 681 try: 682 return self.object_store[self.refs[name]] 683 except RefFormatError: 684 raise KeyError(name) 685 686 def __contains__(self, name): 687 """Check if a specific Git object or ref is present. 688 689 Args: 690 name: Git object SHA1 or ref name 691 """ 692 if len(name) in (20, 40): 693 return name in self.object_store or name in self.refs 694 else: 695 return name in self.refs 696 697 def __setitem__(self, name, value): 698 """Set a ref. 699 700 Args: 701 name: ref name 702 value: Ref value - either a ShaFile object, or a hex sha 703 """ 704 if name.startswith(b"refs/") or name == b'HEAD': 705 if isinstance(value, ShaFile): 706 self.refs[name] = value.id 707 elif isinstance(value, bytes): 708 self.refs[name] = value 709 else: 710 raise TypeError(value) 711 else: 712 raise ValueError(name) 713 714 def __delitem__(self, name): 715 """Remove a ref. 716 717 Args: 718 name: Name of the ref to remove 719 """ 720 if name.startswith(b"refs/") or name == b"HEAD": 721 del self.refs[name] 722 else: 723 raise ValueError(name) 724 725 def _get_user_identity(self, config, kind=None): 726 """Determine the identity to use for new commits. 727 """ 728 # TODO(jelmer): Deprecate this function in favor of get_user_identity 729 return get_user_identity(config) 730 731 def _add_graftpoints(self, updated_graftpoints): 732 """Add or modify graftpoints 733 734 Args: 735 updated_graftpoints: Dict of commit shas to list of parent shas 736 """ 737 738 # Simple validation 739 for commit, parents in updated_graftpoints.items(): 740 for sha in [commit] + parents: 741 check_hexsha(sha, 'Invalid graftpoint') 742 743 self._graftpoints.update(updated_graftpoints) 744 745 def _remove_graftpoints(self, to_remove=[]): 746 """Remove graftpoints 747 748 Args: 749 to_remove: List of commit shas 750 """ 751 for sha in to_remove: 752 del self._graftpoints[sha] 753 754 def _read_heads(self, name): 755 f = self.get_named_file(name) 756 if f is None: 757 return [] 758 with f: 759 return [l.strip() for l in f.readlines() if l.strip()] 760 761 def do_commit(self, message=None, committer=None, 762 author=None, commit_timestamp=None, 763 commit_timezone=None, author_timestamp=None, 764 author_timezone=None, tree=None, encoding=None, 765 ref=b'HEAD', merge_heads=None): 766 """Create a new commit. 767 768 Args: 769 message: Commit message 770 committer: Committer fullname 771 author: Author fullname (defaults to committer) 772 commit_timestamp: Commit timestamp (defaults to now) 773 commit_timezone: Commit timestamp timezone (defaults to GMT) 774 author_timestamp: Author timestamp (defaults to commit 775 timestamp) 776 author_timezone: Author timestamp timezone 777 (defaults to commit timestamp timezone) 778 tree: SHA1 of the tree root to use (if not specified the 779 current index will be committed). 780 encoding: Encoding 781 ref: Optional ref to commit to (defaults to current branch) 782 merge_heads: Merge heads (defaults to .git/MERGE_HEADS) 783 Returns: New commit SHA1 784 """ 785 import time 786 c = Commit() 787 if tree is None: 788 index = self.open_index() 789 c.tree = index.commit(self.object_store) 790 else: 791 if len(tree) != 40: 792 raise ValueError("tree must be a 40-byte hex sha string") 793 c.tree = tree 794 795 try: 796 self.hooks['pre-commit'].execute() 797 except HookError as e: 798 raise CommitError(e) 799 except KeyError: # no hook defined, silent fallthrough 800 pass 801 802 config = self.get_config_stack() 803 if merge_heads is None: 804 merge_heads = self._read_heads('MERGE_HEADS') 805 if committer is None: 806 committer = get_user_identity(config, kind='COMMITTER') 807 check_user_identity(committer) 808 c.committer = committer 809 if commit_timestamp is None: 810 # FIXME: Support GIT_COMMITTER_DATE environment variable 811 commit_timestamp = time.time() 812 c.commit_time = int(commit_timestamp) 813 if commit_timezone is None: 814 # FIXME: Use current user timezone rather than UTC 815 commit_timezone = 0 816 c.commit_timezone = commit_timezone 817 if author is None: 818 author = get_user_identity(config, kind='AUTHOR') 819 c.author = author 820 check_user_identity(author) 821 if author_timestamp is None: 822 # FIXME: Support GIT_AUTHOR_DATE environment variable 823 author_timestamp = commit_timestamp 824 c.author_time = int(author_timestamp) 825 if author_timezone is None: 826 author_timezone = commit_timezone 827 c.author_timezone = author_timezone 828 if encoding is None: 829 try: 830 encoding = config.get(('i18n', ), 'commitEncoding') 831 except KeyError: 832 pass # No dice 833 if encoding is not None: 834 c.encoding = encoding 835 if message is None: 836 # FIXME: Try to read commit message from .git/MERGE_MSG 837 raise ValueError("No commit message specified") 838 839 try: 840 c.message = self.hooks['commit-msg'].execute(message) 841 if c.message is None: 842 c.message = message 843 except HookError as e: 844 raise CommitError(e) 845 except KeyError: # no hook defined, message not modified 846 c.message = message 847 848 if ref is None: 849 # Create a dangling commit 850 c.parents = merge_heads 851 self.object_store.add_object(c) 852 else: 853 try: 854 old_head = self.refs[ref] 855 c.parents = [old_head] + merge_heads 856 self.object_store.add_object(c) 857 ok = self.refs.set_if_equals( 858 ref, old_head, c.id, message=b"commit: " + message, 859 committer=committer, timestamp=commit_timestamp, 860 timezone=commit_timezone) 861 except KeyError: 862 c.parents = merge_heads 863 self.object_store.add_object(c) 864 ok = self.refs.add_if_new( 865 ref, c.id, message=b"commit: " + message, 866 committer=committer, timestamp=commit_timestamp, 867 timezone=commit_timezone) 868 if not ok: 869 # Fail if the atomic compare-and-swap failed, leaving the 870 # commit and all its objects as garbage. 871 raise CommitError("%s changed during commit" % (ref,)) 872 873 self._del_named_file('MERGE_HEADS') 874 875 try: 876 self.hooks['post-commit'].execute() 877 except HookError as e: # silent failure 878 warnings.warn("post-commit hook failed: %s" % e, UserWarning) 879 except KeyError: # no hook defined, silent fallthrough 880 pass 881 882 return c.id 883 884 885def read_gitfile(f): 886 """Read a ``.git`` file. 887 888 The first line of the file should start with "gitdir: " 889 890 Args: 891 f: File-like object to read from 892 Returns: A path 893 """ 894 cs = f.read() 895 if not cs.startswith("gitdir: "): 896 raise ValueError("Expected file to start with 'gitdir: '") 897 return cs[len("gitdir: "):].rstrip("\n") 898 899 900class Repo(BaseRepo): 901 """A git repository backed by local disk. 902 903 To open an existing repository, call the contructor with 904 the path of the repository. 905 906 To create a new repository, use the Repo.init class method. 907 """ 908 909 def __init__(self, root): 910 hidden_path = os.path.join(root, CONTROLDIR) 911 if os.path.isdir(os.path.join(hidden_path, OBJECTDIR)): 912 self.bare = False 913 self._controldir = hidden_path 914 elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and 915 os.path.isdir(os.path.join(root, REFSDIR))): 916 self.bare = True 917 self._controldir = root 918 elif os.path.isfile(hidden_path): 919 self.bare = False 920 with open(hidden_path, 'r') as f: 921 path = read_gitfile(f) 922 self.bare = False 923 self._controldir = os.path.join(root, path) 924 else: 925 raise NotGitRepository( 926 "No git repository was found at %(path)s" % dict(path=root) 927 ) 928 commondir = self.get_named_file(COMMONDIR) 929 if commondir is not None: 930 with commondir: 931 self._commondir = os.path.join( 932 self.controldir(), 933 commondir.read().rstrip(b"\r\n").decode( 934 sys.getfilesystemencoding())) 935 else: 936 self._commondir = self._controldir 937 self.path = root 938 config = self.get_config() 939 object_store = DiskObjectStore.from_config( 940 os.path.join(self.commondir(), OBJECTDIR), 941 config) 942 refs = DiskRefsContainer(self.commondir(), self._controldir, 943 logger=self._write_reflog) 944 BaseRepo.__init__(self, object_store, refs) 945 946 self._graftpoints = {} 947 graft_file = self.get_named_file(os.path.join("info", "grafts"), 948 basedir=self.commondir()) 949 if graft_file: 950 with graft_file: 951 self._graftpoints.update(parse_graftpoints(graft_file)) 952 graft_file = self.get_named_file("shallow", 953 basedir=self.commondir()) 954 if graft_file: 955 with graft_file: 956 self._graftpoints.update(parse_graftpoints(graft_file)) 957 958 self.hooks['pre-commit'] = PreCommitShellHook(self.controldir()) 959 self.hooks['commit-msg'] = CommitMsgShellHook(self.controldir()) 960 self.hooks['post-commit'] = PostCommitShellHook(self.controldir()) 961 self.hooks['post-receive'] = PostReceiveShellHook(self.controldir()) 962 963 def _write_reflog(self, ref, old_sha, new_sha, committer, timestamp, 964 timezone, message): 965 from .reflog import format_reflog_line 966 path = os.path.join( 967 self.controldir(), 'logs', 968 ref.decode(sys.getfilesystemencoding())) 969 try: 970 os.makedirs(os.path.dirname(path)) 971 except OSError as e: 972 if e.errno != errno.EEXIST: 973 raise 974 if committer is None: 975 config = self.get_config_stack() 976 committer = self._get_user_identity(config) 977 check_user_identity(committer) 978 if timestamp is None: 979 timestamp = int(time.time()) 980 if timezone is None: 981 timezone = 0 # FIXME 982 with open(path, 'ab') as f: 983 f.write(format_reflog_line(old_sha, new_sha, committer, 984 timestamp, timezone, message) + b'\n') 985 986 @classmethod 987 def discover(cls, start='.'): 988 """Iterate parent directories to discover a repository 989 990 Return a Repo object for the first parent directory that looks like a 991 Git repository. 992 993 Args: 994 start: The directory to start discovery from (defaults to '.') 995 """ 996 remaining = True 997 path = os.path.abspath(start) 998 while remaining: 999 try: 1000 return cls(path) 1001 except NotGitRepository: 1002 path, remaining = os.path.split(path) 1003 raise NotGitRepository( 1004 "No git repository was found at %(path)s" % dict(path=start) 1005 ) 1006 1007 def controldir(self): 1008 """Return the path of the control directory.""" 1009 return self._controldir 1010 1011 def commondir(self): 1012 """Return the path of the common directory. 1013 1014 For a main working tree, it is identical to controldir(). 1015 1016 For a linked working tree, it is the control directory of the 1017 main working tree.""" 1018 1019 return self._commondir 1020 1021 def _determine_file_mode(self): 1022 """Probe the file-system to determine whether permissions can be trusted. 1023 1024 Returns: True if permissions can be trusted, False otherwise. 1025 """ 1026 fname = os.path.join(self.path, '.probe-permissions') 1027 with open(fname, 'w') as f: 1028 f.write('') 1029 1030 st1 = os.lstat(fname) 1031 try: 1032 os.chmod(fname, st1.st_mode ^ stat.S_IXUSR) 1033 except EnvironmentError as e: 1034 if e.errno == errno.EPERM: 1035 return False 1036 raise 1037 st2 = os.lstat(fname) 1038 1039 os.unlink(fname) 1040 1041 mode_differs = st1.st_mode != st2.st_mode 1042 st2_has_exec = (st2.st_mode & stat.S_IXUSR) != 0 1043 1044 return mode_differs and st2_has_exec 1045 1046 def _put_named_file(self, path, contents): 1047 """Write a file to the control dir with the given name and contents. 1048 1049 Args: 1050 path: The path to the file, relative to the control dir. 1051 contents: A string to write to the file. 1052 """ 1053 path = path.lstrip(os.path.sep) 1054 with GitFile(os.path.join(self.controldir(), path), 'wb') as f: 1055 f.write(contents) 1056 1057 def _del_named_file(self, path): 1058 try: 1059 os.unlink(os.path.join(self.controldir(), path)) 1060 except (IOError, OSError) as e: 1061 if e.errno == errno.ENOENT: 1062 return 1063 raise 1064 1065 def get_named_file(self, path, basedir=None): 1066 """Get a file from the control dir with a specific name. 1067 1068 Although the filename should be interpreted as a filename relative to 1069 the control dir in a disk-based Repo, the object returned need not be 1070 pointing to a file in that location. 1071 1072 Args: 1073 path: The path to the file, relative to the control dir. 1074 basedir: Optional argument that specifies an alternative to the 1075 control dir. 1076 Returns: An open file object, or None if the file does not exist. 1077 """ 1078 # TODO(dborowitz): sanitize filenames, since this is used directly by 1079 # the dumb web serving code. 1080 if basedir is None: 1081 basedir = self.controldir() 1082 path = path.lstrip(os.path.sep) 1083 try: 1084 return open(os.path.join(basedir, path), 'rb') 1085 except (IOError, OSError) as e: 1086 if e.errno == errno.ENOENT: 1087 return None 1088 raise 1089 1090 def index_path(self): 1091 """Return path to the index file.""" 1092 return os.path.join(self.controldir(), INDEX_FILENAME) 1093 1094 def open_index(self): 1095 """Open the index for this repository. 1096 1097 Raises: 1098 NoIndexPresent: If no index is present 1099 Returns: The matching `Index` 1100 """ 1101 from dulwich.index import Index 1102 if not self.has_index(): 1103 raise NoIndexPresent() 1104 return Index(self.index_path()) 1105 1106 def has_index(self): 1107 """Check if an index is present.""" 1108 # Bare repos must never have index files; non-bare repos may have a 1109 # missing index file, which is treated as empty. 1110 return not self.bare 1111 1112 def stage(self, fs_paths): 1113 """Stage a set of paths. 1114 1115 Args: 1116 fs_paths: List of paths, relative to the repository path 1117 """ 1118 1119 root_path_bytes = self.path.encode(sys.getfilesystemencoding()) 1120 1121 if not isinstance(fs_paths, list): 1122 fs_paths = [fs_paths] 1123 from dulwich.index import ( 1124 blob_from_path_and_stat, 1125 index_entry_from_stat, 1126 _fs_to_tree_path, 1127 ) 1128 index = self.open_index() 1129 blob_normalizer = self.get_blob_normalizer() 1130 for fs_path in fs_paths: 1131 if not isinstance(fs_path, bytes): 1132 fs_path = fs_path.encode(sys.getfilesystemencoding()) 1133 if os.path.isabs(fs_path): 1134 raise ValueError( 1135 "path %r should be relative to " 1136 "repository root, not absolute" % fs_path) 1137 tree_path = _fs_to_tree_path(fs_path) 1138 full_path = os.path.join(root_path_bytes, fs_path) 1139 try: 1140 st = os.lstat(full_path) 1141 except OSError: 1142 # File no longer exists 1143 try: 1144 del index[tree_path] 1145 except KeyError: 1146 pass # already removed 1147 else: 1148 if not stat.S_ISDIR(st.st_mode): 1149 blob = blob_from_path_and_stat(full_path, st) 1150 blob = blob_normalizer.checkin_normalize(blob, fs_path) 1151 self.object_store.add_object(blob) 1152 index[tree_path] = index_entry_from_stat(st, blob.id, 0) 1153 else: 1154 try: 1155 del index[tree_path] 1156 except KeyError: 1157 pass 1158 index.write() 1159 1160 def clone(self, target_path, mkdir=True, bare=False, 1161 origin=b"origin", checkout=None): 1162 """Clone this repository. 1163 1164 Args: 1165 target_path: Target path 1166 mkdir: Create the target directory 1167 bare: Whether to create a bare repository 1168 origin: Base name for refs in target repository 1169 cloned from this repository 1170 Returns: Created repository as `Repo` 1171 """ 1172 if not bare: 1173 target = self.init(target_path, mkdir=mkdir) 1174 else: 1175 if checkout: 1176 raise ValueError("checkout and bare are incompatible") 1177 target = self.init_bare(target_path, mkdir=mkdir) 1178 self.fetch(target) 1179 encoded_path = self.path 1180 if not isinstance(encoded_path, bytes): 1181 encoded_path = encoded_path.encode(sys.getfilesystemencoding()) 1182 ref_message = b"clone: from " + encoded_path 1183 target.refs.import_refs( 1184 b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'), 1185 message=ref_message) 1186 target.refs.import_refs( 1187 b'refs/tags', self.refs.as_dict(b'refs/tags'), 1188 message=ref_message) 1189 try: 1190 target.refs.add_if_new( 1191 DEFAULT_REF, self.refs[DEFAULT_REF], 1192 message=ref_message) 1193 except KeyError: 1194 pass 1195 target_config = target.get_config() 1196 target_config.set(('remote', 'origin'), 'url', encoded_path) 1197 target_config.set(('remote', 'origin'), 'fetch', 1198 '+refs/heads/*:refs/remotes/origin/*') 1199 target_config.write_to_path() 1200 1201 # Update target head 1202 head_chain, head_sha = self.refs.follow(b'HEAD') 1203 if head_chain and head_sha is not None: 1204 target.refs.set_symbolic_ref(b'HEAD', head_chain[-1], 1205 message=ref_message) 1206 target[b'HEAD'] = head_sha 1207 1208 if checkout is None: 1209 checkout = (not bare) 1210 if checkout: 1211 # Checkout HEAD to target dir 1212 target.reset_index() 1213 1214 return target 1215 1216 def reset_index(self, tree=None): 1217 """Reset the index back to a specific tree. 1218 1219 Args: 1220 tree: Tree SHA to reset to, None for current HEAD tree. 1221 """ 1222 from dulwich.index import ( 1223 build_index_from_tree, 1224 validate_path_element_default, 1225 validate_path_element_ntfs, 1226 ) 1227 if tree is None: 1228 tree = self[b'HEAD'].tree 1229 config = self.get_config() 1230 honor_filemode = config.get_boolean( 1231 b'core', b'filemode', os.name != "nt") 1232 if config.get_boolean(b'core', b'core.protectNTFS', os.name == "nt"): 1233 validate_path_element = validate_path_element_ntfs 1234 else: 1235 validate_path_element = validate_path_element_default 1236 return build_index_from_tree( 1237 self.path, self.index_path(), self.object_store, tree, 1238 honor_filemode=honor_filemode, 1239 validate_path_element=validate_path_element) 1240 1241 def get_config(self): 1242 """Retrieve the config object. 1243 1244 Returns: `ConfigFile` object for the ``.git/config`` file. 1245 """ 1246 from dulwich.config import ConfigFile 1247 path = os.path.join(self._controldir, 'config') 1248 try: 1249 return ConfigFile.from_path(path) 1250 except (IOError, OSError) as e: 1251 if e.errno != errno.ENOENT: 1252 raise 1253 ret = ConfigFile() 1254 ret.path = path 1255 return ret 1256 1257 def get_description(self): 1258 """Retrieve the description of this repository. 1259 1260 Returns: A string describing the repository or None. 1261 """ 1262 path = os.path.join(self._controldir, 'description') 1263 try: 1264 with GitFile(path, 'rb') as f: 1265 return f.read() 1266 except (IOError, OSError) as e: 1267 if e.errno != errno.ENOENT: 1268 raise 1269 return None 1270 1271 def __repr__(self): 1272 return "<Repo at %r>" % self.path 1273 1274 def set_description(self, description): 1275 """Set the description for this repository. 1276 1277 Args: 1278 description: Text to set as description for this repository. 1279 """ 1280 1281 self._put_named_file('description', description) 1282 1283 @classmethod 1284 def _init_maybe_bare(cls, path, bare): 1285 for d in BASE_DIRECTORIES: 1286 os.mkdir(os.path.join(path, *d)) 1287 DiskObjectStore.init(os.path.join(path, OBJECTDIR)) 1288 ret = cls(path) 1289 ret.refs.set_symbolic_ref(b'HEAD', DEFAULT_REF) 1290 ret._init_files(bare) 1291 return ret 1292 1293 @classmethod 1294 def init(cls, path, mkdir=False): 1295 """Create a new repository. 1296 1297 Args: 1298 path: Path in which to create the repository 1299 mkdir: Whether to create the directory 1300 Returns: `Repo` instance 1301 """ 1302 if mkdir: 1303 os.mkdir(path) 1304 controldir = os.path.join(path, CONTROLDIR) 1305 os.mkdir(controldir) 1306 _set_filesystem_hidden(controldir) 1307 cls._init_maybe_bare(controldir, False) 1308 return cls(path) 1309 1310 @classmethod 1311 def _init_new_working_directory(cls, path, main_repo, identifier=None, 1312 mkdir=False): 1313 """Create a new working directory linked to a repository. 1314 1315 Args: 1316 path: Path in which to create the working tree. 1317 main_repo: Main repository to reference 1318 identifier: Worktree identifier 1319 mkdir: Whether to create the directory 1320 Returns: `Repo` instance 1321 """ 1322 if mkdir: 1323 os.mkdir(path) 1324 if identifier is None: 1325 identifier = os.path.basename(path) 1326 main_worktreesdir = os.path.join(main_repo.controldir(), WORKTREES) 1327 worktree_controldir = os.path.join(main_worktreesdir, identifier) 1328 gitdirfile = os.path.join(path, CONTROLDIR) 1329 with open(gitdirfile, 'wb') as f: 1330 f.write(b'gitdir: ' + 1331 worktree_controldir.encode(sys.getfilesystemencoding()) + 1332 b'\n') 1333 try: 1334 os.mkdir(main_worktreesdir) 1335 except OSError as e: 1336 if e.errno != errno.EEXIST: 1337 raise 1338 try: 1339 os.mkdir(worktree_controldir) 1340 except OSError as e: 1341 if e.errno != errno.EEXIST: 1342 raise 1343 with open(os.path.join(worktree_controldir, GITDIR), 'wb') as f: 1344 f.write(gitdirfile.encode(sys.getfilesystemencoding()) + b'\n') 1345 with open(os.path.join(worktree_controldir, COMMONDIR), 'wb') as f: 1346 f.write(b'../..\n') 1347 with open(os.path.join(worktree_controldir, 'HEAD'), 'wb') as f: 1348 f.write(main_repo.head() + b'\n') 1349 r = cls(path) 1350 r.reset_index() 1351 return r 1352 1353 @classmethod 1354 def init_bare(cls, path, mkdir=False): 1355 """Create a new bare repository. 1356 1357 ``path`` should already exist and be an empty directory. 1358 1359 Args: 1360 path: Path to create bare repository in 1361 Returns: a `Repo` instance 1362 """ 1363 if mkdir: 1364 os.mkdir(path) 1365 return cls._init_maybe_bare(path, True) 1366 1367 create = init_bare 1368 1369 def close(self): 1370 """Close any files opened by this repository.""" 1371 self.object_store.close() 1372 1373 def __enter__(self): 1374 return self 1375 1376 def __exit__(self, exc_type, exc_val, exc_tb): 1377 self.close() 1378 1379 def get_blob_normalizer(self): 1380 """ Return a BlobNormalizer object 1381 """ 1382 # TODO Parse the git attributes files 1383 git_attributes = {} 1384 return BlobNormalizer( 1385 self.get_config_stack(), git_attributes 1386 ) 1387 1388 1389class MemoryRepo(BaseRepo): 1390 """Repo that stores refs, objects, and named files in memory. 1391 1392 MemoryRepos are always bare: they have no working tree and no index, since 1393 those have a stronger dependency on the filesystem. 1394 """ 1395 1396 def __init__(self): 1397 from dulwich.config import ConfigFile 1398 self._reflog = [] 1399 refs_container = DictRefsContainer({}, logger=self._append_reflog) 1400 BaseRepo.__init__(self, MemoryObjectStore(), refs_container) 1401 self._named_files = {} 1402 self.bare = True 1403 self._config = ConfigFile() 1404 self._description = None 1405 1406 def _append_reflog(self, *args): 1407 self._reflog.append(args) 1408 1409 def set_description(self, description): 1410 self._description = description 1411 1412 def get_description(self): 1413 return self._description 1414 1415 def _determine_file_mode(self): 1416 """Probe the file-system to determine whether permissions can be trusted. 1417 1418 Returns: True if permissions can be trusted, False otherwise. 1419 """ 1420 return sys.platform != 'win32' 1421 1422 def _put_named_file(self, path, contents): 1423 """Write a file to the control dir with the given name and contents. 1424 1425 Args: 1426 path: The path to the file, relative to the control dir. 1427 contents: A string to write to the file. 1428 """ 1429 self._named_files[path] = contents 1430 1431 def _del_named_file(self, path): 1432 try: 1433 del self._named_files[path] 1434 except KeyError: 1435 pass 1436 1437 def get_named_file(self, path, basedir=None): 1438 """Get a file from the control dir with a specific name. 1439 1440 Although the filename should be interpreted as a filename relative to 1441 the control dir in a disk-baked Repo, the object returned need not be 1442 pointing to a file in that location. 1443 1444 Args: 1445 path: The path to the file, relative to the control dir. 1446 Returns: An open file object, or None if the file does not exist. 1447 """ 1448 contents = self._named_files.get(path, None) 1449 if contents is None: 1450 return None 1451 return BytesIO(contents) 1452 1453 def open_index(self): 1454 """Fail to open index for this repo, since it is bare. 1455 1456 Raises: 1457 NoIndexPresent: Raised when no index is present 1458 """ 1459 raise NoIndexPresent() 1460 1461 def get_config(self): 1462 """Retrieve the config object. 1463 1464 Returns: `ConfigFile` object. 1465 """ 1466 return self._config 1467 1468 @classmethod 1469 def init_bare(cls, objects, refs): 1470 """Create a new bare repository in memory. 1471 1472 Args: 1473 objects: Objects for the new repository, 1474 as iterable 1475 refs: Refs as dictionary, mapping names 1476 to object SHA1s 1477 """ 1478 ret = cls() 1479 for obj in objects: 1480 ret.object_store.add_object(obj) 1481 for refname, sha in refs.items(): 1482 ret.refs.add_if_new(refname, sha) 1483 ret._init_files(bare=True) 1484 return ret 1485