1# repo.py -- For dealing with git repositories.
2# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
4#
5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6# General Public License as public by the Free Software Foundation; version 2.0
7# or (at your option) any later version. You can redistribute it and/or
8# modify it under the terms of either of these two licenses.
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# You should have received a copy of the licenses; if not, see
17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19# License, Version 2.0.
20#
21
22
23"""Repository access.
24
25This module contains the base class for git repositories
26(BaseRepo) and an implementation which uses a repository on
27local disk (Repo).
28
29"""
30
31from io import BytesIO
32import errno
33import os
34import sys
35import stat
36import time
37
38from dulwich.errors import (
39    NoIndexPresent,
40    NotBlobError,
41    NotCommitError,
42    NotGitRepository,
43    NotTreeError,
44    NotTagError,
45    CommitError,
46    RefFormatError,
47    HookError,
48    )
49from dulwich.file import (
50    GitFile,
51    )
52from dulwich.object_store import (
53    DiskObjectStore,
54    MemoryObjectStore,
55    ObjectStoreGraphWalker,
56    )
57from dulwich.objects import (
58    check_hexsha,
59    Blob,
60    Commit,
61    ShaFile,
62    Tag,
63    Tree,
64    )
65from dulwich.pack import (
66    pack_objects_to_data,
67    )
68
69from dulwich.hooks import (
70    PreCommitShellHook,
71    PostCommitShellHook,
72    CommitMsgShellHook,
73    PostReceiveShellHook,
74    )
75
76from dulwich.line_ending import BlobNormalizer
77
78from dulwich.refs import (  # noqa: F401
79    ANNOTATED_TAG_SUFFIX,
80    check_ref_format,
81    RefsContainer,
82    DictRefsContainer,
83    InfoRefsContainer,
84    DiskRefsContainer,
85    read_packed_refs,
86    read_packed_refs_with_peeled,
87    write_packed_refs,
88    SYMREF,
89    )
90
91
92import warnings
93
94
# Name of the control directory inside the root of a non-bare repository.
CONTROLDIR = '.git'
# Name of the object directory inside the control (or common) directory.
OBJECTDIR = 'objects'
# Name of the refs directory; together with OBJECTDIR its presence directly
# under a path is what identifies that path as a bare repository.
REFSDIR = 'refs'
REFSDIR_TAGS = 'tags'
REFSDIR_HEADS = 'heads'
# NOTE(review): presumably the file name of the index inside the control
# directory -- not referenced in this part of the file.
INDEX_FILENAME = "index"
# Named file in the control directory holding the path of the common
# directory, relative to the control directory.
COMMONDIR = 'commondir'
# NOTE(review): GITDIR and WORKTREES look like the remaining file/directory
# names used for linked worktrees -- confirm against their users.
GITDIR = 'gitdir'
WORKTREES = 'worktrees'

# Directories, each given as a list of path components.
# NOTE(review): presumably these are created below the control directory
# when a repository is initialised -- confirm against the init code.
BASE_DIRECTORIES = [
    ["branches"],
    [REFSDIR],
    [REFSDIR, REFSDIR_TAGS],
    [REFSDIR, REFSDIR_HEADS],
    ["hooks"],
    ["info"]
    ]

# NOTE(review): presumably the ref HEAD points at in a new repository.
DEFAULT_REF = b'refs/heads/master'
115
116
class InvalidUserIdentity(Exception):
    """Raised for a user identity that is not formatted as 'user <email>'.

    :ivar identity: The rejected identity, exactly as it was passed in
    """

    def __init__(self, identity):
        # Keep the offending value around so callers can report it.
        self.identity = identity
122
123
124def _get_default_identity():
125    import getpass
126    import socket
127    username = getpass.getuser()
128    try:
129        import pwd
130    except ImportError:
131        fullname = None
132    else:
133        try:
134            gecos = pwd.getpwnam(username).pw_gecos
135        except KeyError:
136            fullname = None
137        else:
138            fullname = gecos.split(',')[0]
139    if not fullname:
140        fullname = username
141    email = os.environ.get('EMAIL')
142    if email is None:
143        email = "{}@{}".format(username, socket.gethostname())
144    return (fullname, email)
145
146
def get_user_identity(config, kind=None):
    """Determine the identity to use for new commits.

    The name and the e-mail address are resolved independently, each from
    the first of these sources that provides a value:

    1. the ``GIT_<kind>_NAME`` / ``GIT_<kind>_EMAIL`` environment variables
       (only consulted when ``kind`` is given),
    2. the ``name`` / ``email`` settings of the ``user`` config section,
    3. the system defaults returned by ``_get_default_identity()``.

    Args:
      config: Config object; ``config.get(("user", ), key)`` is called and
        a KeyError from it is treated as "not configured".
      kind: Optional identity kind used to build the environment variable
        names, e.g. ``'AUTHOR'`` or ``'COMMITTER'``.
    Returns: Identity bytestring of the form ``b'name <email>'``
    """
    user = None
    email = None
    if kind:
        user = os.environ.get("GIT_" + kind + "_NAME")
        if user is not None:
            user = user.encode('utf-8')
        email = os.environ.get("GIT_" + kind + "_EMAIL")
        if email is not None:
            email = email.encode('utf-8')
    if user is None:
        try:
            user = config.get(("user", ), "name")
        except KeyError:
            user = None
    if email is None:
        try:
            email = config.get(("user", ), "email")
        except KeyError:
            email = None
    if user is None or email is None:
        # Only ask the system when something is still missing: the lookup
        # touches the password database and the host name, and should not
        # be able to fail a commit whose identity is fully configured.
        default_user, default_email = _get_default_identity()
        if user is None:
            user = default_user
            if not isinstance(user, bytes):
                user = user.encode('utf-8')
        if email is None:
            email = default_email
            if not isinstance(email, bytes):
                email = email.encode('utf-8')
    # Tolerate an address that was already written as "<addr>".
    if email.startswith(b'<') and email.endswith(b'>'):
        email = email[1:-1]
    return (user + b" <" + email + b">")
182
183
def check_user_identity(identity):
    """Verify that a user identity is formatted correctly.

    A valid identity contains ``b' <'`` followed, somewhere later, by
    ``b'>'``, and contains neither a NUL byte nor a newline.

    Args:
      identity: User identity bytestring
    Raises:
      InvalidUserIdentity: Raised when identity is invalid
    """
    _, separator, remainder = identity.partition(b' <')
    if not separator or b'>' not in remainder:
        raise InvalidUserIdentity(identity)
    # The identity ends up on a single header line of the commit or tag
    # object; a NUL byte or a newline in it would produce a malformed object.
    if b'\0' in identity or b'\n' in identity:
        raise InvalidUserIdentity(identity)
198
199
def parse_graftpoints(graftpoints):
    """Convert a list of graftpoints into a dict

    Args:
      graftpoints: Iterator of graftpoint lines

    Each line is formatted as:
        <commit sha1> <parent sha1> [<parent sha1>]*

    Blank lines and lines starting with ``#`` are ignored.

    Resulting dictionary is:
        <commit sha1>: [<parent sha1>*]

    Raises whatever ``check_hexsha`` raises for a malformed sha.

    https://git.wiki.kernel.org/index.php/GraftPoint
    """
    grafts = {}
    for line in graftpoints:
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing newline in the grafts file).
            continue
        comment_marker = b'#' if isinstance(line, bytes) else '#'
        if fields[0].startswith(comment_marker):
            continue

        # Validate the commit and every parent before recording the graft.
        for sha in fields:
            check_hexsha(sha, 'Invalid graftpoint')

        grafts[fields[0]] = fields[1:]
    return grafts
229
230
def serialize_graftpoints(graftpoints):
    """Render a dictionary of grafts as graft file content.

    Input dictionary:
        <commit sha1>: [<parent sha1>*]

    Output, one line per commit, lines joined by a newline (no trailing
    newline):
        <commit sha1> <parent sha1> [<parent sha1>]*

    A commit without parents is written as the bare commit sha.

    https://git.wiki.kernel.org/index.php/GraftPoint

    """
    return b'\n'.join(
        commit + b' ' + b' '.join(parents) if parents else commit
        for commit, parents in graftpoints.items())
250
251
252def _set_filesystem_hidden(path):
253    """Mark path as to be hidden if supported by platform and filesystem.
254
255    On win32 uses SetFileAttributesW api:
256    <https://docs.microsoft.com/windows/desktop/api/fileapi/nf-fileapi-setfileattributesw>
257    """
258    if sys.platform == 'win32':
259        import ctypes
260        from ctypes.wintypes import BOOL, DWORD, LPCWSTR
261
262        FILE_ATTRIBUTE_HIDDEN = 2
263        SetFileAttributesW = ctypes.WINFUNCTYPE(BOOL, LPCWSTR, DWORD)(
264            ("SetFileAttributesW", ctypes.windll.kernel32))
265
266        if isinstance(path, bytes):
267            path = path.decode(sys.getfilesystemencoding())
268        if not SetFileAttributesW(path, FILE_ATTRIBUTE_HIDDEN):
269            pass  # Could raise or log `ctypes.WinError()` here
270
271    # Could implement other platform specific filesytem hiding here
272
273
274class BaseRepo(object):
275    """Base class for a git repository.
276
277    :ivar object_store: Dictionary-like object for accessing
278        the objects
279    :ivar refs: Dictionary-like object with the refs in this
280        repository
281    """
282
283    def __init__(self, object_store, refs):
284        """Open a repository.
285
286        This shouldn't be called directly, but rather through one of the
287        base classes, such as MemoryRepo or Repo.
288
289        Args:
290          object_store: Object store to use
291          refs: Refs container to use
292        """
293        self.object_store = object_store
294        self.refs = refs
295
296        self._graftpoints = {}
297        self.hooks = {}
298
299    def _determine_file_mode(self):
300        """Probe the file-system to determine whether permissions can be trusted.
301
302        Returns: True if permissions can be trusted, False otherwise.
303        """
304        raise NotImplementedError(self._determine_file_mode)
305
306    def _init_files(self, bare):
307        """Initialize a default set of named files."""
308        from dulwich.config import ConfigFile
309        self._put_named_file('description', b"Unnamed repository")
310        f = BytesIO()
311        cf = ConfigFile()
312        cf.set("core", "repositoryformatversion", "0")
313        if self._determine_file_mode():
314            cf.set("core", "filemode", True)
315        else:
316            cf.set("core", "filemode", False)
317
318        cf.set("core", "bare", bare)
319        cf.set("core", "logallrefupdates", True)
320        cf.write_to_file(f)
321        self._put_named_file('config', f.getvalue())
322        self._put_named_file(os.path.join('info', 'exclude'), b'')
323
324    def get_named_file(self, path):
325        """Get a file from the control dir with a specific name.
326
327        Although the filename should be interpreted as a filename relative to
328        the control dir in a disk-based Repo, the object returned need not be
329        pointing to a file in that location.
330
331        Args:
332          path: The path to the file, relative to the control dir.
333        Returns: An open file object, or None if the file does not exist.
334        """
335        raise NotImplementedError(self.get_named_file)
336
337    def _put_named_file(self, path, contents):
338        """Write a file to the control dir with the given name and contents.
339
340        Args:
341          path: The path to the file, relative to the control dir.
342          contents: A string to write to the file.
343        """
344        raise NotImplementedError(self._put_named_file)
345
346    def _del_named_file(self, path):
347        """Delete a file in the contrl directory with the given name."""
348        raise NotImplementedError(self._del_named_file)
349
350    def open_index(self):
351        """Open the index for this repository.
352
353        Raises:
354          NoIndexPresent: If no index is present
355        Returns: The matching `Index`
356        """
357        raise NotImplementedError(self.open_index)
358
359    def fetch(self, target, determine_wants=None, progress=None, depth=None):
360        """Fetch objects into another repository.
361
362        Args:
363          target: The target repository
364          determine_wants: Optional function to determine what refs to
365            fetch.
366          progress: Optional progress function
367          depth: Optional shallow fetch depth
368        Returns: The local refs
369        """
370        if determine_wants is None:
371            determine_wants = target.object_store.determine_wants_all
372        count, pack_data = self.fetch_pack_data(
373                determine_wants, target.get_graph_walker(), progress=progress,
374                depth=depth)
375        target.object_store.add_pack_data(count, pack_data, progress)
376        return self.get_refs()
377
378    def fetch_pack_data(self, determine_wants, graph_walker, progress,
379                        get_tagged=None, depth=None):
380        """Fetch the pack data required for a set of revisions.
381
382        Args:
383          determine_wants: Function that takes a dictionary with heads
384            and returns the list of heads to fetch.
385          graph_walker: Object that can iterate over the list of revisions
386            to fetch and has an "ack" method that will be called to acknowledge
387            that a revision is present.
388          progress: Simple progress function that will be called with
389            updated progress strings.
390          get_tagged: Function that returns a dict of pointed-to sha ->
391            tag sha for including tags.
392          depth: Shallow fetch depth
393        Returns: count and iterator over pack data
394        """
395        # TODO(jelmer): Fetch pack data directly, don't create objects first.
396        objects = self.fetch_objects(determine_wants, graph_walker, progress,
397                                     get_tagged, depth=depth)
398        return pack_objects_to_data(objects)
399
    def fetch_objects(self, determine_wants, graph_walker, progress,
                      get_tagged=None, depth=None):
        """Fetch the missing objects required for a set of revisions.

        Args:
          determine_wants: Function that takes a dictionary with heads
            and returns the list of heads to fetch.
          graph_walker: Object that can iterate over the list of revisions
            to fetch and has an "ack" method that will be called to acknowledge
            that a revision is present. Optional ``shallow`` and
            ``unshallow`` attributes are read from it.
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          depth: Shallow fetch depth; only None and 0 are accepted.
        Returns: iterator over objects, with __len__ implemented. When
          nothing is wanted the result is instead an empty list, or None
          if the graph walker has shallow/unshallow commits set.
        Raises:
          NotImplementedError: if depth is anything but None or 0
          TypeError: if determine_wants() does not return a list
        """
        if depth not in (None, 0):
            raise NotImplementedError("depth not supported yet")

        # Collect the refs offered to determine_wants(): only those whose
        # target is actually present in the object store.
        refs = {}
        for ref, sha in self.get_refs().items():
            try:
                obj = self.object_store[sha]
            except KeyError:
                warnings.warn(
                    'ref %s points at non-present sha %s' % (
                        ref.decode('utf-8', 'replace'), sha.decode('ascii')),
                    UserWarning)
                continue
            else:
                if isinstance(obj, Tag):
                    # Extra entry for tag objects, keyed by the ref name
                    # plus ANNOTATED_TAG_SUFFIX.
                    # NOTE(review): obj.object[1] is presumably the sha of
                    # the tagged object -- confirm against Tag.
                    refs[ref + ANNOTATED_TAG_SUFFIX] = obj.object[1]
                refs[ref] = sha

        wants = determine_wants(refs)
        if not isinstance(wants, list):
            raise TypeError("determine_wants() did not return a list")

        # Not every graph walker carries shallow information; treat a
        # missing attribute as "no shallow commits".
        shallows = getattr(graph_walker, 'shallow', frozenset())
        unshallows = getattr(graph_walker, 'unshallow', frozenset())

        if wants == []:
            # TODO(dborowitz): find a way to short-circuit that doesn't change
            # this interface.

            if shallows or unshallows:
                # Do not send a pack in shallow short-circuit path
                return None

            return []

        # If the graph walker is set up with an implementation that can
        # ACK/NAK to the wire, it will write data to the client through
        # this call as a side-effect.
        haves = self.object_store.find_common_revisions(graph_walker)

        # Deal with shallow requests separately because the haves do
        # not reflect what objects are missing
        if shallows or unshallows:
            # TODO: filter the haves commits from iter_shas. the specific
            # commits aren't missing.
            haves = []

        def get_parents(commit):
            # Stop walking at shallow commits; everything else goes through
            # self.get_parents() so that graftpoints are honoured.
            if commit.id in shallows:
                return []
            return self.get_parents(commit.id, commit)

        return self.object_store.iter_shas(
          self.object_store.find_missing_objects(
              haves, wants, progress,
              get_tagged,
              get_parents=get_parents))
474
475    def get_graph_walker(self, heads=None):
476        """Retrieve a graph walker.
477
478        A graph walker is used by a remote repository (or proxy)
479        to find out which objects are present in this repository.
480
481        Args:
482          heads: Repository heads to use (optional)
483        Returns: A graph walker object
484        """
485        if heads is None:
486            heads = [
487                sha for sha in self.refs.as_dict(b'refs/heads').values()
488                if sha in self.object_store]
489        return ObjectStoreGraphWalker(
490            heads, self.get_parents, shallow=self.get_shallow())
491
492    def get_refs(self):
493        """Get dictionary with all refs.
494
495        Returns: A ``dict`` mapping ref names to SHA1s
496        """
497        return self.refs.as_dict()
498
499    def head(self):
500        """Return the SHA1 pointed at by HEAD."""
501        return self.refs[b'HEAD']
502
503    def _get_object(self, sha, cls):
504        assert len(sha) in (20, 40)
505        ret = self.get_object(sha)
506        if not isinstance(ret, cls):
507            if cls is Commit:
508                raise NotCommitError(ret)
509            elif cls is Blob:
510                raise NotBlobError(ret)
511            elif cls is Tree:
512                raise NotTreeError(ret)
513            elif cls is Tag:
514                raise NotTagError(ret)
515            else:
516                raise Exception("Type invalid: %r != %r" % (
517                  ret.type_name, cls.type_name))
518        return ret
519
520    def get_object(self, sha):
521        """Retrieve the object with the specified SHA.
522
523        Args:
524          sha: SHA to retrieve
525        Returns: A ShaFile object
526        Raises:
527          KeyError: when the object can not be found
528        """
529        return self.object_store[sha]
530
531    def get_parents(self, sha, commit=None):
532        """Retrieve the parents of a specific commit.
533
534        If the specific commit is a graftpoint, the graft parents
535        will be returned instead.
536
537        Args:
538          sha: SHA of the commit for which to retrieve the parents
539          commit: Optional commit matching the sha
540        Returns: List of parents
541        """
542
543        try:
544            return self._graftpoints[sha]
545        except KeyError:
546            if commit is None:
547                commit = self[sha]
548            return commit.parents
549
550    def get_config(self):
551        """Retrieve the config object.
552
553        Returns: `ConfigFile` object for the ``.git/config`` file.
554        """
555        raise NotImplementedError(self.get_config)
556
557    def get_description(self):
558        """Retrieve the description for this repository.
559
560        Returns: String with the description of the repository
561            as set by the user.
562        """
563        raise NotImplementedError(self.get_description)
564
565    def set_description(self, description):
566        """Set the description for this repository.
567
568        Args:
569          description: Text to set as description for this repository.
570        """
571        raise NotImplementedError(self.set_description)
572
573    def get_config_stack(self):
574        """Return a config stack for this repository.
575
576        This stack accesses the configuration for both this repository
577        itself (.git/config) and the global configuration, which usually
578        lives in ~/.gitconfig.
579
580        Returns: `Config` instance for this repository
581        """
582        from dulwich.config import StackedConfig
583        backends = [self.get_config()] + StackedConfig.default_backends()
584        return StackedConfig(backends, writable=backends[0])
585
586    def get_shallow(self):
587        """Get the set of shallow commits.
588
589        Returns: Set of shallow commits.
590        """
591        f = self.get_named_file('shallow')
592        if f is None:
593            return set()
594        with f:
595            return set(l.strip() for l in f)
596
597    def update_shallow(self, new_shallow, new_unshallow):
598        """Update the list of shallow objects.
599
600        Args:
601          new_shallow: Newly shallow objects
602          new_unshallow: Newly no longer shallow objects
603        """
604        shallow = self.get_shallow()
605        if new_shallow:
606            shallow.update(new_shallow)
607        if new_unshallow:
608            shallow.difference_update(new_unshallow)
609        self._put_named_file(
610            'shallow',
611            b''.join([sha + b'\n' for sha in shallow]))
612
613    def get_peeled(self, ref):
614        """Get the peeled value of a ref.
615
616        Args:
617          ref: The refname to peel.
618        Returns: The fully-peeled SHA1 of a tag object, after peeling all
619            intermediate tags; if the original ref does not point to a tag,
620            this will equal the original SHA1.
621        """
622        cached = self.refs.get_peeled(ref)
623        if cached is not None:
624            return cached
625        return self.object_store.peel_sha(self.refs[ref]).id
626
627    def get_walker(self, include=None, *args, **kwargs):
628        """Obtain a walker for this repository.
629
630        Args:
631          include: Iterable of SHAs of commits to include along with their
632            ancestors. Defaults to [HEAD]
633          exclude: Iterable of SHAs of commits to exclude along with their
634            ancestors, overriding includes.
635          order: ORDER_* constant specifying the order of results.
636            Anything other than ORDER_DATE may result in O(n) memory usage.
637          reverse: If True, reverse the order of output, requiring O(n)
638            memory.
639          max_entries: The maximum number of entries to yield, or None for
640            no limit.
641          paths: Iterable of file or subtree paths to show entries for.
642          rename_detector: diff.RenameDetector object for detecting
643            renames.
644          follow: If True, follow path across renames/copies. Forces a
645            default rename_detector.
646          since: Timestamp to list commits after.
647          until: Timestamp to list commits before.
648          queue_cls: A class to use for a queue of commits, supporting the
649            iterator protocol. The constructor takes a single argument, the
650            Walker.
651        Returns: A `Walker` object
652        """
653        from dulwich.walk import Walker
654        if include is None:
655            include = [self.head()]
656        if isinstance(include, str):
657            include = [include]
658
659        kwargs['get_parents'] = lambda commit: self.get_parents(
660            commit.id, commit)
661
662        return Walker(self.object_store, include, *args, **kwargs)
663
664    def __getitem__(self, name):
665        """Retrieve a Git object by SHA1 or ref.
666
667        Args:
668          name: A Git object SHA1 or a ref name
669        Returns: A `ShaFile` object, such as a Commit or Blob
670        Raises:
671          KeyError: when the specified ref or object does not exist
672        """
673        if not isinstance(name, bytes):
674            raise TypeError("'name' must be bytestring, not %.80s" %
675                            type(name).__name__)
676        if len(name) in (20, 40):
677            try:
678                return self.object_store[name]
679            except (KeyError, ValueError):
680                pass
681        try:
682            return self.object_store[self.refs[name]]
683        except RefFormatError:
684            raise KeyError(name)
685
686    def __contains__(self, name):
687        """Check if a specific Git object or ref is present.
688
689        Args:
690          name: Git object SHA1 or ref name
691        """
692        if len(name) in (20, 40):
693            return name in self.object_store or name in self.refs
694        else:
695            return name in self.refs
696
697    def __setitem__(self, name, value):
698        """Set a ref.
699
700        Args:
701          name: ref name
702          value: Ref value - either a ShaFile object, or a hex sha
703        """
704        if name.startswith(b"refs/") or name == b'HEAD':
705            if isinstance(value, ShaFile):
706                self.refs[name] = value.id
707            elif isinstance(value, bytes):
708                self.refs[name] = value
709            else:
710                raise TypeError(value)
711        else:
712            raise ValueError(name)
713
714    def __delitem__(self, name):
715        """Remove a ref.
716
717        Args:
718          name: Name of the ref to remove
719        """
720        if name.startswith(b"refs/") or name == b"HEAD":
721            del self.refs[name]
722        else:
723            raise ValueError(name)
724
725    def _get_user_identity(self, config, kind=None):
726        """Determine the identity to use for new commits.
727        """
728        # TODO(jelmer): Deprecate this function in favor of get_user_identity
729        return get_user_identity(config)
730
731    def _add_graftpoints(self, updated_graftpoints):
732        """Add or modify graftpoints
733
734        Args:
735          updated_graftpoints: Dict of commit shas to list of parent shas
736        """
737
738        # Simple validation
739        for commit, parents in updated_graftpoints.items():
740            for sha in [commit] + parents:
741                check_hexsha(sha, 'Invalid graftpoint')
742
743        self._graftpoints.update(updated_graftpoints)
744
745    def _remove_graftpoints(self, to_remove=[]):
746        """Remove graftpoints
747
748        Args:
749          to_remove: List of commit shas
750        """
751        for sha in to_remove:
752            del self._graftpoints[sha]
753
754    def _read_heads(self, name):
755        f = self.get_named_file(name)
756        if f is None:
757            return []
758        with f:
759            return [l.strip() for l in f.readlines() if l.strip()]
760
    def do_commit(self, message=None, committer=None,
                  author=None, commit_timestamp=None,
                  commit_timezone=None, author_timestamp=None,
                  author_timezone=None, tree=None, encoding=None,
                  ref=b'HEAD', merge_heads=None):
        """Create a new commit.

        The ``pre-commit`` hook runs before the commit is assembled, the
        ``commit-msg`` hook may replace the message, and the ``post-commit``
        hook runs once the commit is stored and the ref updated.

        Args:
          message: Commit message (required; a bytestring, since it is
            appended to ``b"commit: "`` for the ref update message)
          committer: Committer identity, in the ``name <email>`` form that
            check_user_identity() accepts (defaults to the identity from
            get_user_identity())
          author: Author identity, same form (defaults to the identity
            from get_user_identity())
          commit_timestamp: Commit timestamp (defaults to now)
          commit_timezone: Commit timestamp timezone (defaults to GMT)
          author_timestamp: Author timestamp (defaults to commit
            timestamp)
          author_timezone: Author timestamp timezone
            (defaults to commit timestamp timezone)
          tree: SHA1 of the tree root to use (if not specified the
            current index will be committed).
          encoding: Encoding (defaults to the ``i18n.commitEncoding``
            setting, if any)
          ref: Optional ref to commit to (defaults to current branch);
            None creates a dangling commit without touching any ref
          merge_heads: Merge heads (defaults to .git/MERGE_HEADS)
        Returns: New commit SHA1
        Raises:
          ValueError: if tree is not 40 bytes long or no message is given
          InvalidUserIdentity: if committer or author is malformed
          CommitError: if the pre-commit or commit-msg hook fails, or if
            the ref changed while the commit was being created
        """
        import time
        c = Commit()
        if tree is None:
            index = self.open_index()
            c.tree = index.commit(self.object_store)
        else:
            if len(tree) != 40:
                raise ValueError("tree must be a 40-byte hex sha string")
            c.tree = tree

        try:
            self.hooks['pre-commit'].execute()
        except HookError as e:
            raise CommitError(e)
        except KeyError:  # no hook defined, silent fallthrough
            pass

        config = self.get_config_stack()
        if merge_heads is None:
            # NOTE(review): git itself names this file MERGE_HEAD --
            # confirm the file name used here is intended.
            merge_heads = self._read_heads('MERGE_HEADS')
        if committer is None:
            committer = get_user_identity(config, kind='COMMITTER')
        check_user_identity(committer)
        c.committer = committer
        if commit_timestamp is None:
            # FIXME: Support GIT_COMMITTER_DATE environment variable
            commit_timestamp = time.time()
        c.commit_time = int(commit_timestamp)
        if commit_timezone is None:
            # FIXME: Use current user timezone rather than UTC
            commit_timezone = 0
        c.commit_timezone = commit_timezone
        if author is None:
            author = get_user_identity(config, kind='AUTHOR')
        c.author = author
        check_user_identity(author)
        if author_timestamp is None:
            # FIXME: Support GIT_AUTHOR_DATE environment variable
            author_timestamp = commit_timestamp
        c.author_time = int(author_timestamp)
        if author_timezone is None:
            author_timezone = commit_timezone
        c.author_timezone = author_timezone
        if encoding is None:
            try:
                encoding = config.get(('i18n', ), 'commitEncoding')
            except KeyError:
                pass  # No dice
        if encoding is not None:
            c.encoding = encoding
        if message is None:
            # FIXME: Try to read commit message from .git/MERGE_MSG
            raise ValueError("No commit message specified")

        try:
            # The hook may hand back a replacement message; None means
            # "keep the original".
            c.message = self.hooks['commit-msg'].execute(message)
            if c.message is None:
                c.message = message
        except HookError as e:
            raise CommitError(e)
        except KeyError:  # no hook defined, message not modified
            c.message = message

        if ref is None:
            # Create a dangling commit
            c.parents = merge_heads
            self.object_store.add_object(c)
        else:
            try:
                # Existing ref: the new commit gets the old value as first
                # parent, and the ref is only moved if it still holds that
                # value.
                old_head = self.refs[ref]
                c.parents = [old_head] + merge_heads
                self.object_store.add_object(c)
                ok = self.refs.set_if_equals(
                    ref, old_head, c.id, message=b"commit: " + message,
                    committer=committer, timestamp=commit_timestamp,
                    timezone=commit_timezone)
            except KeyError:
                # The ref does not exist yet: only the merge heads are
                # parents, and the ref is only created if it is still
                # absent.
                c.parents = merge_heads
                self.object_store.add_object(c)
                ok = self.refs.add_if_new(
                        ref, c.id, message=b"commit: " + message,
                        committer=committer, timestamp=commit_timestamp,
                        timezone=commit_timezone)
            if not ok:
                # Fail if the atomic compare-and-swap failed, leaving the
                # commit and all its objects as garbage.
                raise CommitError("%s changed during commit" % (ref,))

        # Called unconditionally, whether or not the file was read above.
        self._del_named_file('MERGE_HEADS')

        try:
            self.hooks['post-commit'].execute()
        except HookError as e:  # silent failure
            warnings.warn("post-commit hook failed: %s" % e, UserWarning)
        except KeyError:  # no hook defined, silent fallthrough
            pass

        return c.id
883
884
def read_gitfile(f):
    """Extract the path from a ``.git`` file.

    The content of the file has to start with "gitdir: "; everything after
    that prefix, minus trailing newlines, is the path.

    Args:
      f: File-like object to read from, opened in text mode
    Returns: A path
    Raises:
      ValueError: when the content does not start with "gitdir: "
    """
    prefix = "gitdir: "
    contents = f.read()
    if not contents.startswith(prefix):
        raise ValueError("Expected file to start with 'gitdir: '")
    return contents[len(prefix):].rstrip("\n")
898
899
900class Repo(BaseRepo):
901    """A git repository backed by local disk.
902
    To open an existing repository, call the constructor with
    the path of the repository.
905
906    To create a new repository, use the Repo.init class method.
907    """
908
909    def __init__(self, root):
910        hidden_path = os.path.join(root, CONTROLDIR)
911        if os.path.isdir(os.path.join(hidden_path, OBJECTDIR)):
912            self.bare = False
913            self._controldir = hidden_path
914        elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
915              os.path.isdir(os.path.join(root, REFSDIR))):
916            self.bare = True
917            self._controldir = root
918        elif os.path.isfile(hidden_path):
919            self.bare = False
920            with open(hidden_path, 'r') as f:
921                path = read_gitfile(f)
922            self.bare = False
923            self._controldir = os.path.join(root, path)
924        else:
925            raise NotGitRepository(
926                "No git repository was found at %(path)s" % dict(path=root)
927            )
928        commondir = self.get_named_file(COMMONDIR)
929        if commondir is not None:
930            with commondir:
931                self._commondir = os.path.join(
932                    self.controldir(),
933                    commondir.read().rstrip(b"\r\n").decode(
934                        sys.getfilesystemencoding()))
935        else:
936            self._commondir = self._controldir
937        self.path = root
938        config = self.get_config()
939        object_store = DiskObjectStore.from_config(
940            os.path.join(self.commondir(), OBJECTDIR),
941            config)
942        refs = DiskRefsContainer(self.commondir(), self._controldir,
943                                 logger=self._write_reflog)
944        BaseRepo.__init__(self, object_store, refs)
945
946        self._graftpoints = {}
947        graft_file = self.get_named_file(os.path.join("info", "grafts"),
948                                         basedir=self.commondir())
949        if graft_file:
950            with graft_file:
951                self._graftpoints.update(parse_graftpoints(graft_file))
952        graft_file = self.get_named_file("shallow",
953                                         basedir=self.commondir())
954        if graft_file:
955            with graft_file:
956                self._graftpoints.update(parse_graftpoints(graft_file))
957
958        self.hooks['pre-commit'] = PreCommitShellHook(self.controldir())
959        self.hooks['commit-msg'] = CommitMsgShellHook(self.controldir())
960        self.hooks['post-commit'] = PostCommitShellHook(self.controldir())
961        self.hooks['post-receive'] = PostReceiveShellHook(self.controldir())
962
963    def _write_reflog(self, ref, old_sha, new_sha, committer, timestamp,
964                      timezone, message):
965        from .reflog import format_reflog_line
966        path = os.path.join(
967                self.controldir(), 'logs',
968                ref.decode(sys.getfilesystemencoding()))
969        try:
970            os.makedirs(os.path.dirname(path))
971        except OSError as e:
972            if e.errno != errno.EEXIST:
973                raise
974        if committer is None:
975            config = self.get_config_stack()
976            committer = self._get_user_identity(config)
977        check_user_identity(committer)
978        if timestamp is None:
979            timestamp = int(time.time())
980        if timezone is None:
981            timezone = 0  # FIXME
982        with open(path, 'ab') as f:
983            f.write(format_reflog_line(old_sha, new_sha, committer,
984                    timestamp, timezone, message) + b'\n')
985
986    @classmethod
987    def discover(cls, start='.'):
988        """Iterate parent directories to discover a repository
989
990        Return a Repo object for the first parent directory that looks like a
991        Git repository.
992
993        Args:
994          start: The directory to start discovery from (defaults to '.')
995        """
996        remaining = True
997        path = os.path.abspath(start)
998        while remaining:
999            try:
1000                return cls(path)
1001            except NotGitRepository:
1002                path, remaining = os.path.split(path)
1003        raise NotGitRepository(
1004            "No git repository was found at %(path)s" % dict(path=start)
1005        )
1006
1007    def controldir(self):
1008        """Return the path of the control directory."""
1009        return self._controldir
1010
1011    def commondir(self):
1012        """Return the path of the common directory.
1013
1014        For a main working tree, it is identical to controldir().
1015
1016        For a linked working tree, it is the control directory of the
1017        main working tree."""
1018
1019        return self._commondir
1020
1021    def _determine_file_mode(self):
1022        """Probe the file-system to determine whether permissions can be trusted.
1023
1024        Returns: True if permissions can be trusted, False otherwise.
1025        """
1026        fname = os.path.join(self.path, '.probe-permissions')
1027        with open(fname, 'w') as f:
1028            f.write('')
1029
1030        st1 = os.lstat(fname)
1031        try:
1032            os.chmod(fname, st1.st_mode ^ stat.S_IXUSR)
1033        except EnvironmentError as e:
1034            if e.errno == errno.EPERM:
1035                return False
1036            raise
1037        st2 = os.lstat(fname)
1038
1039        os.unlink(fname)
1040
1041        mode_differs = st1.st_mode != st2.st_mode
1042        st2_has_exec = (st2.st_mode & stat.S_IXUSR) != 0
1043
1044        return mode_differs and st2_has_exec
1045
1046    def _put_named_file(self, path, contents):
1047        """Write a file to the control dir with the given name and contents.
1048
1049        Args:
1050          path: The path to the file, relative to the control dir.
1051          contents: A string to write to the file.
1052        """
1053        path = path.lstrip(os.path.sep)
1054        with GitFile(os.path.join(self.controldir(), path), 'wb') as f:
1055            f.write(contents)
1056
1057    def _del_named_file(self, path):
1058        try:
1059            os.unlink(os.path.join(self.controldir(), path))
1060        except (IOError, OSError) as e:
1061            if e.errno == errno.ENOENT:
1062                return
1063            raise
1064
1065    def get_named_file(self, path, basedir=None):
1066        """Get a file from the control dir with a specific name.
1067
1068        Although the filename should be interpreted as a filename relative to
1069        the control dir in a disk-based Repo, the object returned need not be
1070        pointing to a file in that location.
1071
1072        Args:
1073          path: The path to the file, relative to the control dir.
1074          basedir: Optional argument that specifies an alternative to the
1075            control dir.
1076        Returns: An open file object, or None if the file does not exist.
1077        """
1078        # TODO(dborowitz): sanitize filenames, since this is used directly by
1079        # the dumb web serving code.
1080        if basedir is None:
1081            basedir = self.controldir()
1082        path = path.lstrip(os.path.sep)
1083        try:
1084            return open(os.path.join(basedir, path), 'rb')
1085        except (IOError, OSError) as e:
1086            if e.errno == errno.ENOENT:
1087                return None
1088            raise
1089
1090    def index_path(self):
1091        """Return path to the index file."""
1092        return os.path.join(self.controldir(), INDEX_FILENAME)
1093
1094    def open_index(self):
1095        """Open the index for this repository.
1096
1097        Raises:
1098          NoIndexPresent: If no index is present
1099        Returns: The matching `Index`
1100        """
1101        from dulwich.index import Index
1102        if not self.has_index():
1103            raise NoIndexPresent()
1104        return Index(self.index_path())
1105
1106    def has_index(self):
1107        """Check if an index is present."""
1108        # Bare repos must never have index files; non-bare repos may have a
1109        # missing index file, which is treated as empty.
1110        return not self.bare
1111
1112    def stage(self, fs_paths):
1113        """Stage a set of paths.
1114
1115        Args:
1116          fs_paths: List of paths, relative to the repository path
1117        """
1118
1119        root_path_bytes = self.path.encode(sys.getfilesystemencoding())
1120
1121        if not isinstance(fs_paths, list):
1122            fs_paths = [fs_paths]
1123        from dulwich.index import (
1124            blob_from_path_and_stat,
1125            index_entry_from_stat,
1126            _fs_to_tree_path,
1127            )
1128        index = self.open_index()
1129        blob_normalizer = self.get_blob_normalizer()
1130        for fs_path in fs_paths:
1131            if not isinstance(fs_path, bytes):
1132                fs_path = fs_path.encode(sys.getfilesystemencoding())
1133            if os.path.isabs(fs_path):
1134                raise ValueError(
1135                    "path %r should be relative to "
1136                    "repository root, not absolute" % fs_path)
1137            tree_path = _fs_to_tree_path(fs_path)
1138            full_path = os.path.join(root_path_bytes, fs_path)
1139            try:
1140                st = os.lstat(full_path)
1141            except OSError:
1142                # File no longer exists
1143                try:
1144                    del index[tree_path]
1145                except KeyError:
1146                    pass  # already removed
1147            else:
1148                if not stat.S_ISDIR(st.st_mode):
1149                    blob = blob_from_path_and_stat(full_path, st)
1150                    blob = blob_normalizer.checkin_normalize(blob, fs_path)
1151                    self.object_store.add_object(blob)
1152                    index[tree_path] = index_entry_from_stat(st, blob.id, 0)
1153                else:
1154                    try:
1155                        del index[tree_path]
1156                    except KeyError:
1157                        pass
1158        index.write()
1159
1160    def clone(self, target_path, mkdir=True, bare=False,
1161              origin=b"origin", checkout=None):
1162        """Clone this repository.
1163
1164        Args:
1165          target_path: Target path
1166          mkdir: Create the target directory
1167          bare: Whether to create a bare repository
1168          origin: Base name for refs in target repository
1169            cloned from this repository
1170        Returns: Created repository as `Repo`
1171        """
1172        if not bare:
1173            target = self.init(target_path, mkdir=mkdir)
1174        else:
1175            if checkout:
1176                raise ValueError("checkout and bare are incompatible")
1177            target = self.init_bare(target_path, mkdir=mkdir)
1178        self.fetch(target)
1179        encoded_path = self.path
1180        if not isinstance(encoded_path, bytes):
1181            encoded_path = encoded_path.encode(sys.getfilesystemencoding())
1182        ref_message = b"clone: from " + encoded_path
1183        target.refs.import_refs(
1184            b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'),
1185            message=ref_message)
1186        target.refs.import_refs(
1187            b'refs/tags', self.refs.as_dict(b'refs/tags'),
1188            message=ref_message)
1189        try:
1190            target.refs.add_if_new(
1191                    DEFAULT_REF, self.refs[DEFAULT_REF],
1192                    message=ref_message)
1193        except KeyError:
1194            pass
1195        target_config = target.get_config()
1196        target_config.set(('remote', 'origin'), 'url', encoded_path)
1197        target_config.set(('remote', 'origin'), 'fetch',
1198                          '+refs/heads/*:refs/remotes/origin/*')
1199        target_config.write_to_path()
1200
1201        # Update target head
1202        head_chain, head_sha = self.refs.follow(b'HEAD')
1203        if head_chain and head_sha is not None:
1204            target.refs.set_symbolic_ref(b'HEAD', head_chain[-1],
1205                                         message=ref_message)
1206            target[b'HEAD'] = head_sha
1207
1208            if checkout is None:
1209                checkout = (not bare)
1210            if checkout:
1211                # Checkout HEAD to target dir
1212                target.reset_index()
1213
1214        return target
1215
1216    def reset_index(self, tree=None):
1217        """Reset the index back to a specific tree.
1218
1219        Args:
1220          tree: Tree SHA to reset to, None for current HEAD tree.
1221        """
1222        from dulwich.index import (
1223            build_index_from_tree,
1224            validate_path_element_default,
1225            validate_path_element_ntfs,
1226            )
1227        if tree is None:
1228            tree = self[b'HEAD'].tree
1229        config = self.get_config()
1230        honor_filemode = config.get_boolean(
1231            b'core', b'filemode', os.name != "nt")
1232        if config.get_boolean(b'core', b'core.protectNTFS', os.name == "nt"):
1233            validate_path_element = validate_path_element_ntfs
1234        else:
1235            validate_path_element = validate_path_element_default
1236        return build_index_from_tree(
1237            self.path, self.index_path(), self.object_store, tree,
1238            honor_filemode=honor_filemode,
1239            validate_path_element=validate_path_element)
1240
1241    def get_config(self):
1242        """Retrieve the config object.
1243
1244        Returns: `ConfigFile` object for the ``.git/config`` file.
1245        """
1246        from dulwich.config import ConfigFile
1247        path = os.path.join(self._controldir, 'config')
1248        try:
1249            return ConfigFile.from_path(path)
1250        except (IOError, OSError) as e:
1251            if e.errno != errno.ENOENT:
1252                raise
1253            ret = ConfigFile()
1254            ret.path = path
1255            return ret
1256
1257    def get_description(self):
1258        """Retrieve the description of this repository.
1259
1260        Returns: A string describing the repository or None.
1261        """
1262        path = os.path.join(self._controldir, 'description')
1263        try:
1264            with GitFile(path, 'rb') as f:
1265                return f.read()
1266        except (IOError, OSError) as e:
1267            if e.errno != errno.ENOENT:
1268                raise
1269            return None
1270
1271    def __repr__(self):
1272        return "<Repo at %r>" % self.path
1273
1274    def set_description(self, description):
1275        """Set the description for this repository.
1276
1277        Args:
1278          description: Text to set as description for this repository.
1279        """
1280
1281        self._put_named_file('description', description)
1282
1283    @classmethod
1284    def _init_maybe_bare(cls, path, bare):
1285        for d in BASE_DIRECTORIES:
1286            os.mkdir(os.path.join(path, *d))
1287        DiskObjectStore.init(os.path.join(path, OBJECTDIR))
1288        ret = cls(path)
1289        ret.refs.set_symbolic_ref(b'HEAD', DEFAULT_REF)
1290        ret._init_files(bare)
1291        return ret
1292
1293    @classmethod
1294    def init(cls, path, mkdir=False):
1295        """Create a new repository.
1296
1297        Args:
1298          path: Path in which to create the repository
1299          mkdir: Whether to create the directory
1300        Returns: `Repo` instance
1301        """
1302        if mkdir:
1303            os.mkdir(path)
1304        controldir = os.path.join(path, CONTROLDIR)
1305        os.mkdir(controldir)
1306        _set_filesystem_hidden(controldir)
1307        cls._init_maybe_bare(controldir, False)
1308        return cls(path)
1309
1310    @classmethod
1311    def _init_new_working_directory(cls, path, main_repo, identifier=None,
1312                                    mkdir=False):
1313        """Create a new working directory linked to a repository.
1314
1315        Args:
1316          path: Path in which to create the working tree.
1317          main_repo: Main repository to reference
1318          identifier: Worktree identifier
1319          mkdir: Whether to create the directory
1320        Returns: `Repo` instance
1321        """
1322        if mkdir:
1323            os.mkdir(path)
1324        if identifier is None:
1325            identifier = os.path.basename(path)
1326        main_worktreesdir = os.path.join(main_repo.controldir(), WORKTREES)
1327        worktree_controldir = os.path.join(main_worktreesdir, identifier)
1328        gitdirfile = os.path.join(path, CONTROLDIR)
1329        with open(gitdirfile, 'wb') as f:
1330            f.write(b'gitdir: ' +
1331                    worktree_controldir.encode(sys.getfilesystemencoding()) +
1332                    b'\n')
1333        try:
1334            os.mkdir(main_worktreesdir)
1335        except OSError as e:
1336            if e.errno != errno.EEXIST:
1337                raise
1338        try:
1339            os.mkdir(worktree_controldir)
1340        except OSError as e:
1341            if e.errno != errno.EEXIST:
1342                raise
1343        with open(os.path.join(worktree_controldir, GITDIR), 'wb') as f:
1344            f.write(gitdirfile.encode(sys.getfilesystemencoding()) + b'\n')
1345        with open(os.path.join(worktree_controldir, COMMONDIR), 'wb') as f:
1346            f.write(b'../..\n')
1347        with open(os.path.join(worktree_controldir, 'HEAD'), 'wb') as f:
1348            f.write(main_repo.head() + b'\n')
1349        r = cls(path)
1350        r.reset_index()
1351        return r
1352
1353    @classmethod
1354    def init_bare(cls, path, mkdir=False):
1355        """Create a new bare repository.
1356
1357        ``path`` should already exist and be an empty directory.
1358
1359        Args:
1360          path: Path to create bare repository in
1361        Returns: a `Repo` instance
1362        """
1363        if mkdir:
1364            os.mkdir(path)
1365        return cls._init_maybe_bare(path, True)
1366
1367    create = init_bare
1368
1369    def close(self):
1370        """Close any files opened by this repository."""
1371        self.object_store.close()
1372
1373    def __enter__(self):
1374        return self
1375
1376    def __exit__(self, exc_type, exc_val, exc_tb):
1377        self.close()
1378
1379    def get_blob_normalizer(self):
1380        """ Return a BlobNormalizer object
1381        """
1382        # TODO Parse the git attributes files
1383        git_attributes = {}
1384        return BlobNormalizer(
1385            self.get_config_stack(), git_attributes
1386        )
1387
1388
1389class MemoryRepo(BaseRepo):
1390    """Repo that stores refs, objects, and named files in memory.
1391
1392    MemoryRepos are always bare: they have no working tree and no index, since
1393    those have a stronger dependency on the filesystem.
1394    """
1395
1396    def __init__(self):
1397        from dulwich.config import ConfigFile
1398        self._reflog = []
1399        refs_container = DictRefsContainer({}, logger=self._append_reflog)
1400        BaseRepo.__init__(self, MemoryObjectStore(), refs_container)
1401        self._named_files = {}
1402        self.bare = True
1403        self._config = ConfigFile()
1404        self._description = None
1405
1406    def _append_reflog(self, *args):
1407        self._reflog.append(args)
1408
1409    def set_description(self, description):
1410        self._description = description
1411
1412    def get_description(self):
1413        return self._description
1414
1415    def _determine_file_mode(self):
1416        """Probe the file-system to determine whether permissions can be trusted.
1417
1418        Returns: True if permissions can be trusted, False otherwise.
1419        """
1420        return sys.platform != 'win32'
1421
1422    def _put_named_file(self, path, contents):
1423        """Write a file to the control dir with the given name and contents.
1424
1425        Args:
1426          path: The path to the file, relative to the control dir.
1427          contents: A string to write to the file.
1428        """
1429        self._named_files[path] = contents
1430
1431    def _del_named_file(self, path):
1432        try:
1433            del self._named_files[path]
1434        except KeyError:
1435            pass
1436
1437    def get_named_file(self, path, basedir=None):
1438        """Get a file from the control dir with a specific name.
1439
1440        Although the filename should be interpreted as a filename relative to
1441        the control dir in a disk-baked Repo, the object returned need not be
1442        pointing to a file in that location.
1443
1444        Args:
1445          path: The path to the file, relative to the control dir.
1446        Returns: An open file object, or None if the file does not exist.
1447        """
1448        contents = self._named_files.get(path, None)
1449        if contents is None:
1450            return None
1451        return BytesIO(contents)
1452
1453    def open_index(self):
1454        """Fail to open index for this repo, since it is bare.
1455
1456        Raises:
1457          NoIndexPresent: Raised when no index is present
1458        """
1459        raise NoIndexPresent()
1460
1461    def get_config(self):
1462        """Retrieve the config object.
1463
1464        Returns: `ConfigFile` object.
1465        """
1466        return self._config
1467
1468    @classmethod
1469    def init_bare(cls, objects, refs):
1470        """Create a new bare repository in memory.
1471
1472        Args:
1473          objects: Objects for the new repository,
1474            as iterable
1475          refs: Refs as dictionary, mapping names
1476            to object SHA1s
1477        """
1478        ret = cls()
1479        for obj in objects:
1480            ret.object_store.add_object(obj)
1481        for refname, sha in refs.items():
1482            ret.refs.add_if_new(refname, sha)
1483        ret._init_files(bare=True)
1484        return ret
1485