1# need a dict to set bloody .name field
2from io import BytesIO
3import logging
4import os
5import stat
6from unittest import SkipTest
7import uuid
8
9import git
10from git.cmd import Git
11from git.compat import (
12    string_types,
13    defenc,
14    is_win,
15)
16from git.config import (
17    SectionConstraint,
18    GitConfigParser,
19    cp
20)
21from git.exc import (
22    InvalidGitRepositoryError,
23    NoSuchPathError,
24    RepositoryDirtyError
25)
26from git.objects.base import IndexObject, Object
27from git.objects.util import Traversable
28from git.util import (
29    Iterable,
30    join_path_native,
31    to_native_path_linux,
32    RemoteProgress,
33    rmtree,
34    unbare_repo
35)
36from git.util import HIDE_WINDOWS_KNOWN_ERRORS
37
38import os.path as osp
39
40from .util import (
41    mkhead,
42    sm_name,
43    sm_section,
44    SubmoduleConfigParser,
45    find_first_remote_branch
46)
47
48
# Names exported by a star-import of this module
__all__ = [
    "Submodule",
    "UpdateProgress",
]


# Module logger. The no-op handler keeps this library from emitting anything
# unless the application configures logging itself.
log = logging.getLogger('git.objects.submodule.base')
log.addHandler(logging.NullHandler())
54
55
class UpdateProgress(RemoteProgress):
    """Progress reporter for submodule updates.

    Callers are expected to derive from it and implement ``update(...)`` to
    receive the messages. In addition to the op codes inherited from
    ``RemoteProgress`` it defines the flags CLONE, FETCH and UPDWKTREE, which
    use the three bits directly following the inherited ones."""
    __slots__ = ()

    CLONE = 1 << RemoteProgress._num_op_codes
    FETCH = 1 << (RemoteProgress._num_op_codes + 1)
    UPDWKTREE = 1 << (RemoteProgress._num_op_codes + 2)
    # account for the three flags added above
    _num_op_codes = RemoteProgress._num_op_codes + 3
64
65
# Module-level shortcuts for the op code flags of UpdateProgress
BEGIN, END = UpdateProgress.BEGIN, UpdateProgress.END
CLONE, FETCH, UPDWKTREE = UpdateProgress.CLONE, UpdateProgress.FETCH, UpdateProgress.UPDWKTREE
71
72
# IndexObject comes via the util module. It's a 'hacky' fix thanks to Python's import
# mechanism, which causes plenty of trouble if the only reason for packages and
# modules is refactoring - subpackages shouldn't depend on parent packages
76class Submodule(IndexObject, Iterable, Traversable):
77
78    """Implements access to a git submodule. They are special in that their sha
79    represents a commit in the submodule's repository which is to be checked out
80    at the path of this instance.
81    The submodule type does not have a string type associated with it, as it exists
82    solely as a marker in the tree and index.
83
84    All methods work in bare and non-bare repositories."""
85
86    _id_attribute_ = "name"
87    k_modules_file = '.gitmodules'
88    k_head_option = 'branch'
89    k_head_default = 'master'
90    k_default_mode = stat.S_IFDIR | stat.S_IFLNK        # submodules are directories with link-status
91
92    # this is a bogus type for base class compatibility
93    type = 'submodule'
94
95    __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__')
96    _cache_attrs = ('path', '_url', '_branch_path')
97
98    def __init__(self, repo, binsha, mode=None, path=None, name=None, parent_commit=None, url=None, branch_path=None):
99        """Initialize this instance with its attributes. We only document the ones
100        that differ from ``IndexObject``
101
102        :param repo: Our parent repository
103        :param binsha: binary sha referring to a commit in the remote repository, see url parameter
104        :param parent_commit: see set_parent_commit()
105        :param url: The url to the remote repository which is the submodule
106        :param branch_path: full (relative) path to ref to checkout when cloning the remote repository"""
107        super(Submodule, self).__init__(repo, binsha, mode, path)
108        self.size = 0
109        self._parent_commit = parent_commit
110        if url is not None:
111            self._url = url
112        if branch_path is not None:
113            assert isinstance(branch_path, string_types)
114            self._branch_path = branch_path
115        if name is not None:
116            self._name = name
117
118    def _set_cache_(self, attr):
119        if attr in ('path', '_url', '_branch_path'):
120            reader = self.config_reader()
121            # default submodule values
122            try:
123                self.path = reader.get('path')
124            except cp.NoSectionError:
125                raise ValueError("This submodule instance does not exist anymore in '%s' file"
126                                 % osp.join(self.repo.working_tree_dir, '.gitmodules'))
127            # end
128            self._url = reader.get('url')
129            # git-python extension values - optional
130            self._branch_path = reader.get_value(self.k_head_option, git.Head.to_full_path(self.k_head_default))
131        elif attr == '_name':
132            raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially")
133        else:
134            super(Submodule, self)._set_cache_(attr)
135        # END handle attribute name
136
137    def _get_intermediate_items(self, item):
138        """:return: all the submodules of our module repository"""
139        try:
140            return type(self).list_items(item.module())
141        except InvalidGitRepositoryError:
142            return []
143        # END handle intermediate items
144
145    @classmethod
146    def _need_gitfile_submodules(cls, git):
147        return git.version_info[:3] >= (1, 7, 5)
148
149    def __eq__(self, other):
150        """Compare with another submodule"""
151        # we may only compare by name as this should be the ID they are hashed with
152        # Otherwise this type wouldn't be hashable
153        # return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other)
154        return self._name == other._name
155
156    def __ne__(self, other):
157        """Compare with another submodule for inequality"""
158        return not (self == other)
159
160    def __hash__(self):
161        """Hash this instance using its logical id, not the sha"""
162        return hash(self._name)
163
164    def __str__(self):
165        return self._name
166
167    def __repr__(self):
168        return "git.%s(name=%s, path=%s, url=%s, branch_path=%s)"\
169               % (type(self).__name__, self._name, self.path, self.url, self.branch_path)
170
171    @classmethod
172    def _config_parser(cls, repo, parent_commit, read_only):
173        """:return: Config Parser constrained to our submodule in read or write mode
174        :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository
175            at the given parent commit. Otherwise the exception would be delayed until the first
176            access of the config parser"""
177        parent_matches_head = True
178        if parent_commit is not None:
179            try:
180                parent_matches_head = repo.head.commit == parent_commit
181            except ValueError:
182                # We are most likely in an empty repository, so the HEAD doesn't point to a valid ref
183                pass
184        # end handle parent_commit
185
186        if not repo.bare and parent_matches_head:
187            fp_module = osp.join(repo.working_tree_dir, cls.k_modules_file)
188        else:
189            assert parent_commit is not None, "need valid parent_commit in bare repositories"
190            try:
191                fp_module = cls._sio_modules(parent_commit)
192            except KeyError:
193                raise IOError("Could not find %s file in the tree of parent commit %s" %
194                              (cls.k_modules_file, parent_commit))
195            # END handle exceptions
196        # END handle non-bare working tree
197
198        if not read_only and (repo.bare or not parent_matches_head):
199            raise ValueError("Cannot write blobs of 'historical' submodule configurations")
200        # END handle writes of historical submodules
201
202        return SubmoduleConfigParser(fp_module, read_only=read_only)
203
204    def _clear_cache(self):
205        # clear the possibly changed values
206        for name in self._cache_attrs:
207            try:
208                delattr(self, name)
209            except AttributeError:
210                pass
211            # END try attr deletion
212        # END for each name to delete
213
214    @classmethod
215    def _sio_modules(cls, parent_commit):
216        """:return: Configuration file as BytesIO - we only access it through the respective blob's data"""
217        sio = BytesIO(parent_commit.tree[cls.k_modules_file].data_stream.read())
218        sio.name = cls.k_modules_file
219        return sio
220
221    def _config_parser_constrained(self, read_only):
222        """:return: Config Parser constrained to our submodule in read or write mode"""
223        try:
224            pc = self.parent_commit
225        except ValueError:
226            pc = None
227        # end handle empty parent repository
228        parser = self._config_parser(self.repo, pc, read_only)
229        parser.set_submodule(self)
230        return SectionConstraint(parser, sm_section(self.name))
231
232    @classmethod
233    def _module_abspath(cls, parent_repo, path, name):
234        if cls._need_gitfile_submodules(parent_repo.git):
235            return osp.join(parent_repo.git_dir, 'modules', name)
236        else:
237            return osp.join(parent_repo.working_tree_dir, path)
238        # end
239
240    @classmethod
241    def _clone_repo(cls, repo, url, path, name, **kwargs):
242        """:return: Repo instance of newly cloned repository
243        :param repo: our parent repository
244        :param url: url to clone from
245        :param path: repository-relative path to the submodule checkout location
246        :param name: canonical of the submodule
247        :param kwrags: additinoal arguments given to git.clone"""
248        module_abspath = cls._module_abspath(repo, path, name)
249        module_checkout_path = module_abspath
250        if cls._need_gitfile_submodules(repo.git):
251            kwargs['separate_git_dir'] = module_abspath
252            module_abspath_dir = osp.dirname(module_abspath)
253            if not osp.isdir(module_abspath_dir):
254                os.makedirs(module_abspath_dir)
255            module_checkout_path = osp.join(repo.working_tree_dir, path)
256        # end
257
258        clone = git.Repo.clone_from(url, module_checkout_path, **kwargs)
259        if cls._need_gitfile_submodules(repo.git):
260            cls._write_git_file_and_module_config(module_checkout_path, module_abspath)
261        # end
262        return clone
263
264    @classmethod
265    def _to_relative_path(cls, parent_repo, path):
266        """:return: a path guaranteed  to be relative to the given parent-repository
267        :raise ValueError: if path is not contained in the parent repository's working tree"""
268        path = to_native_path_linux(path)
269        if path.endswith('/'):
270            path = path[:-1]
271        # END handle trailing slash
272
273        if osp.isabs(path):
274            working_tree_linux = to_native_path_linux(parent_repo.working_tree_dir)
275            if not path.startswith(working_tree_linux):
276                raise ValueError("Submodule checkout path '%s' needs to be within the parents repository at '%s'"
277                                 % (working_tree_linux, path))
278            path = path[len(working_tree_linux.rstrip('/')) + 1:]
279            if not path:
280                raise ValueError("Absolute submodule path '%s' didn't yield a valid relative path" % path)
281            # end verify converted relative path makes sense
282        # end convert to a relative path
283
284        return path
285
286    @classmethod
287    def _write_git_file_and_module_config(cls, working_tree_dir, module_abspath):
288        """Writes a .git file containing a (preferably) relative path to the actual git module repository.
289        It is an error if the module_abspath cannot be made into a relative path, relative to the working_tree_dir
290        :note: will overwrite existing files !
291        :note: as we rewrite both the git file as well as the module configuration, we might fail on the configuration
292            and will not roll back changes done to the git file. This should be a non-issue, but may easily be fixed
293            if it becomes one
294        :param working_tree_dir: directory to write the .git file into
295        :param module_abspath: absolute path to the bare repository
296        """
297        git_file = osp.join(working_tree_dir, '.git')
298        rela_path = osp.relpath(module_abspath, start=working_tree_dir)
299        if is_win:
300            if osp.isfile(git_file):
301                os.remove(git_file)
302        with open(git_file, 'wb') as fp:
303            fp.write(("gitdir: %s" % rela_path).encode(defenc))
304
305        with GitConfigParser(osp.join(module_abspath, 'config'),
306                             read_only=False, merge_includes=False) as writer:
307            writer.set_value('core', 'worktree',
308                             to_native_path_linux(osp.relpath(working_tree_dir, start=module_abspath)))
309
310    #{ Edit Interface
311
312    @classmethod
313    def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
314        """Add a new submodule to the given repository. This will alter the index
315        as well as the .gitmodules file, but will not create a new commit.
316        If the submodule already exists, no matter if the configuration differs
317        from the one provided, the existing submodule will be returned.
318
319        :param repo: Repository instance which should receive the submodule
320        :param name: The name/identifier for the submodule
321        :param path: repository-relative or absolute path at which the submodule
322            should be located
323            It will be created as required during the repository initialization.
324        :param url: git-clone compatible URL, see git-clone reference for more information
325            If None, the repository is assumed to exist, and the url of the first
326            remote is taken instead. This is useful if you want to make an existing
327            repository a submodule of anotherone.
328        :param branch: name of branch at which the submodule should (later) be checked out.
329            The given branch must exist in the remote repository, and will be checked
330            out locally as a tracking branch.
331            It will only be written into the configuration if it not None, which is
332            when the checked out branch will be the one the remote HEAD pointed to.
333            The result you get in these situation is somewhat fuzzy, and it is recommended
334            to specify at least 'master' here.
335            Examples are 'master' or 'feature/new'
336        :param no_checkout: if True, and if the repository has to be cloned manually,
337            no checkout will be performed
338        :return: The newly created submodule instance
339        :note: works atomically, such that no change will be done if the repository
340            update fails for instance"""
341        if repo.bare:
342            raise InvalidGitRepositoryError("Cannot add submodules to bare repositories")
343        # END handle bare repos
344
345        path = cls._to_relative_path(repo, path)
346
347        # assure we never put backslashes into the url, as some operating systems
348        # like it ...
349        if url is not None:
350            url = to_native_path_linux(url)
351        # END assure url correctness
352
353        # INSTANTIATE INTERMEDIATE SM
354        sm = cls(repo, cls.NULL_BIN_SHA, cls.k_default_mode, path, name, url='invalid-temporary')
355        if sm.exists():
356            # reretrieve submodule from tree
357            try:
358                sm = repo.head.commit.tree[path]
359                sm._name = name
360                return sm
361            except KeyError:
362                # could only be in index
363                index = repo.index
364                entry = index.entries[index.entry_key(path, 0)]
365                sm.binsha = entry.binsha
366                return sm
367            # END handle exceptions
368        # END handle existing
369
370        # fake-repo - we only need the functionality on the branch instance
371        br = git.Head(repo, git.Head.to_full_path(str(branch) or cls.k_head_default))
372        has_module = sm.module_exists()
373        branch_is_default = branch is None
374        if has_module and url is not None:
375            if url not in [r.url for r in sm.module().remotes]:
376                raise ValueError(
377                    "Specified URL '%s' does not match any remote url of the repository at '%s'" % (url, sm.abspath))
378            # END check url
379        # END verify urls match
380
381        mrepo = None
382        if url is None:
383            if not has_module:
384                raise ValueError("A URL was not given and existing repository did not exsit at %s" % path)
385            # END check url
386            mrepo = sm.module()
387            urls = [r.url for r in mrepo.remotes]
388            if not urls:
389                raise ValueError("Didn't find any remote url in repository at %s" % sm.abspath)
390            # END verify we have url
391            url = urls[0]
392        else:
393            # clone new repo
394            kwargs = {'n': no_checkout}
395            if not branch_is_default:
396                kwargs['b'] = br.name
397            # END setup checkout-branch
398
399            # _clone_repo(cls, repo, url, path, name, **kwargs):
400            mrepo = cls._clone_repo(repo, url, path, name, **kwargs)
401        # END verify url
402
403        ## See #525 for ensuring git urls in config-files valid under Windows.
404        url = Git.polish_url(url)
405
406        # It's important to add the URL to the parent config, to let `git submodule` know.
407        # otherwise there is a '-' character in front of the submodule listing
408        #  a38efa84daef914e4de58d1905a500d8d14aaf45 mymodule (v0.9.0-1-ga38efa8)
409        # -a38efa84daef914e4de58d1905a500d8d14aaf45 submodules/intermediate/one
410        with sm.repo.config_writer() as writer:
411            writer.set_value(sm_section(name), 'url', url)
412
413        # update configuration and index
414        index = sm.repo.index
415        with sm.config_writer(index=index, write=False) as writer:
416            writer.set_value('url', url)
417            writer.set_value('path', path)
418
419            sm._url = url
420            if not branch_is_default:
421                # store full path
422                writer.set_value(cls.k_head_option, br.path)
423                sm._branch_path = br.path
424
425        # we deliberately assume that our head matches our index !
426        sm.binsha = mrepo.head.commit.binsha
427        index.add([sm], write=True)
428
429        return sm
430
431    def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, dry_run=False,
432               force=False, keep_going=False):
433        """Update the repository of this submodule to point to the checkout
434        we point at with the binsha of this instance.
435
436        :param recursive: if True, we will operate recursively and update child-
437            modules as well.
438        :param init: if True, the module repository will be cloned into place if necessary
439        :param to_latest_revision: if True, the submodule's sha will be ignored during checkout.
440            Instead, the remote will be fetched, and the local tracking branch updated.
441            This only works if we have a local tracking branch, which is the case
442            if the remote repository had a master branch, or of the 'branch' option
443            was specified for this submodule and the branch existed remotely
444        :param progress: UpdateProgress instance or None if no progress should be shown
445        :param dry_run: if True, the operation will only be simulated, but not performed.
446            All performed operations are read-only
447        :param force:
448            If True, we may reset heads even if the repository in question is dirty. Additinoally we will be allowed
449            to set a tracking branch which is ahead of its remote branch back into the past or the location of the
450            remote branch. This will essentially 'forget' commits.
451            If False, local tracking branches that are in the future of their respective remote branches will simply
452            not be moved.
453        :param keep_going: if True, we will ignore but log all errors, and keep going recursively.
454            Unless dry_run is set as well, keep_going could cause subsequent/inherited errors you wouldn't see
455            otherwise.
456            In conjunction with dry_run, it can be useful to anticipate all errors when updating submodules
457        :note: does nothing in bare repositories
458        :note: method is definitely not atomic if recurisve is True
459        :return: self"""
460        if self.repo.bare:
461            return self
462        # END pass in bare mode
463
464        if progress is None:
465            progress = UpdateProgress()
466        # END handle progress
467        prefix = ''
468        if dry_run:
469            prefix = "DRY-RUN: "
470        # END handle prefix
471
472        # to keep things plausible in dry-run mode
473        if dry_run:
474            mrepo = None
475        # END init mrepo
476
477        try:
478            # ASSURE REPO IS PRESENT AND UPTODATE
479            #####################################
480            try:
481                mrepo = self.module()
482                rmts = mrepo.remotes
483                len_rmts = len(rmts)
484                for i, remote in enumerate(rmts):
485                    op = FETCH
486                    if i == 0:
487                        op |= BEGIN
488                    # END handle start
489
490                    progress.update(op, i, len_rmts, prefix + "Fetching remote %s of submodule %r"
491                                    % (remote, self.name))
492                    #===============================
493                    if not dry_run:
494                        remote.fetch(progress=progress)
495                    # END handle dry-run
496                    #===============================
497                    if i == len_rmts - 1:
498                        op |= END
499                    # END handle end
500                    progress.update(op, i, len_rmts, prefix + "Done fetching remote of submodule %r" % self.name)
501                # END fetch new data
502            except InvalidGitRepositoryError:
503                if not init:
504                    return self
505                # END early abort if init is not allowed
506
507                # there is no git-repository yet - but delete empty paths
508                checkout_module_abspath = self.abspath
509                if not dry_run and osp.isdir(checkout_module_abspath):
510                    try:
511                        os.rmdir(checkout_module_abspath)
512                    except OSError:
513                        raise OSError("Module directory at %r does already exist and is non-empty"
514                                      % checkout_module_abspath)
515                    # END handle OSError
516                # END handle directory removal
517
518                # don't check it out at first - nonetheless it will create a local
519                # branch according to the remote-HEAD if possible
520                progress.update(BEGIN | CLONE, 0, 1, prefix + "Cloning url '%s' to '%s' in submodule %r" %
521                                (self.url, checkout_module_abspath, self.name))
522                if not dry_run:
523                    mrepo = self._clone_repo(self.repo, self.url, self.path, self.name, n=True)
524                # END handle dry-run
525                progress.update(END | CLONE, 0, 1, prefix + "Done cloning to %s" % checkout_module_abspath)
526
527                if not dry_run:
528                    # see whether we have a valid branch to checkout
529                    try:
530                        # find  a remote which has our branch - we try to be flexible
531                        remote_branch = find_first_remote_branch(mrepo.remotes, self.branch_name)
532                        local_branch = mkhead(mrepo, self.branch_path)
533
534                        # have a valid branch, but no checkout - make sure we can figure
535                        # that out by marking the commit with a null_sha
536                        local_branch.set_object(Object(mrepo, self.NULL_BIN_SHA))
537                        # END initial checkout + branch creation
538
539                        # make sure HEAD is not detached
540                        mrepo.head.set_reference(local_branch, logmsg="submodule: attaching head to %s" % local_branch)
541                        mrepo.head.ref.set_tracking_branch(remote_branch)
542                    except (IndexError, InvalidGitRepositoryError):
543                        log.warn("Failed to checkout tracking branch %s", self.branch_path)
544                    # END handle tracking branch
545
546                    # NOTE: Have to write the repo config file as well, otherwise
547                    # the default implementation will be offended and not update the repository
548                    # Maybe this is a good way to assure it doesn't get into our way, but
549                    # we want to stay backwards compatible too ... . Its so redundant !
550                    with self.repo.config_writer() as writer:
551                        writer.set_value(sm_section(self.name), 'url', self.url)
552                # END handle dry_run
553            # END handle initialization
554
555            # DETERMINE SHAS TO CHECKOUT
556            ############################
557            binsha = self.binsha
558            hexsha = self.hexsha
559            if mrepo is not None:
560                # mrepo is only set if we are not in dry-run mode or if the module existed
561                is_detached = mrepo.head.is_detached
562            # END handle dry_run
563
564            if mrepo is not None and to_latest_revision:
565                msg_base = "Cannot update to latest revision in repository at %r as " % mrepo.working_dir
566                if not is_detached:
567                    rref = mrepo.head.ref.tracking_branch()
568                    if rref is not None:
569                        rcommit = rref.commit
570                        binsha = rcommit.binsha
571                        hexsha = rcommit.hexsha
572                    else:
573                        log.error("%s a tracking branch was not set for local branch '%s'", msg_base, mrepo.head.ref)
574                    # END handle remote ref
575                else:
576                    log.error("%s there was no local tracking branch", msg_base)
577                # END handle detached head
578            # END handle to_latest_revision option
579
580            # update the working tree
581            # handles dry_run
582            if mrepo is not None and mrepo.head.commit.binsha != binsha:
583                # We must assure that our destination sha (the one to point to) is in the future of our current head.
584                # Otherwise, we will reset changes that might have been done on the submodule, but were not yet pushed
585                # We also handle the case that history has been rewritten, leaving no merge-base. In that case
586                # we behave conservatively, protecting possible changes the user had done
587                may_reset = True
588                if mrepo.head.commit.binsha != self.NULL_BIN_SHA:
589                    base_commit = mrepo.merge_base(mrepo.head.commit, hexsha)
590                    if len(base_commit) == 0 or base_commit[0].hexsha == hexsha:
591                        if force:
592                            msg = "Will force checkout or reset on local branch that is possibly in the future of"
593                            msg += "the commit it will be checked out to, effectively 'forgetting' new commits"
594                            log.debug(msg)
595                        else:
596                            msg = "Skipping %s on branch '%s' of submodule repo '%s' as it contains un-pushed commits"
597                            msg %= (is_detached and "checkout" or "reset", mrepo.head, mrepo)
598                            log.info(msg)
599                            may_reset = False
600                        # end handle force
601                    # end handle if we are in the future
602
603                    if may_reset and not force and mrepo.is_dirty(index=True, working_tree=True, untracked_files=True):
604                        raise RepositoryDirtyError(mrepo, "Cannot reset a dirty repository")
605                    # end handle force and dirty state
606                # end handle empty repo
607
608                # end verify future/past
609                progress.update(BEGIN | UPDWKTREE, 0, 1, prefix +
610                                "Updating working tree at %s for submodule %r to revision %s"
611                                % (self.path, self.name, hexsha))
612
613                if not dry_run and may_reset:
614                    if is_detached:
615                        # NOTE: for now we force, the user is no supposed to change detached
616                        # submodules anyway. Maybe at some point this becomes an option, to
617                        # properly handle user modifications - see below for future options
618                        # regarding rebase and merge.
619                        mrepo.git.checkout(hexsha, force=force)
620                    else:
621                        mrepo.head.reset(hexsha, index=True, working_tree=True)
622                    # END handle checkout
623                # if we may reset/checkout
624                progress.update(END | UPDWKTREE, 0, 1, prefix + "Done updating working tree for submodule %r"
625                                % self.name)
626            # END update to new commit only if needed
627        except Exception as err:
628            if not keep_going:
629                raise
630            log.error(str(err))
631        # end handle keep_going
632
633        # HANDLE RECURSION
634        ##################
635        if recursive:
636            # in dry_run mode, the module might not exist
637            if mrepo is not None:
638                for submodule in self.iter_items(self.module()):
639                    submodule.update(recursive, init, to_latest_revision, progress=progress, dry_run=dry_run,
640                                     force=force, keep_going=keep_going)
641                # END handle recursive update
642            # END handle dry run
643        # END for each submodule
644
645        return self
646
647    @unbare_repo
648    def move(self, module_path, configuration=True, module=True):
649        """Move the submodule to a another module path. This involves physically moving
650        the repository at our current path, changing the configuration, as well as
651        adjusting our index entry accordingly.
652
653        :param module_path: the path to which to move our module in the parent repostory's working tree,
654            given as repository-relative or absolute path. Intermediate directories will be created
655            accordingly. If the path already exists, it must be empty.
656            Trailing (back)slashes are removed automatically
657        :param configuration: if True, the configuration will be adjusted to let
658            the submodule point to the given path.
659        :param module: if True, the repository managed by this submodule
660            will be moved as well. If False, we don't move the submodule's checkout, which may leave
661            the parent repository in an inconsistent state.
662        :return: self
663        :raise ValueError: if the module path existed and was not empty, or was a file
664        :note: Currently the method is not atomic, and it could leave the repository
665            in an inconsistent state if a sub-step fails for some reason
666        """
667        if module + configuration < 1:
668            raise ValueError("You must specify to move at least the module or the configuration of the submodule")
669        # END handle input
670
671        module_checkout_path = self._to_relative_path(self.repo, module_path)
672
673        # VERIFY DESTINATION
674        if module_checkout_path == self.path:
675            return self
676        # END handle no change
677
678        module_checkout_abspath = join_path_native(self.repo.working_tree_dir, module_checkout_path)
679        if osp.isfile(module_checkout_abspath):
680            raise ValueError("Cannot move repository onto a file: %s" % module_checkout_abspath)
681        # END handle target files
682
683        index = self.repo.index
684        tekey = index.entry_key(module_checkout_path, 0)
685        # if the target item already exists, fail
686        if configuration and tekey in index.entries:
687            raise ValueError("Index entry for target path did already exist")
688        # END handle index key already there
689
690        # remove existing destination
691        if module:
692            if osp.exists(module_checkout_abspath):
693                if len(os.listdir(module_checkout_abspath)):
694                    raise ValueError("Destination module directory was not empty")
695                # END handle non-emptiness
696
697                if osp.islink(module_checkout_abspath):
698                    os.remove(module_checkout_abspath)
699                else:
700                    os.rmdir(module_checkout_abspath)
701                # END handle link
702            else:
703                # recreate parent directories
704                # NOTE: renames() does that now
705                pass
706            # END handle existence
707        # END handle module
708
709        # move the module into place if possible
710        cur_path = self.abspath
711        renamed_module = False
712        if module and osp.exists(cur_path):
713            os.renames(cur_path, module_checkout_abspath)
714            renamed_module = True
715
716            if osp.isfile(osp.join(module_checkout_abspath, '.git')):
717                module_abspath = self._module_abspath(self.repo, self.path, self.name)
718                self._write_git_file_and_module_config(module_checkout_abspath, module_abspath)
719            # end handle git file rewrite
720        # END move physical module
721
722        # rename the index entry - have to manipulate the index directly as
723        # git-mv cannot be used on submodules ... yeah
724        previous_sm_path = self.path
725        try:
726            if configuration:
727                try:
728                    ekey = index.entry_key(self.path, 0)
729                    entry = index.entries[ekey]
730                    del(index.entries[ekey])
731                    nentry = git.IndexEntry(entry[:3] + (module_checkout_path,) + entry[4:])
732                    index.entries[tekey] = nentry
733                except KeyError:
734                    raise InvalidGitRepositoryError("Submodule's entry at %r did not exist" % (self.path))
735                # END handle submodule doesn't exist
736
737                # update configuration
738                with self.config_writer(index=index) as writer:        # auto-write
739                    writer.set_value('path', module_checkout_path)
740                    self.path = module_checkout_path
741            # END handle configuration flag
742        except Exception:
743            if renamed_module:
744                os.renames(module_checkout_abspath, cur_path)
745            # END undo module renaming
746            raise
747        # END handle undo rename
748
749        # Auto-rename submodule if it's name was 'default', that is, the checkout directory
750        if previous_sm_path == self.name:
751            self.rename(module_checkout_path)
752        # end
753
754        return self
755
756    @unbare_repo
757    def remove(self, module=True, force=False, configuration=True, dry_run=False):
758        """Remove this submodule from the repository. This will remove our entry
759        from the .gitmodules file and the entry in the .git/config file.
760
761        :param module: If True, the module checkout we point to will be deleted
762            as well. If the module is currently on a commit which is not part
763            of any branch in the remote, if the currently checked out branch
764            working tree, or untracked files,
765            is ahead of its tracking branch,  if you have modifications in the
766            In case the removal of the repository fails for these reasons, the
767            submodule status will not have been altered.
768            If this submodule has child-modules on its own, these will be deleted
769            prior to touching the own module.
770        :param force: Enforces the deletion of the module even though it contains
771            modifications. This basically enforces a brute-force file system based
772            deletion.
773        :param configuration: if True, the submodule is deleted from the configuration,
774            otherwise it isn't. Although this should be enabled most of the times,
775            this flag enables you to safely delete the repository of your submodule.
776        :param dry_run: if True, we will not actually do anything, but throw the errors
777            we would usually throw
778        :return: self
779        :note: doesn't work in bare repositories
780        :note: doesn't work atomically, as failure to remove any part of the submodule will leave
781            an inconsistent state
782        :raise InvalidGitRepositoryError: thrown if the repository cannot be deleted
783        :raise OSError: if directories or files could not be removed"""
784        if not (module or configuration):
785            raise ValueError("Need to specify to delete at least the module, or the configuration")
786        # END handle parameters
787
788        # Recursively remove children of this submodule
789        nc = 0
790        for csm in self.children():
791            nc += 1
792            csm.remove(module, force, configuration, dry_run)
793            del(csm)
794        # end
795        if configuration and not dry_run and nc > 0:
796            # Assure we don't leave the parent repository in a dirty state, and commit our changes
797            # It's important for recursive, unforced, deletions to work as expected
798            self.module().index.commit("Removed at least one of child-modules of '%s'" % self.name)
799        # end handle recursion
800
801        # DELETE REPOSITORY WORKING TREE
802        ################################
803        if module and self.module_exists():
804            mod = self.module()
805            git_dir = mod.git_dir
806            if force:
807                # take the fast lane and just delete everything in our module path
808                # TODO: If we run into permission problems, we have a highly inconsistent
809                # state. Delete the .git folders last, start with the submodules first
810                mp = self.abspath
811                method = None
812                if osp.islink(mp):
813                    method = os.remove
814                elif osp.isdir(mp):
815                    method = rmtree
816                elif osp.exists(mp):
817                    raise AssertionError("Cannot forcibly delete repository as it was neither a link, nor a directory")
818                # END handle brutal deletion
819                if not dry_run:
820                    assert method
821                    method(mp)
822                # END apply deletion method
823            else:
824                # verify we may delete our module
825                if mod.is_dirty(index=True, working_tree=True, untracked_files=True):
826                    raise InvalidGitRepositoryError(
827                        "Cannot delete module at %s with any modifications, unless force is specified"
828                        % mod.working_tree_dir)
829                # END check for dirt
830
831                # figure out whether we have new commits compared to the remotes
832                # NOTE: If the user pulled all the time, the remote heads might
833                # not have been updated, so commits coming from the remote look
834                # as if they come from us. But we stay strictly read-only and
835                # don't fetch beforehand.
836                for remote in mod.remotes:
837                    num_branches_with_new_commits = 0
838                    rrefs = remote.refs
839                    for rref in rrefs:
840                        num_branches_with_new_commits += len(mod.git.cherry(rref)) != 0
841                    # END for each remote ref
842                    # not a single remote branch contained all our commits
843                    if len(rrefs) and num_branches_with_new_commits == len(rrefs):
844                        raise InvalidGitRepositoryError(
845                            "Cannot delete module at %s as there are new commits" % mod.working_tree_dir)
846                    # END handle new commits
847                    # have to manually delete references as python's scoping is
848                    # not existing, they could keep handles open ( on windows this is a problem )
849                    if len(rrefs):
850                        del(rref)
851                    # END handle remotes
852                    del(rrefs)
853                    del(remote)
854                # END for each remote
855
856                # finally delete our own submodule
857                if not dry_run:
858                    self._clear_cache()
859                    wtd = mod.working_tree_dir
860                    del(mod)        # release file-handles (windows)
861                    import gc
862                    gc.collect()
863                    try:
864                        rmtree(wtd)
865                    except Exception as ex:
866                        if HIDE_WINDOWS_KNOWN_ERRORS:
867                            raise SkipTest("FIXME: fails with: PermissionError\n  %s", ex)
868                        else:
869                            raise
870                # END delete tree if possible
871            # END handle force
872
873            if not dry_run and osp.isdir(git_dir):
874                self._clear_cache()
875                try:
876                    rmtree(git_dir)
877                except Exception as ex:
878                    if HIDE_WINDOWS_KNOWN_ERRORS:
879                        raise SkipTest("FIXME: fails with: PermissionError\n  %s", ex)
880                    else:
881                        raise
882            # end handle separate bare repository
883        # END handle module deletion
884
885        # void our data not to delay invalid access
886        if not dry_run:
887            self._clear_cache()
888
889        # DELETE CONFIGURATION
890        ######################
891        if configuration and not dry_run:
892            # first the index-entry
893            parent_index = self.repo.index
894            try:
895                del(parent_index.entries[parent_index.entry_key(self.path, 0)])
896            except KeyError:
897                pass
898            # END delete entry
899            parent_index.write()
900
901            # now git config - need the config intact, otherwise we can't query
902            # information anymore
903            with self.repo.config_writer() as writer:
904                writer.remove_section(sm_section(self.name))
905
906            with self.config_writer() as writer:
907                writer.remove_section()
908        # END delete configuration
909
910        return self
911
912    def set_parent_commit(self, commit, check=True):
913        """Set this instance to use the given commit whose tree is supposed to
914        contain the .gitmodules blob.
915
916        :param commit:
917            Commit'ish reference pointing at the root_tree, or None to always point to the
918            most recent commit
919        :param check:
920            if True, relatively expensive checks will be performed to verify
921            validity of the submodule.
922        :raise ValueError: if the commit's tree didn't contain the .gitmodules blob.
923        :raise ValueError:
924            if the parent commit didn't store this submodule under the current path
925        :return: self"""
926        if commit is None:
927            self._parent_commit = None
928            return self
929        # end handle None
930        pcommit = self.repo.commit(commit)
931        pctree = pcommit.tree
932        if self.k_modules_file not in pctree:
933            raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.k_modules_file))
934        # END handle exceptions
935
936        prev_pc = self._parent_commit
937        self._parent_commit = pcommit
938
939        if check:
940            parser = self._config_parser(self.repo, self._parent_commit, read_only=True)
941            if not parser.has_section(sm_section(self.name)):
942                self._parent_commit = prev_pc
943                raise ValueError("Submodule at path %r did not exist in parent commit %s" % (self.path, commit))
944            # END handle submodule did not exist
945        # END handle checking mode
946
947        # update our sha, it could have changed
948        # If check is False, we might see a parent-commit that doesn't even contain the submodule anymore.
949        # in that case, mark our sha as being NULL
950        try:
951            self.binsha = pctree[self.path].binsha
952        except KeyError:
953            self.binsha = self.NULL_BIN_SHA
954        # end
955
956        self._clear_cache()
957        return self
958
959    @unbare_repo
960    def config_writer(self, index=None, write=True):
961        """:return: a config writer instance allowing you to read and write the data
962        belonging to this submodule into the .gitmodules file.
963
964        :param index: if not None, an IndexFile instance which should be written.
965            defaults to the index of the Submodule's parent repository.
966        :param write: if True, the index will be written each time a configuration
967            value changes.
968        :note: the parameters allow for a more efficient writing of the index,
969            as you can pass in a modified index on your own, prevent automatic writing,
970            and write yourself once the whole operation is complete
971        :raise ValueError: if trying to get a writer on a parent_commit which does not
972            match the current head commit
973        :raise IOError: If the .gitmodules file/blob could not be read"""
974        writer = self._config_parser_constrained(read_only=False)
975        if index is not None:
976            writer.config._index = index
977        writer.config._auto_write = write
978        return writer
979
980    @unbare_repo
981    def rename(self, new_name):
982        """Rename this submodule
983        :note: This method takes care of renaming the submodule in various places, such as
984
985            * $parent_git_dir/config
986            * $working_tree_dir/.gitmodules
987            * (git >=v1.8.0: move submodule repository to new name)
988
989        As .gitmodules will be changed, you would need to make a commit afterwards. The changed .gitmodules file
990        will already be added to the index
991
992        :return: this submodule instance
993        """
994        if self.name == new_name:
995            return self
996
997        # .git/config
998        with self.repo.config_writer() as pw:
999            # As we ourselves didn't write anything about submodules into the parent .git/config,
1000            # we will not require it to exist, and just ignore missing entries.
1001            if pw.has_section(sm_section(self.name)):
1002                pw.rename_section(sm_section(self.name), sm_section(new_name))
1003
1004        # .gitmodules
1005        with self.config_writer(write=True).config as cw:
1006            cw.rename_section(sm_section(self.name), sm_section(new_name))
1007
1008        self._name = new_name
1009
1010        # .git/modules
1011        mod = self.module()
1012        if mod.has_separate_working_tree():
1013            destination_module_abspath = self._module_abspath(self.repo, self.path, new_name)
1014            source_dir = mod.git_dir
1015            # Let's be sure the submodule name is not so obviously tied to a directory
1016            if destination_module_abspath.startswith(mod.git_dir):
1017                tmp_dir = self._module_abspath(self.repo, self.path, str(uuid.uuid4()))
1018                os.renames(source_dir, tmp_dir)
1019                source_dir = tmp_dir
1020            # end handle self-containment
1021            os.renames(source_dir, destination_module_abspath)
1022            self._write_git_file_and_module_config(mod.working_tree_dir, destination_module_abspath)
1023        # end move separate git repository
1024
1025        return self
1026
1027    #} END edit interface
1028
1029    #{ Query Interface
1030
1031    @unbare_repo
1032    def module(self):
1033        """:return: Repo instance initialized from the repository at our submodule path
1034        :raise InvalidGitRepositoryError: if a repository was not available. This could
1035            also mean that it was not yet initialized"""
1036        # late import to workaround circular dependencies
1037        module_checkout_abspath = self.abspath
1038        try:
1039            repo = git.Repo(module_checkout_abspath)
1040            if repo != self.repo:
1041                return repo
1042            # END handle repo uninitialized
1043        except (InvalidGitRepositoryError, NoSuchPathError):
1044            raise InvalidGitRepositoryError("No valid repository at %s" % module_checkout_abspath)
1045        else:
1046            raise InvalidGitRepositoryError("Repository at %r was not yet checked out" % module_checkout_abspath)
1047        # END handle exceptions
1048
1049    def module_exists(self):
1050        """:return: True if our module exists and is a valid git repository. See module() method"""
1051        try:
1052            self.module()
1053            return True
1054        except Exception:
1055            return False
1056        # END handle exception
1057
1058    def exists(self):
1059        """
1060        :return: True if the submodule exists, False otherwise. Please note that
1061            a submodule may exist (in the .gitmodules file) even though its module
1062            doesn't exist on disk"""
1063        # keep attributes for later, and restore them if we have no valid data
1064        # this way we do not actually alter the state of the object
1065        loc = locals()
1066        for attr in self._cache_attrs:
1067            try:
1068                if hasattr(self, attr):
1069                    loc[attr] = getattr(self, attr)
1070                # END if we have the attribute cache
1071            except (cp.NoSectionError, ValueError):
1072                # on PY3, this can happen apparently ... don't know why this doesn't happen on PY2
1073                pass
1074        # END for each attr
1075        self._clear_cache()
1076
1077        try:
1078            try:
1079                self.path
1080                return True
1081            except Exception:
1082                return False
1083            # END handle exceptions
1084        finally:
1085            for attr in self._cache_attrs:
1086                if attr in loc:
1087                    setattr(self, attr, loc[attr])
1088                # END if we have a cache
1089            # END reapply each attribute
1090        # END handle object state consistency
1091
1092    @property
1093    def branch(self):
1094        """:return: The branch instance that we are to checkout
1095        :raise InvalidGitRepositoryError: if our module is not yet checked out"""
1096        return mkhead(self.module(), self._branch_path)
1097
1098    @property
1099    def branch_path(self):
1100        """
1101        :return: full (relative) path as string to the branch we would checkout
1102            from the remote and track"""
1103        return self._branch_path
1104
1105    @property
1106    def branch_name(self):
1107        """:return: the name of the branch, which is the shortest possible branch name"""
1108        # use an instance method, for this we create a temporary Head instance
1109        # which uses a repository that is available at least ( it makes no difference )
1110        return git.Head(self.repo, self._branch_path).name
1111
1112    @property
1113    def url(self):
1114        """:return: The url to the repository which our module-repository refers to"""
1115        return self._url
1116
1117    @property
1118    def parent_commit(self):
1119        """:return: Commit instance with the tree containing the .gitmodules file
1120        :note: will always point to the current head's commit if it was not set explicitly"""
1121        if self._parent_commit is None:
1122            return self.repo.commit()
1123        return self._parent_commit
1124
1125    @property
1126    def name(self):
1127        """:return: The name of this submodule. It is used to identify it within the
1128            .gitmodules file.
1129        :note: by default, the name is the path at which to find the submodule, but
1130            in git-python it should be a unique identifier similar to the identifiers
1131            used for remotes, which allows to change the path of the submodule
1132            easily
1133        """
1134        return self._name
1135
1136    def config_reader(self):
1137        """
1138        :return: ConfigReader instance which allows you to qurey the configuration values
1139            of this submodule, as provided by the .gitmodules file
1140        :note: The config reader will actually read the data directly from the repository
1141            and thus does not need nor care about your working tree.
1142        :note: Should be cached by the caller and only kept as long as needed
1143        :raise IOError: If the .gitmodules file/blob could not be read"""
1144        return self._config_parser_constrained(read_only=True)
1145
1146    def children(self):
1147        """
1148        :return: IterableList(Submodule, ...) an iterable list of submodules instances
1149            which are children of this submodule or 0 if the submodule is not checked out"""
1150        return self._get_intermediate_items(self)
1151
1152    #} END query interface
1153
1154    #{ Iterable Interface
1155
1156    @classmethod
1157    def iter_items(cls, repo, parent_commit='HEAD'):
1158        """:return: iterator yielding Submodule instances available in the given repository"""
1159        pc = repo.commit(parent_commit)         # parent commit instance
1160        try:
1161            parser = cls._config_parser(repo, pc, read_only=True)
1162        except IOError:
1163            raise StopIteration
1164        # END handle empty iterator
1165
1166        rt = pc.tree                                # root tree
1167
1168        for sms in parser.sections():
1169            n = sm_name(sms)
1170            p = parser.get(sms, 'path')
1171            u = parser.get(sms, 'url')
1172            b = cls.k_head_default
1173            if parser.has_option(sms, cls.k_head_option):
1174                b = str(parser.get(sms, cls.k_head_option))
1175            # END handle optional information
1176
1177            # get the binsha
1178            index = repo.index
1179            try:
1180                sm = rt[p]
1181            except KeyError:
1182                # try the index, maybe it was just added
1183                try:
1184                    entry = index.entries[index.entry_key(p, 0)]
1185                    sm = Submodule(repo, entry.binsha, entry.mode, entry.path)
1186                except KeyError:
1187                    raise InvalidGitRepositoryError(
1188                        "Gitmodule path %r did not exist in revision of parent commit %s" % (p, parent_commit))
1189                # END handle keyerror
1190            # END handle critical error
1191
1192            # fill in remaining info - saves time as it doesn't have to be parsed again
1193            sm._name = n
1194            if pc != repo.commit():
1195                sm._parent_commit = pc
1196            # end set only if not most recent !
1197            sm._branch_path = git.Head.to_full_path(b)
1198            sm._url = u
1199
1200            yield sm
1201        # END for each section
1202
1203    #} END iterable interface
1204