1# index.py
2# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
3#
4# This module is part of GitPython and is released under
5# the BSD License: http://www.opensource.org/licenses/bsd-license.php
6import glob
7from io import BytesIO
8import os
9from stat import S_ISLNK
10import subprocess
11import tempfile
12
13from git.compat import (
14    izip,
15    xrange,
16    string_types,
17    force_bytes,
18    defenc,
19    mviter,
20)
21from git.exc import (
22    GitCommandError,
23    CheckoutError,
24    InvalidGitRepositoryError
25)
26from git.objects import (
27    Blob,
28    Submodule,
29    Tree,
30    Object,
31    Commit,
32)
33from git.objects.util import Serializable
34from git.util import (
35    LazyMixin,
36    LockedFD,
37    join_path_native,
38    file_contents_ro,
39    to_native_path_linux,
40    unbare_repo,
41    to_bin_sha
42)
43from gitdb.base import IStream
44from gitdb.db import MemoryDB
45
46import git.diff as diff
47import os.path as osp
48
49from .fun import (
50    entry_key,
51    write_cache,
52    read_cache,
53    aggressive_tree_merge,
54    write_tree_from_cache,
55    stat_mode_to_index_mode,
56    S_IFGITLINK,
57    run_commit_hook
58)
59from .typ import (
60    BaseIndexEntry,
61    IndexEntry,
62)
63from .util import (
64    TemporaryFileSwap,
65    post_clear_cache,
66    default_index,
67    git_working_dir
68)
69
70
71__all__ = ('IndexFile', 'CheckoutError')
72
73
74class IndexFile(LazyMixin, diff.Diffable, Serializable):
75
76    """
77    Implements an Index that can be manipulated using a native implementation in
78    order to save git command function calls wherever possible.
79
80    It provides custom merging facilities allowing to merge without actually changing
81    your index or your working tree. This way you can perform own test-merges based
82    on the index only without having to deal with the working copy. This is useful
83    in case of partial working trees.
84
85    ``Entries``
86
87    The index contains an entries dict whose keys are tuples of type IndexEntry
88    to facilitate access.
89
90    You may read the entries dict or manipulate it using IndexEntry instance, i.e.::
91
92        index.entries[index.entry_key(index_entry_instance)] = index_entry_instance
93
94    Make sure you use index.write() once you are done manipulating the index directly
95    before operating on it using the git command"""
96    __slots__ = ("repo", "version", "entries", "_extension_data", "_file_path")
97    _VERSION = 2            # latest version we support
98    S_IFGITLINK = S_IFGITLINK  # a submodule
99
100    def __init__(self, repo, file_path=None):
101        """Initialize this Index instance, optionally from the given ``file_path``.
102        If no file_path is given, we will be created from the current index file.
103
104        If a stream is not given, the stream will be initialized from the current
105        repository's index on demand."""
106        self.repo = repo
107        self.version = self._VERSION
108        self._extension_data = b''
109        self._file_path = file_path or self._index_path()
110
    def _set_cache_(self, attr):
        """Lazily fill the cache slot ``attr``.

        For ``entries`` the index file is read from disk under a lock; all other
        attributes are delegated to the base class implementation."""
        if attr == "entries":
            # read the current index
            # try memory map for speed
            lfd = LockedFD(self._file_path)
            ok = False
            try:
                fd = lfd.open(write=False, stream=False)
                ok = True
            except OSError:
                # in new repositories, there may be no index, which means we are empty
                self.entries = {}
                return
            finally:
                # only roll the lock back if open() itself failed - on success
                # we still need the fd below and roll back after deserializing
                if not ok:
                    lfd.rollback()
            # END exception handling

            stream = file_contents_ro(fd, stream=True, allow_mmap=True)

            try:
                self._deserialize(stream)
            finally:
                # release the read lock in all cases; entries are in memory now
                lfd.rollback()
                # The handles will be closed on destruction
            # END read from default index on demand
        else:
            super(IndexFile, self)._set_cache_(attr)
139
140    def _index_path(self):
141        return join_path_native(self.repo.git_dir, "index")
142
143    @property
144    def path(self):
145        """ :return: Path to the index file we are representing """
146        return self._file_path
147
148    def _delete_entries_cache(self):
149        """Safely clear the entries cache so it can be recreated"""
150        try:
151            del(self.entries)
152        except AttributeError:
153            # fails in python 2.6.5 with this exception
154            pass
155        # END exception handling
156
157    #{ Serializable Interface
158
159    def _deserialize(self, stream):
160        """Initialize this instance with index values read from the given stream"""
161        self.version, self.entries, self._extension_data, conten_sha = read_cache(stream)  # @UnusedVariable
162        return self
163
164    def _entries_sorted(self):
165        """:return: list of entries, in a sorted fashion, first by path, then by stage"""
166        return sorted(self.entries.values(), key=lambda e: (e.path, e.stage))
167
168    def _serialize(self, stream, ignore_extension_data=False):
169        entries = self._entries_sorted()
170        extension_data = self._extension_data
171        if ignore_extension_data:
172            extension_data = None
173        write_cache(entries, stream, extension_data)
174        return self
175
176    #} END serializable interface
177
178    def write(self, file_path=None, ignore_extension_data=False):
179        """Write the current state to our file path or to the given one
180
181        :param file_path:
182            If None, we will write to our stored file path from which we have
183            been initialized. Otherwise we write to the given file path.
184            Please note that this will change the file_path of this index to
185            the one you gave.
186
187        :param ignore_extension_data:
188            If True, the TREE type extension data read in the index will not
189            be written to disk. NOTE that no extension data is actually written.
190            Use this if you have altered the index and
191            would like to use git-write-tree afterwards to create a tree
192            representing your written changes.
193            If this data is present in the written index, git-write-tree
194            will instead write the stored/cached tree.
195            Alternatively, use IndexFile.write_tree() to handle this case
196            automatically
197
198        :return: self"""
199        # make sure we have our entries read before getting a write lock
200        # else it would be done when streaming. This can happen
201        # if one doesn't change the index, but writes it right away
202        self.entries
203        lfd = LockedFD(file_path or self._file_path)
204        stream = lfd.open(write=True, stream=True)
205
206        ok = False
207        try:
208            self._serialize(stream, ignore_extension_data)
209            ok = True
210        finally:
211            if not ok:
212                lfd.rollback()
213
214        lfd.commit()
215
216        # make sure we represent what we have written
217        if file_path is not None:
218            self._file_path = file_path
219
220    @post_clear_cache
221    @default_index
222    def merge_tree(self, rhs, base=None):
223        """Merge the given rhs treeish into the current index, possibly taking
224        a common base treeish into account.
225
226        As opposed to the from_tree_ method, this allows you to use an already
227        existing tree as the left side of the merge
228
229        :param rhs:
230            treeish reference pointing to the 'other' side of the merge.
231
232        :param base:
233            optional treeish reference pointing to the common base of 'rhs' and
234            this index which equals lhs
235
236        :return:
237            self ( containing the merge and possibly unmerged entries in case of
238            conflicts )
239
240        :raise GitCommandError:
241            If there is a merge conflict. The error will
242            be raised at the first conflicting path. If you want to have proper
243            merge resolution to be done by yourself, you have to commit the changed
244            index ( or make a valid tree from it ) and retry with a three-way
245            index.from_tree call. """
246        # -i : ignore working tree status
247        # --aggressive : handle more merge cases
248        # -m : do an actual merge
249        args = ["--aggressive", "-i", "-m"]
250        if base is not None:
251            args.append(base)
252        args.append(rhs)
253
254        self.repo.git.read_tree(args)
255        return self
256
257    @classmethod
258    def new(cls, repo, *tree_sha):
259        """ Merge the given treeish revisions into a new index which is returned.
260        This method behaves like git-read-tree --aggressive when doing the merge.
261
262        :param repo: The repository treeish are located in.
263
264        :param tree_sha:
265            20 byte or 40 byte tree sha or tree objects
266
267        :return:
268            New IndexFile instance. Its path will be undefined.
269            If you intend to write such a merged Index, supply an alternate file_path
270            to its 'write' method."""
271        base_entries = aggressive_tree_merge(repo.odb, [to_bin_sha(str(t)) for t in tree_sha])
272
273        inst = cls(repo)
274        # convert to entries dict
275        entries = dict(izip(((e.path, e.stage) for e in base_entries),
276                            (IndexEntry.from_base(e) for e in base_entries)))
277
278        inst.entries = entries
279        return inst
280
    @classmethod
    def from_tree(cls, repo, *treeish, **kwargs):
        """Merge the given treeish revisions into a new index which is returned.
        The original index will remain unaltered

        :param repo:
            The repository treeish are located in.

        :param treeish:
            One, two or three Tree Objects, Commits or 40 byte hexshas. The result
            changes according to the amount of trees.
            If 1 Tree is given, it will just be read into a new index
            If 2 Trees are given, they will be merged into a new index using a
            two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other'
            one. It behaves like a fast-forward.
            If 3 Trees are given, a 3-way merge will be performed with the first tree
            being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree,
            tree 3 is the 'other' one

        :param kwargs:
            Additional arguments passed to git-read-tree

        :return:
            New IndexFile instance. It will point to a temporary index location which
            does not exist anymore. If you intend to write such a merged Index, supply
            an alternate file_path to its 'write' method.

        :note:
            In the three-way merge case, --aggressive will be specified to automatically
            resolve more cases in a commonly correct manner. Specify trivial=True as kwarg
            to override that.

            As the underlying git-read-tree command takes into account the current index,
            it will be temporarily moved out of the way to assure there are no unsuspected
            interferences."""
        if len(treeish) == 0 or len(treeish) > 3:
            raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish))

        arg_list = []
        # ignore that working tree and index possibly are out of date
        if len(treeish) > 1:
            # drop unmerged entries when reading our index and merging
            arg_list.append("--reset")
            # handle non-trivial cases the way a real merge does
            arg_list.append("--aggressive")
        # END merge handling

        # tmp file created in git home directory to be sure renaming
        # works - /tmp/ dirs could be on another device
        # NOTE(review): tempfile.mktemp is deprecated as insecure; here only the
        # name is used and git-read-tree creates the file itself - confirm this
        # is acceptable before replacing it with mkstemp
        tmp_index = tempfile.mktemp('', '', repo.git_dir)
        arg_list.append("--index-output=%s" % tmp_index)
        arg_list.extend(treeish)

        # move current index out of the way - otherwise the merge may fail
        # as it considers existing entries. moving it essentially clears the index.
        # Unfortunately there is no 'soft' way to do it.
        # The TemporaryFileSwap assure the original file get put back
        index_handler = TemporaryFileSwap(join_path_native(repo.git_dir, 'index'))
        try:
            repo.git.read_tree(*arg_list, **kwargs)
            index = cls(repo, tmp_index)
            index.entries       # force it to read the file as we will delete the temp-file
            del(index_handler)  # release as soon as possible
        finally:
            if osp.exists(tmp_index):
                os.remove(tmp_index)
        # END index merge handling

        return index
350
351    # UTILITIES
    @unbare_repo
    def _iter_expand_paths(self, paths):
        """Expand the directories in list of paths to the corresponding paths accordingly,
        yielding paths relative to the working tree: symlinks are yielded as-is,
        glob patterns are resolved recursively, and directories are walked with
        each contained file yielded individually.

        Note: git will add items multiple times even if a glob overlapped
        with manually specified paths or if paths where specified multiple
        times - we respect that and do not prune"""
        def raise_exc(e):
            # propagate os.walk errors instead of silently skipping directories
            raise e
        r = self.repo.working_tree_dir
        # prefix (with trailing separator) stripped from yielded paths
        rs = r + os.sep
        for path in paths:
            abs_path = path
            if not osp.isabs(abs_path):
                abs_path = osp.join(r, path)
            # END make absolute path

            try:
                st = os.lstat(abs_path)     # handles non-symlinks as well
            except OSError:
                # the lstat call may fail as the path may contain globs as well
                pass
            else:
                if S_ISLNK(st.st_mode):
                    # yield the link itself - never follow or expand it
                    yield abs_path.replace(rs, '')
                    continue
            # end check symlink

            # resolve globs if possible
            if '?' in path or '*' in path or '[' in path:
                resolved_paths = glob.glob(abs_path)
                # not abs_path in resolved_paths:
                #   a glob() resolving to the same path we are feeding it with
                #   is a glob() that failed to resolve. If we continued calling
                #   ourselves we'd endlessly recurse. If the condition below
                #   evaluates to true then we are likely dealing with a file
                #   whose name contains wildcard characters.
                if abs_path not in resolved_paths:
                    for f in self._iter_expand_paths(glob.glob(abs_path)):
                        yield f.replace(rs, '')
                    continue
            # END glob handling
            try:
                for root, dirs, files in os.walk(abs_path, onerror=raise_exc):  # @UnusedVariable
                    for rela_file in files:
                        # add relative paths only
                        yield osp.join(root.replace(rs, ''), rela_file)
                    # END for each file in subdir
                # END for each subdirectory
            except OSError:
                # was a file or something that could not be iterated
                yield path.replace(rs, '')
            # END path exception handling
        # END for each path
406
    def _write_path_to_stdin(self, proc, filepath, item, fmakeexc, fprogress,
                             read_from_stdout=True):
        """Write path to proc.stdin and make sure it processes the item, including progress.

        :return: stdout string
        :param proc: running subprocess whose stdin accepts newline-terminated paths
        :param fmakeexc: zero-argument callable producing the exception to raise
            when the pipe breaks
        :param fprogress: progress callable invoked before and after the write
        :param read_from_stdout: if True, proc.stdout will be read after the item
            was sent to stdin. In that case, it will return None
        :note: There is a bug in git-update-index that prevents it from sending
            reports just in time. This is why we have a version that tries to
            read stdout and one which doesn't. In fact, the stdout is not
            important as the piped-in files are processed anyway and just in time
        :note: Newlines are essential here, gits behaviour is somewhat inconsistent
            on this depending on the version, hence we try our best to deal with
            newlines carefully. Usually the last newline will not be sent, instead
            we will close stdin to break the pipe."""

        fprogress(filepath, False, item)
        rval = None
        try:
            proc.stdin.write(("%s\n" % filepath).encode(defenc))
        except IOError:
            # pipe broke, usually because some error happened
            raise fmakeexc()
        # END write exception handling
        # flush so the subprocess sees the path immediately
        proc.stdin.flush()
        if read_from_stdout:
            rval = proc.stdout.readline().strip()
        fprogress(filepath, True, item)
        return rval
436
437    def iter_blobs(self, predicate=lambda t: True):
438        """
439        :return: Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob)
440
441        :param predicate:
442            Function(t) returning True if tuple(stage, Blob) should be yielded by the
443            iterator. A default filter, the BlobFilter, allows you to yield blobs
444            only if they match a given list of paths. """
445        for entry in mviter(self.entries):
446            blob = entry.to_blob(self.repo)
447            blob.size = entry.size
448            output = (entry.stage, blob)
449            if predicate(output):
450                yield output
451        # END for each entry
452
453    def unmerged_blobs(self):
454        """
455        :return:
456            Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being
457            a dictionary associating a path in the index with a list containing
458            sorted stage/blob pairs
459
460        :note:
461            Blobs that have been removed in one side simply do not exist in the
462            given stage. I.e. a file removed on the 'other' branch whose entries
463            are at stage 3 will not have a stage 3 entry.
464        """
465        is_unmerged_blob = lambda t: t[0] != 0
466        path_map = {}
467        for stage, blob in self.iter_blobs(is_unmerged_blob):
468            path_map.setdefault(blob.path, []).append((stage, blob))
469        # END for each unmerged blob
470        for l in mviter(path_map):
471            l.sort()
472        return path_map
473
474    @classmethod
475    def entry_key(cls, *entry):
476        return entry_key(*entry)
477
478    def resolve_blobs(self, iter_blobs):
479        """Resolve the blobs given in blob iterator. This will effectively remove the
480        index entries of the respective path at all non-null stages and add the given
481        blob as new stage null blob.
482
483        For each path there may only be one blob, otherwise a ValueError will be raised
484        claiming the path is already at stage 0.
485
486        :raise ValueError: if one of the blobs already existed at stage 0
487        :return: self
488
489        :note:
490            You will have to write the index manually once you are done, i.e.
491            index.resolve_blobs(blobs).write()
492        """
493        for blob in iter_blobs:
494            stage_null_key = (blob.path, 0)
495            if stage_null_key in self.entries:
496                raise ValueError("Path %r already exists at stage 0" % blob.path)
497            # END assert blob is not stage 0 already
498
499            # delete all possible stages
500            for stage in (1, 2, 3):
501                try:
502                    del(self.entries[(blob.path, stage)])
503                except KeyError:
504                    pass
505                # END ignore key errors
506            # END for each possible stage
507
508            self.entries[stage_null_key] = IndexEntry.from_blob(blob)
509        # END for each blob
510
511        return self
512
513    def update(self):
514        """Reread the contents of our index file, discarding all cached information
515        we might have.
516
517        :note: This is a possibly dangerious operations as it will discard your changes
518            to index.entries
519        :return: self"""
520        self._delete_entries_cache()
521        # allows to lazily reread on demand
522        return self
523
524    def write_tree(self):
525        """Writes this index to a corresponding Tree object into the repository's
526        object database and return it.
527
528        :return: Tree object representing this index
529        :note: The tree will be written even if one or more objects the tree refers to
530            does not yet exist in the object database. This could happen if you added
531            Entries to the index directly.
532        :raise ValueError: if there are no entries in the cache
533        :raise UnmergedEntriesError: """
534        # we obtain no lock as we just flush our contents to disk as tree
535        # If we are a new index, the entries access will load our data accordingly
536        mdb = MemoryDB()
537        entries = self._entries_sorted()
538        binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))
539
540        # copy changed trees only
541        mdb.stream_copy(mdb.sha_iter(), self.repo.odb)
542
543        # note: additional deserialization could be saved if write_tree_from_cache
544        # would return sorted tree entries
545        root_tree = Tree(self.repo, binsha, path='')
546        root_tree._cache = tree_items
547        return root_tree
548
549    def _process_diff_args(self, args):
550        try:
551            args.pop(args.index(self))
552        except IndexError:
553            pass
554        # END remove self
555        return args
556
557    def _to_relative_path(self, path):
558        """:return: Version of path relative to our git directory or raise ValueError
559        if it is not within our git direcotory"""
560        if not osp.isabs(path):
561            return path
562        if self.repo.bare:
563            raise InvalidGitRepositoryError("require non-bare repository")
564        if not path.startswith(self.repo.working_tree_dir):
565            raise ValueError("Absolute path %r is not in git repository at %r" % (path, self.repo.working_tree_dir))
566        return os.path.relpath(path, self.repo.working_tree_dir)
567
568    def _preprocess_add_items(self, items):
569        """ Split the items into two lists of path strings and BaseEntries. """
570        paths = []
571        entries = []
572
573        for item in items:
574            if isinstance(item, string_types):
575                paths.append(self._to_relative_path(item))
576            elif isinstance(item, (Blob, Submodule)):
577                entries.append(BaseIndexEntry.from_blob(item))
578            elif isinstance(item, BaseIndexEntry):
579                entries.append(item)
580            else:
581                raise TypeError("Invalid Type: %r" % item)
582        # END for each item
583        return (paths, entries)
584
585    def _store_path(self, filepath, fprogress):
586        """Store file at filepath in the database and return the base index entry
587        Needs the git_working_dir decorator active ! This must be assured in the calling code"""
588        st = os.lstat(filepath)     # handles non-symlinks as well
589        if S_ISLNK(st.st_mode):
590            # in PY3, readlink is string, but we need bytes. In PY2, it's just OS encoded bytes, we assume UTF-8
591            open_stream = lambda: BytesIO(force_bytes(os.readlink(filepath), encoding=defenc))
592        else:
593            open_stream = lambda: open(filepath, 'rb')
594        with open_stream() as stream:
595            fprogress(filepath, False, filepath)
596            istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
597            fprogress(filepath, True, filepath)
598        return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode),
599                               istream.binsha, 0, to_native_path_linux(filepath)))
600
    @unbare_repo
    @git_working_dir
    def _entries_for_paths(self, paths, path_rewriter, fprogress, entries):
        """Store the files at the given paths and return their BaseIndexEntries.

        :param paths: list of repository-relative or absolute path strings;
            emptied in place when ``path_rewriter`` is set
        :param path_rewriter: if truthy, paths are converted into null-sha Blob
            entries appended to ``entries`` (to be rewritten later by the caller)
            instead of being stored directly
        :param fprogress: progress callable forwarded to _store_path
        :param entries: caller's entry list, appended to in the rewriter case
        :return: list of BaseIndexEntry objects for all directly stored paths"""
        entries_added = []
        if path_rewriter:
            for path in paths:
                if osp.isabs(path):
                    abspath = path
                    # strip the working tree prefix including its separator
                    gitrelative_path = path[len(self.repo.working_tree_dir) + 1:]
                else:
                    gitrelative_path = path
                    abspath = osp.join(self.repo.working_tree_dir, gitrelative_path)
                # end obtain relative and absolute paths

                blob = Blob(self.repo, Blob.NULL_BIN_SHA,
                            stat_mode_to_index_mode(os.stat(abspath).st_mode),
                            to_native_path_linux(gitrelative_path))
                # TODO: variable undefined
                # NOTE(review): unclear what the TODO above refers to - verify
                # against upstream history before acting on it
                entries.append(BaseIndexEntry.from_blob(blob))
            # END for each path
            # consume the paths so the loop below stores nothing in this mode
            del(paths[:])
        # END rewrite paths

        # HANDLE PATHS
        assert len(entries_added) == 0
        for filepath in self._iter_expand_paths(paths):
            entries_added.append(self._store_path(filepath, fprogress))
        # END for each filepath
        # END path handling
        return entries_added
631
632    def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None,
633            write=True, write_extension_data=False):
634        """Add files from the working tree, specific blobs or BaseIndexEntries
635        to the index.
636
637        :param items:
638            Multiple types of items are supported, types can be mixed within one call.
639            Different types imply a different handling. File paths may generally be
640            relative or absolute.
641
642            - path string
643                strings denote a relative or absolute path into the repository pointing to
644                an existing file, i.e. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'.
645
646                Absolute paths must start with working tree directory of this index's repository
647                to be considered valid. For example, if it was initialized with a non-normalized path, like
648                `/root/repo/../repo`, absolute paths to be added must start with `/root/repo/../repo`.
649
650                Paths provided like this must exist. When added, they will be written
651                into the object database.
652
653                PathStrings may contain globs, such as 'lib/__init__*' or can be directories
654                like 'lib', the latter ones will add all the files within the dirctory and
655                subdirectories.
656
657                This equals a straight git-add.
658
659                They are added at stage 0
660
661            - Blob or Submodule object
662                Blobs are added as they are assuming a valid mode is set.
663                The file they refer to may or may not exist in the file system, but
664                must be a path relative to our repository.
665
666                If their sha is null ( 40*0 ), their path must exist in the file system
667                relative to the git repository as an object will be created from
668                the data at the path.
669                The handling now very much equals the way string paths are processed, except that
670                the mode you have set will be kept. This allows you to create symlinks
671                by settings the mode respectively and writing the target of the symlink
672                directly into the file. This equals a default Linux-Symlink which
673                is not dereferenced automatically, except that it can be created on
674                filesystems not supporting it as well.
675
676                Please note that globs or directories are not allowed in Blob objects.
677
678                They are added at stage 0
679
680            - BaseIndexEntry or type
681                Handling equals the one of Blob objects, but the stage may be
682                explicitly set. Please note that Index Entries require binary sha's.
683
684        :param force:
685            **CURRENTLY INEFFECTIVE**
686            If True, otherwise ignored or excluded files will be
687            added anyway.
688            As opposed to the git-add command, we enable this flag by default
689            as the API user usually wants the item to be added even though
690            they might be excluded.
691
692        :param fprogress:
693            Function with signature f(path, done=False, item=item) called for each
694            path to be added, one time once it is about to be added where done==False
695            and once after it was added where done=True.
696            item is set to the actual item we handle, either a Path or a BaseIndexEntry
697            Please note that the processed path is not guaranteed to be present
698            in the index already as the index is currently being processed.
699
700        :param path_rewriter:
701            Function with signature (string) func(BaseIndexEntry) function returning a path
702            for each passed entry which is the path to be actually recorded for the
703            object created from entry.path. This allows you to write an index which
704            is not identical to the layout of the actual files on your hard-disk.
705            If not None and ``items`` contain plain paths, these paths will be
706            converted to Entries beforehand and passed to the path_rewriter.
707            Please note that entry.path is relative to the git repository.
708
709        :param write:
710            If True, the index will be written once it was altered. Otherwise
711            the changes only exist in memory and are not available to git commands.
712
713        :param write_extension_data:
714            If True, extension data will be written back to the index. This can lead to issues in case
715            it is containing the 'TREE' extension, which will cause the `git commit` command to write an
716            old tree, instead of a new one representing the now changed index.
717            This doesn't matter if you use `IndexFile.commit()`, which ignores the `TREE` extension altogether.
718            You should set it to True if you intend to use `IndexFile.commit()` exclusively while maintaining
719            support for third-party extensions. Besides that, you can usually safely ignore the built-in
720            extensions when using GitPython on repositories that are not handled manually at all.
721            All current built-in extensions are listed here:
722            http://opensource.apple.com/source/Git/Git-26/src/git-htmldocs/technical/index-format.txt
723
724        :return:
725            List(BaseIndexEntries) representing the entries just actually added.
726
727        :raise OSError:
728            if a supplied Path did not exist. Please note that BaseIndexEntry
729            Objects that do not have a null sha will be added even if their paths
730            do not exist.
731        """
732        # sort the entries into strings and Entries, Blobs are converted to entries
733        # automatically
734        # paths can be git-added, for everything else we use git-update-index
735        paths, entries = self._preprocess_add_items(items)
736        entries_added = []
737        # This code needs a working tree, therefore we try not to run it unless required.
738        # That way, we are OK on a bare repository as well.
739        # If there are no paths, the rewriter has nothing to do either
740        if paths:
741            entries_added.extend(self._entries_for_paths(paths, path_rewriter, fprogress, entries))
742
743        # HANDLE ENTRIES
744        if entries:
745            null_mode_entries = [e for e in entries if e.mode == 0]
746            if null_mode_entries:
747                raise ValueError(
748                    "At least one Entry has a null-mode - please use index.remove to remove files for clarity")
749            # END null mode should be remove
750
751            # HANLDE ENTRY OBJECT CREATION
752            # create objects if required, otherwise go with the existing shas
753            null_entries_indices = [i for i, e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA]
754            if null_entries_indices:
755                @git_working_dir
756                def handle_null_entries(self):
757                    for ei in null_entries_indices:
758                        null_entry = entries[ei]
759                        new_entry = self._store_path(null_entry.path, fprogress)
760
761                        # update null entry
762                        entries[ei] = BaseIndexEntry(
763                            (null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path))
764                    # END for each entry index
765                # end closure
766                handle_null_entries(self)
767            # END null_entry handling
768
769            # REWRITE PATHS
770            # If we have to rewrite the entries, do so now, after we have generated
771            # all object sha's
772            if path_rewriter:
773                for i, e in enumerate(entries):
774                    entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e)))
775                # END for each entry
776            # END handle path rewriting
777
778            # just go through the remaining entries and provide progress info
779            for i, entry in enumerate(entries):
780                progress_sent = i in null_entries_indices
781                if not progress_sent:
782                    fprogress(entry.path, False, entry)
783                    fprogress(entry.path, True, entry)
784                # END handle progress
785            # END for each enty
786            entries_added.extend(entries)
787        # END if there are base entries
788
789        # FINALIZE
790        # add the new entries to this instance
791        for entry in entries_added:
792            self.entries[(entry.path, 0)] = IndexEntry.from_base(entry)
793
794        if write:
795            self.write(ignore_extension_data=not write_extension_data)
796        # END handle write
797
798        return entries_added
799
800    def _items_to_rela_paths(self, items):
801        """Returns a list of repo-relative paths from the given items which
802        may be absolute or relative paths, entries or blobs"""
803        paths = []
804        for item in items:
805            if isinstance(item, (BaseIndexEntry, (Blob, Submodule))):
806                paths.append(self._to_relative_path(item.path))
807            elif isinstance(item, string_types):
808                paths.append(self._to_relative_path(item))
809            else:
810                raise TypeError("Invalid item type: %r" % item)
811        # END for each item
812        return paths
813
814    @post_clear_cache
815    @default_index
816    def remove(self, items, working_tree=False, **kwargs):
817        """Remove the given items from the index and optionally from
818        the working tree as well.
819
820        :param items:
821            Multiple types of items are supported which may be be freely mixed.
822
823            - path string
824                Remove the given path at all stages. If it is a directory, you must
825                specify the r=True keyword argument to remove all file entries
826                below it. If absolute paths are given, they will be converted
827                to a path relative to the git repository directory containing
828                the working tree
829
830                The path string may include globs, such as *.c.
831
832            - Blob Object
833                Only the path portion is used in this case.
834
835            - BaseIndexEntry or compatible type
836                The only relevant information here Yis the path. The stage is ignored.
837
838        :param working_tree:
839            If True, the entry will also be removed from the working tree, physically
840            removing the respective file. This may fail if there are uncommitted changes
841            in it.
842
843        :param kwargs:
844            Additional keyword arguments to be passed to git-rm, such
845            as 'r' to allow recursive removal of
846
847        :return:
848            List(path_string, ...) list of repository relative paths that have
849            been removed effectively.
850            This is interesting to know in case you have provided a directory or
851            globs. Paths are relative to the repository. """
852        args = []
853        if not working_tree:
854            args.append("--cached")
855        args.append("--")
856
857        # preprocess paths
858        paths = self._items_to_rela_paths(items)
859        removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines()
860
861        # process output to gain proper paths
862        # rm 'path'
863        return [p[4:-1] for p in removed_paths]
864
865    @post_clear_cache
866    @default_index
867    def move(self, items, skip_errors=False, **kwargs):
868        """Rename/move the items, whereas the last item is considered the destination of
869        the move operation. If the destination is a file, the first item ( of two )
870        must be a file as well. If the destination is a directory, it may be preceded
871        by one or more directories or files.
872
873        The working tree will be affected in non-bare repositories.
874
875        :parma items:
876            Multiple types of items are supported, please see the 'remove' method
877            for reference.
878        :param skip_errors:
879            If True, errors such as ones resulting from missing source files will
880            be skipped.
881        :param kwargs:
882            Additional arguments you would like to pass to git-mv, such as dry_run
883            or force.
884
885        :return:List(tuple(source_path_string, destination_path_string), ...)
886            A list of pairs, containing the source file moved as well as its
887            actual destination. Relative to the repository root.
888
889        :raise ValueError: If only one item was given
890            GitCommandError: If git could not handle your request"""
891        args = []
892        if skip_errors:
893            args.append('-k')
894
895        paths = self._items_to_rela_paths(items)
896        if len(paths) < 2:
897            raise ValueError("Please provide at least one source and one destination of the move operation")
898
899        was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None))
900        kwargs['dry_run'] = True
901
902        # first execute rename in dryrun so the command tells us what it actually does
903        # ( for later output )
904        out = []
905        mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines()
906
907        # parse result - first 0:n/2 lines are 'checking ', the remaining ones
908        # are the 'renaming' ones which we parse
909        for ln in xrange(int(len(mvlines) / 2), len(mvlines)):
910            tokens = mvlines[ln].split(' to ')
911            assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln]
912
913            # [0] = Renaming x
914            # [1] = y
915            out.append((tokens[0][9:], tokens[1]))
916        # END for each line to parse
917
918        # either prepare for the real run, or output the dry-run result
919        if was_dry_run:
920            return out
921        # END handle dryrun
922
923        # now apply the actual operation
924        kwargs.pop('dry_run')
925        self.repo.git.mv(args, paths, **kwargs)
926
927        return out
928
929    def commit(self, message, parent_commits=None, head=True, author=None,
930               committer=None, author_date=None, commit_date=None,
931               skip_hooks=False):
932        """Commit the current default index file, creating a commit object.
933        For more information on the arguments, see tree.commit.
934
935        :note: If you have manually altered the .entries member of this instance,
936               don't forget to write() your changes to disk beforehand.
937               Passing skip_hooks=True is the equivalent of using `-n`
938               or `--no-verify` on the command line.
939        :return: Commit object representing the new commit"""
940        if not skip_hooks:
941            run_commit_hook('pre-commit', self)
942
943            self._write_commit_editmsg(message)
944            run_commit_hook('commit-msg', self, self._commit_editmsg_filepath())
945            message = self._read_commit_editmsg()
946            self._remove_commit_editmsg()
947        tree = self.write_tree()
948        rval = Commit.create_from_tree(self.repo, tree, message, parent_commits,
949                                       head, author=author, committer=committer,
950                                       author_date=author_date, commit_date=commit_date)
951        if not skip_hooks:
952            run_commit_hook('post-commit', self)
953        return rval
954
955    def _write_commit_editmsg(self, message):
956        with open(self._commit_editmsg_filepath(), "wb") as commit_editmsg_file:
957            commit_editmsg_file.write(message.encode(defenc))
958
959    def _remove_commit_editmsg(self):
960        os.remove(self._commit_editmsg_filepath())
961
962    def _read_commit_editmsg(self):
963        with open(self._commit_editmsg_filepath(), "rb") as commit_editmsg_file:
964            return commit_editmsg_file.read().decode(defenc)
965
966    def _commit_editmsg_filepath(self):
967        return osp.join(self.repo.common_dir, "COMMIT_EDITMSG")
968
969    @classmethod
970    def _flush_stdin_and_wait(cls, proc, ignore_stdout=False):
971        proc.stdin.flush()
972        proc.stdin.close()
973        stdout = ''
974        if not ignore_stdout:
975            stdout = proc.stdout.read()
976        proc.stdout.close()
977        proc.wait()
978        return stdout
979
    @default_index
    def checkout(self, paths=None, force=False, fprogress=lambda *args: None, **kwargs):
        """Checkout the given paths or all files from the version known to the index into
        the working tree.

        :note: Be sure you have written pending changes using the ``write`` method
            in case you have altered the entries dictionary directly

        :param paths:
            If None, all paths in the index will be checked out. Otherwise an iterable
            of relative or absolute paths or a single path pointing to files or directories
            in the index is expected.

        :param force:
            If True, existing files will be overwritten even if they contain local modifications.
            If False, these will trigger a CheckoutError.

        :param fprogress:
            see Index.add_ for signature and explanation.
            The provided progress information will contain None as path and item if no
            explicit paths are given. Otherwise progress information will be sent
            prior and after a file has been checked out

        :param kwargs:
            Additional arguments to be passed to git-checkout-index

        :return:
            iterable yielding paths to files which have been checked out and are
            guaranteed to match the version stored in the index

        :raise CheckoutError:
            If at least one file failed to be checked out. This is a summary,
            hence it will checkout as many files as it can anyway.
            If one of files or directories do not exist in the index
            ( as opposed to the  original git command who ignores them ).
            Raise GitCommandError if error lines could not be parsed - this truly is
            an exceptional state

        .. note:: The checkout is limited to checking out the files in the
            index. Files which are not in the index anymore and exist in
            the working tree will not be deleted. This behaviour is fundamentally
            different to *head.checkout*, i.e. if you want git-checkout like behaviour,
            use head.checkout instead of index.checkout.
            """
        args = ["--index"]
        if force:
            args.append("--force")

        # Parse git-checkout-index's stderr, translating per-file failures into a
        # summarizing CheckoutError and unparseable lines into a GitCommandError.
        # Does nothing when stderr is empty.
        def handle_stderr(proc, iter_checked_out_files):
            stderr = proc.stderr.read()
            if not stderr:
                return
            # line contents:
            stderr = stderr.decode(defenc)
            # git-checkout-index: this already exists
            failed_files = []
            failed_reasons = []
            unknown_lines = []
            # suffixes of lines that start with the 'git checkout-index: ' prefix
            endings = (' already exists', ' is not in the cache', ' does not exist at stage', ' is unmerged')
            for line in stderr.splitlines():
                # lines without the standard prefix are matched against a few
                # known special message shapes; anything else is 'unknown'
                if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "):
                    is_a_dir = " is a directory"
                    unlink_issue = "unable to unlink old '"
                    already_exists_issue = ' already exists, no checkout'   # created by entry.c:checkout_entry(...)
                    if line.endswith(is_a_dir):
                        failed_files.append(line[:-len(is_a_dir)])
                        failed_reasons.append(is_a_dir)
                    elif line.startswith(unlink_issue):
                        # path sits between the prefix and the closing quote
                        failed_files.append(line[len(unlink_issue):line.rfind("'")])
                        failed_reasons.append(unlink_issue)
                    elif line.endswith(already_exists_issue):
                        failed_files.append(line[:-len(already_exists_issue)])
                        failed_reasons.append(already_exists_issue)
                    else:
                        unknown_lines.append(line)
                    continue
                # END special lines parsing

                # prefixed lines: strip the 20-char prefix and the known suffix
                # to recover the failing path
                for e in endings:
                    if line.endswith(e):
                        failed_files.append(line[20:-len(e)])
                        failed_reasons.append(e)
                        break
                    # END if ending matches
                # END for each possible ending
            # END for each line
            if unknown_lines:
                raise GitCommandError(("git-checkout-index",), 128, stderr)
            if failed_files:
                # everything that was attempted but did not fail counts as valid
                valid_files = list(set(iter_checked_out_files) - set(failed_files))
                raise CheckoutError(
                    "Some files could not be checked out from the index due to local modifications",
                    failed_files, valid_files, failed_reasons)
        # END stderr handler

        if paths is None:
            # checkout everything in one shot via --all
            args.append("--all")
            kwargs['as_process'] = 1
            fprogress(None, False, None)
            proc = self.repo.git.checkout_index(*args, **kwargs)
            proc.wait()
            fprogress(None, True, None)
            rval_iter = (e.path for e in mviter(self.entries))
            handle_stderr(proc, rval_iter)
            return rval_iter
        else:
            if isinstance(paths, string_types):
                paths = [paths]

            # make sure we have our entries loaded before we start checkout_index
            # which will hold a lock on it. We try to get the lock as well during
            # our entries initialization
            self.entries

            # stream the individual paths to the process via its stdin
            args.append("--stdin")
            kwargs['as_process'] = True
            kwargs['istream'] = subprocess.PIPE
            proc = self.repo.git.checkout_index(args, **kwargs)
            # FIXME: Reading from GIL!
            make_exc = lambda: GitCommandError(("git-checkout-index",) + tuple(args), 128, proc.stderr.read())
            checked_out_files = []

            for path in paths:
                co_path = to_native_path_linux(self._to_relative_path(path))
                # if the item is not in the index, it could be a directory
                path_is_directory = False

                try:
                    self.entries[(co_path, 0)]
                except KeyError:
                    # not a direct index entry - treat it as a directory prefix
                    # and check out every entry that lives below it
                    folder = co_path
                    if not folder.endswith('/'):
                        folder += '/'
                    for entry in mviter(self.entries):
                        if entry.path.startswith(folder):
                            p = entry.path
                            self._write_path_to_stdin(proc, p, p, make_exc,
                                                      fprogress, read_from_stdout=False)
                            checked_out_files.append(p)
                            path_is_directory = True
                        # END if entry is in directory
                    # END for each entry
                # END path exception handling

                if not path_is_directory:
                    self._write_path_to_stdin(proc, co_path, path, make_exc,
                                              fprogress, read_from_stdout=False)
                    checked_out_files.append(co_path)
                # END path is a file
            # END for each path
            self._flush_stdin_and_wait(proc, ignore_stdout=True)

            handle_stderr(proc, checked_out_files)
            return checked_out_files
        # END paths handling
        # NOTE(review): unreachable - both branches above return. Also, asserting
        # a non-empty string is always true, so this would never fire anyway.
        assert "Should not reach this point"
1136
1137    @default_index
1138    def reset(self, commit='HEAD', working_tree=False, paths=None, head=False, **kwargs):
1139        """Reset the index to reflect the tree at the given commit. This will not
1140        adjust our HEAD reference as opposed to HEAD.reset by default.
1141
1142        :param commit:
1143            Revision, Reference or Commit specifying the commit we should represent.
1144            If you want to specify a tree only, use IndexFile.from_tree and overwrite
1145            the default index.
1146
1147        :param working_tree:
1148            If True, the files in the working tree will reflect the changed index.
1149            If False, the working tree will not be touched
1150            Please note that changes to the working copy will be discarded without
1151            warning !
1152
1153        :param head:
1154            If True, the head will be set to the given commit. This is False by default,
1155            but if True, this method behaves like HEAD.reset.
1156
1157        :param paths: if given as an iterable of absolute or repository-relative paths,
1158            only these will be reset to their state at the given commit'ish.
1159            The paths need to exist at the commit, otherwise an exception will be
1160            raised.
1161
1162        :param kwargs:
1163            Additional keyword arguments passed to git-reset
1164
1165        .. note:: IndexFile.reset, as opposed to HEAD.reset, will not delete anyfiles
1166            in order to maintain a consistent working tree. Instead, it will just
1167            checkout the files according to their state in the index.
1168            If you want git-reset like behaviour, use *HEAD.reset* instead.
1169
1170        :return: self """
1171        # what we actually want to do is to merge the tree into our existing
1172        # index, which is what git-read-tree does
1173        new_inst = type(self).from_tree(self.repo, commit)
1174        if not paths:
1175            self.entries = new_inst.entries
1176        else:
1177            nie = new_inst.entries
1178            for path in paths:
1179                path = self._to_relative_path(path)
1180                try:
1181                    key = entry_key(path, 0)
1182                    self.entries[key] = nie[key]
1183                except KeyError:
1184                    # if key is not in theirs, it musn't be in ours
1185                    try:
1186                        del(self.entries[key])
1187                    except KeyError:
1188                        pass
1189                    # END handle deletion keyerror
1190                # END handle keyerror
1191            # END for each path
1192        # END handle paths
1193        self.write()
1194
1195        if working_tree:
1196            self.checkout(paths=paths, force=True)
1197        # END handle working tree
1198
1199        if head:
1200            self.repo.head.set_commit(self.repo.commit(commit), logmsg="%s: Updating HEAD" % commit)
1201        # END handle head change
1202
1203        return self
1204
1205    @default_index
1206    def diff(self, other=diff.Diffable.Index, paths=None, create_patch=False, **kwargs):
1207        """Diff this index against the working copy or a Tree or Commit object
1208
1209        For a documentation of the parameters and return values, see
1210        Diffable.diff
1211
1212        :note:
1213            Will only work with indices that represent the default git index as
1214            they have not been initialized with a stream.
1215        """
1216        # index against index is always empty
1217        if other is self.Index:
1218            return diff.DiffIndex()
1219
1220        # index against anything but None is a reverse diff with the respective
1221        # item. Handle existing -R flags properly. Transform strings to the object
1222        # so that we can call diff on it
1223        if isinstance(other, string_types):
1224            other = self.repo.rev_parse(other)
1225        # END object conversion
1226
1227        if isinstance(other, Object):
1228            # invert the existing R flag
1229            cur_val = kwargs.get('R', False)
1230            kwargs['R'] = not cur_val
1231            return other.diff(self.Index, paths, create_patch, **kwargs)
1232        # END diff against other item handling
1233
1234        # if other is not None here, something is wrong
1235        if other is not None:
1236            raise ValueError("other must be None, Diffable.Index, a Tree or Commit, was %r" % other)
1237
1238        # diff against working copy - can be handled by superclass natively
1239        return super(IndexFile, self).diff(other, paths, create_patch, **kwargs)
1240