# object_store.py -- Object store for git objects
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#                         and others
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version
# 2.0 or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#


"""Git object store interfaces and implementation."""

from io import BytesIO
import errno
import os
import stat
import sys
import tempfile

from dulwich.diff_tree import (
    tree_changes,
    walk_trees,
    )
from dulwich.errors import (
    NotTreeError,
    )
from dulwich.file import GitFile
from dulwich.objects import (
    Commit,
    ShaFile,
    Tag,
    Tree,
    ZERO_SHA,
    hex_to_sha,
    sha_to_hex,
    hex_to_filename,
    S_ISGITLINK,
    object_class,
    )
from dulwich.pack import (
    Pack,
    PackData,
    PackInflater,
    PackFileDisappeared,
    iter_sha1,
    pack_objects_to_data,
    write_pack_header,
    write_pack_index_v2,
    write_pack_data,
    write_pack_object,
    compute_file_sha,
    PackIndexer,
    PackStreamCopier,
    )
from dulwich.refs import ANNOTATED_TAG_SUFFIX

INFODIR = 'info'
PACKDIR = 'pack'


class BaseObjectStore(object):
    """Object store interface."""

    def determine_wants_all(self, refs):
        return [sha for (ref, sha) in refs.items()
                if sha not in self and
                not ref.endswith(ANNOTATED_TAG_SUFFIX) and
                not sha == ZERO_SHA]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        raise NotImplementedError(self.add_objects)

    def add_pack_data(self, count, pack_data, progress=None):
        """Add pack data to this object store.

        :param count: Number of items to add
        :param pack_data: Iterator over pack data tuples
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(f, count, pack_data, progress)
        except BaseException:
            abort()
            raise
        else:
            return commit()

    def tree_changes(self, source, target, want_unchanged=False,
                     include_trees=False, change_type_same=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :param include_trees: Whether to include trees
        :param change_type_same: Whether to report files changing
            type in the same entry.
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged,
                                   include_trees=include_trees,
                                   change_type_same=change_type_same):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))
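
    # Illustrative example (a sketch; ``store`` is assumed to be an object
    # store containing both trees, and ``old_tree``/``new_tree`` tree SHA1s):
    #
    #     for (oldpath, newpath), _, _ in store.tree_changes(
    #             old_tree, new_tree):
    #         print(oldpath, newpath)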

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        for entry, _ in walk_trees(self, tree_id, None):
            if ((entry.mode is not None and
                 not stat.S_ISDIR(entry.mode)) or include_trees):
                yield entry

    def find_missing_objects(self, haves, wants, progress=None,
                             get_tagged=None,
                             get_parents=lambda commit: commit.parents,
                             depth=None):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
                                     get_parents=get_parents)
        return iter(finder.next, None)
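
    # Illustrative example (a sketch; ``haves`` and ``wants`` are assumed
    # iterables of commit SHA1s known to ``store``):
    #
    #     for sha, path in store.find_missing_objects(haves, wants):
    #         ...  # queue the object named by ``sha`` for sending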

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))

    def generate_pack_data(self, have, want, progress=None, ofs_delta=True):
        """Generate pack data objects for a set of wants/haves.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param ofs_delta: Whether OFS deltas can be included
        :param progress: Optional progress reporting method
        """
        # TODO(user): More efficient implementation
        return pack_objects_to_data(
            self.generate_pack_contents(have, want, progress))
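
    # Illustrative example (a sketch; ``remote_has`` and ``local_wants`` are
    # assumed sets of commit SHA1s). The (count, iterator) pair returned here
    # can be fed straight into add_pack_data on another store, or streamed
    # to a client:
    #
    #     count, chunks = store.generate_pack_data(remote_has, local_wants)
    #     other_store.add_pack_data(count, chunks)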

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The fully-peeled object pointed to by a tag, after
            following all intermediate tags; if the original SHA1 does not
            point to a tag, the object it names is returned.
        """
        obj = self[sha]
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            obj_class, sha = obj.object
            obj = self[sha]
        return obj
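
    # Illustrative example (a sketch; ``tag_sha`` is assumed to name a Tag
    # in ``store`` that ultimately points at a commit):
    #
    #     commit = store.peel_sha(tag_sha)   # follows nested tags
    #     assert not isinstance(commit, Tag)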

    def _collect_ancestors(self, heads, common=set(),
                           get_parents=lambda commit: commit.parents):
        """Collect all ancestors of heads up to (excluding) those in common.

        :param heads: commits to start from
        :param common: commits to end at, or empty set to walk repository
            completely
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: A tuple (A, B) where A is the set of all commits reachable
            from heads but not present in common, and B is the subset of
            common that is directly reachable from heads.
        """
        bases = set()
        commits = set()
        queue = []
        queue.extend(heads)
        while queue:
            e = queue.pop(0)
            if e in common:
                bases.add(e)
            elif e not in commits:
                commits.add(e)
                cmt = self[e]
                queue.extend(get_parents(cmt))
        return (commits, bases)

    def close(self):
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP


class PackBasedObjectStore(BaseObjectStore):

    def __init__(self):
        self._pack_cache = {}

    @property
    def alternates(self):
        return []

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            try:
                if sha in pack:
                    return True
            except PackFileDisappeared:
                pass
        return False

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _add_cached_pack(self, base_name, pack):
        """Add a newly appeared pack to the cache by path."""
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            if prev_pack:
                prev_pack.close()

    def _clear_cached_packs(self):
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    def _iter_cached_packs(self):
        return self._pack_cache.values()

    def _update_pack_cache(self):
        raise NotImplementedError(self._update_pack_cache)

    def close(self):
        self._clear_cached_packs()

    @property
    def packs(self):
        """List with pack objects."""
        return (
            list(self._iter_cached_packs()) + list(self._update_pack_cache()))

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            for alternate_object in alternate:
                yield alternate_object

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha):
        raise NotImplementedError(self._get_loose_object)

    def _remove_loose_object(self, sha):
        raise NotImplementedError(self._remove_loose_object)

    def _remove_pack(self, name):
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        for obj, path in objects:
            self._remove_loose_object(obj.id)
        return len(objects)

    def repack(self):
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.
        """
        loose_objects = set()
        for sha in self._iter_loose_objects():
            loose_objects.add(self._get_loose_object(sha))
        objects = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update((obj, None) for obj in pack.iterobjects())

        # The name of the consolidated pack might match the name of a
        # pre-existing pack. Take care not to remove the newly created
        # consolidated pack.

        consolidated = self.add_objects(objects)
        old_packs.pop(consolidated.name(), None)

        for obj in loose_objects:
            self._remove_loose_object(obj.id)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        self._update_pack_cache()
        for pack in self._iter_cached_packs():
            try:
                for sha in pack:
                    yield sha
            except PackFileDisappeared:
                pass
        for sha in self._iter_loose_objects():
            yield sha
        for sha in self._iter_alternate_objects():
            yield sha

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % name)
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the meantime?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over (object, path) tuples, should support
            __len__.
        :return: Pack object of the objects written.
        """
        return self.add_pack_data(
                *pack_objects_to_data(objects),
                progress=progress)


class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        super(DiskObjectStore, self).__init__()
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._alternates = None

    def __repr__(self):
        return "<%s(%r)>" % (self.__class__.__name__, self.path)

    @property
    def alternates(self):
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self):
        try:
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), 'rb')
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return
            raise
        with f:
            for line in f.readlines():
                line = line.rstrip(b"\n")
                if line.startswith(b"#"):
                    continue
                if os.path.isabs(line):
                    yield line.decode(sys.getfilesystemencoding())
                else:
                    yield os.path.join(self.path, line).decode(
                        sys.getfilesystemencoding())

    def add_alternate_path(self, path):
        """Add an alternate path to this object store."""
        try:
            os.mkdir(os.path.join(self.path, INFODIR))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, 'wb') as f:
            try:
                orig_f = open(alternates_path, 'rb')
            except (OSError, IOError) as e:
                if e.errno != errno.ENOENT:
                    raise
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(path.encode(sys.getfilesystemencoding()) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))

    def _update_pack_cache(self):
        """Read and iterate over new pack files and cache them."""
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except OSError as e:
            if e.errno == errno.ENOENT:
                self.close()
                return []
            raise
        pack_files = set()
        for name in pack_dir_contents:
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet
                # fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[:-len(".pack")]
                    pack_files.add(pack_name)

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = Pack(os.path.join(self.pack_dir, f))
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _get_shafile_path(self, sha):
        # Check from object dir
        return hex_to_filename(self.path, sha)

    def _iter_loose_objects(self):
        for base in os.listdir(self.path):
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                yield (base+rest).encode(sys.getfilesystemencoding())

    def _get_loose_object(self, sha):
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return None
            raise

    def _remove_loose_object(self, sha):
        os.remove(self._get_shafile_path(sha))

    def _remove_pack(self, pack):
        try:
            del self._pack_cache[os.path.basename(pack._basename)]
        except KeyError:
            pass
        pack.close()
        os.remove(pack.data.path)
        os.remove(pack.index.path)

    def _get_pack_basepath(self, entries):
        suffix = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix.decode('ascii')
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_thin_pack(self, f, path, copier, indexer):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param f: Open file object for the pack.
        :param path: Path to the pack file.
        :param copier: A PackStreamCopier to use for writing pack data.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Must flush before reading (http://bugs.python.org/issue3207)
        f.flush()

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Must reposition before writing (http://bugs.python.org/issue3207)
        f.seek(0, os.SEEK_CUR)

        # Complete the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            offset = f.tell()
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()
        f.write(pack_sha)
        f.close()

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)
        target_pack = pack_base_name + '.pack'
        if sys.platform == 'win32':
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            try:
                os.remove(target_pack)
            except (IOError, OSError) as e:
                if e.errno != errno.ENOENT:
                    raise
        os.rename(path, target_pack)

        # Write the index.
        index_file = GitFile(pack_base_name + '.idx', 'wb')
        try:
            write_pack_index_v2(index_file, entries, pack_sha)
            index_file.close()
        finally:
            index_file.abort()

        # Add the pack to the store and return it.
        final_pack = Pack(pack_base_name)
        final_pack.check_length_and_checksum()
        self._add_cached_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but
            may not return the number of bytes requested.
        :return: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
        with os.fdopen(fd, 'w+b') as f:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            return self._complete_thin_pack(f, path, copier, indexer)

    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        with PackData(path) as p:
            entries = p.sorted_entries()
            basename = self._get_pack_basepath(entries)
            with GitFile(basename+".idx", "wb") as f:
                write_pack_index_v2(f, entries, p.get_stored_checksum())
        for pack in self.packs:
            if pack._basename == basename:
                return pack
        target_pack = basename + '.pack'
        if sys.platform == 'win32':
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            try:
                os.remove(target_pack)
            except (IOError, OSError) as e:
                if e.errno != errno.ENOENT:
                    raise
        os.rename(path, target_pack)
        final_pack = Pack(basename)
        self._add_cached_pack(basename, final_pack)
        return final_pack

    def add_pack(self):
        """Add a new pack to this object store.

        :return: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')

        def commit():
            f.flush()
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) > 0:
                return self.move_in_pack(path)
            else:
                os.remove(path)
                return None

        def abort():
            f.close()
            os.remove(path)
        return f, commit, abort
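
    # Illustrative use of the add_pack() protocol (a sketch; ``pack_bytes``
    # is assumed to be a complete pack file as a bytes object):
    #
    #     f, commit, abort = store.add_pack()
    #     try:
    #         f.write(pack_bytes)
    #     except BaseException:
    #         abort()
    #         raise
    #     else:
    #         pack = commit()   # returns None if nothing was written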

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
        try:
            os.mkdir(dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        if os.path.exists(path):
            return  # Already there, no need to write again
        with GitFile(path, 'wb') as f:
            f.write(obj.as_legacy_object())

    @classmethod
    def init(cls, path):
        try:
            os.mkdir(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)
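
    # Illustrative example (a sketch; the path and ``blob`` are
    # hypothetical):
    #
    #     store = DiskObjectStore.init("/tmp/example-objects")
    #     store.add_object(blob)   # ``blob`` assumed to be a ShaFile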


class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""
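
    # Illustrative example (a sketch):
    #
    #     from dulwich.objects import Blob
    #     store = MemoryObjectStore()
    #     blob = Blob.from_string(b"spam")
    #     store.add_object(blob)
    #     assert blob.id in store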

    def __init__(self):
        super(MemoryObjectStore, self).__init__()
        self._data = {}

    def _to_hexsha(self, sha):
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError("Invalid sha %r" % (sha,))

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name):
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name):
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj):
        """Add a single object to this object store."""
        self._data[obj.id] = obj.copy()

    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add
        the individual objects.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        f = BytesIO()

        def commit():
            p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
            f.close()
            for obj in PackInflater.for_pack_data(p, self.get_raw):
                self.add_object(obj)

        def abort():
            pass
        return f, commit, abort

    def _complete_thin_pack(self, f, indexer):
        """Complete a thin pack by adding external references.

        :param f: Open file object for the pack.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Complete the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            write_pack_object(f, type_num, data, sha=new_sha)
        pack_sha = new_sha.digest()
        f.write(pack_sha)

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but
            may not return the number of bytes requested.
        """
        f, commit, abort = self.add_pack()
        try:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            self._complete_thin_pack(f, indexer)
        except BaseException:
            abort()
            raise
        else:
            commit()


class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        raise NotImplementedError(self.iterobjects)


class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore."""

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        self.store = store
        self.sha_iter = sha_iter
        self._shas = []

    def __iter__(self):
        """Yield tuple with next object and path."""
        for sha, path in self.itershas():
            yield self.store[sha], path

    def iterobjects(self):
        """Iterate over just the objects."""
        for o, path in self:
            yield o

    def itershas(self):
        """Iterate over the SHAs."""
        for sha in self._shas:
            yield sha
        for sha in self.sha_iter:
            self._shas.append(sha)
            yield sha

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        if needle == ZERO_SHA:
            return False
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

    def __len__(self):
        """Return the number of objects."""
        return len(list(self.itershas()))

    def empty(self):
        it = self.itershas()
        try:
            next(it)
        except StopIteration:
            return True
        else:
            return False

    def __bool__(self):
        """Indicate whether this object has contents."""
        return not self.empty()


def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    """
    tree = lookup_obj(root_sha)
    if not isinstance(tree, Tree):
        raise NotTreeError(root_sha)
    return tree.lookup_path(lookup_obj, path)
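
# Illustrative example (a sketch; ``store`` and ``tree_id`` are assumed to
# name an object store and a tree containing b"docs/index.txt"):
#
#     mode, sha = tree_lookup_path(
#         store.__getitem__, tree_id, b"docs/index.txt")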


def _collect_filetree_revs(obj_store, tree_sha, kset):
    """Collect SHA1s of files and directories for specified tree.

    :param obj_store: Object store to get objects by SHA from
    :param tree_sha: tree reference to walk
    :param kset: set to fill with references to files and directories
    """
    filetree = obj_store[tree_sha]
    for name, mode, sha in filetree.iteritems():
        if not S_ISGITLINK(mode) and sha not in kset:
            kset.add(sha)
            if stat.S_ISDIR(mode):
                _collect_filetree_revs(obj_store, sha, kset)


def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included in the commits list as well.
    Only SHA1s known in this repository will get through, and unless the
    ignore_unknown argument is True, a KeyError is raised for any SHA1
    missing from the repository.

    :param obj_store: Object store to get objects by SHA1 from
    :param lst: Collection of commit and tag SHAs
    :param ignore_unknown: True to silently skip SHA1s missing from the
        repository.
    :return: A tuple of (commits, tags, others) SHA1s
    """
    commits = set()
    tags = set()
    others = set()
    for e in lst:
        try:
            o = obj_store[e]
        except KeyError:
            if not ignore_unknown:
                raise
        else:
            if isinstance(o, Commit):
                commits.add(e)
            elif isinstance(o, Tag):
                tags.add(e)
                tagged = o.object[1]
                c, t, o = _split_commits_and_tags(
                    obj_store, [tagged], ignore_unknown=ignore_unknown)
                commits |= c
                tags |= t
                others |= o
            else:
                others.add(e)
    return (commits, tags, others)


class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param get_parents: Optional function for getting the parents of a commit.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """

    def __init__(self, object_store, haves, wants, progress=None,
                 get_tagged=None, get_parents=lambda commit: commit.parents):
        self.object_store = object_store
        self._get_parents = get_parents
        # Process commits and tags differently. Note that while haves may
        # list commits/tags not available locally (such SHAs are filtered
        # out by _split_commits_and_tags), wants must list only known SHAs,
        # or _split_commits_and_tags fails with KeyError.
        have_commits, have_tags, have_others = (
            _split_commits_and_tags(object_store, haves, True))
        want_commits, want_tags, want_others = (
            _split_commits_and_tags(object_store, wants, False))
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = object_store._collect_ancestors(
            have_commits, get_parents=self._get_parents)[0]
        # missing_commits - complete set of commits between haves and wants;
        # common_commits - commits from all_ancestors that we hit while
        # traversing the parent hierarchy of wants
        missing_commits, common_commits = object_store._collect_ancestors(
            want_commits, all_ancestors, get_parents=self._get_parents)
        self.sha_done = set()
        # Now fill sha_done with commits and revisions of files and
        # directories known to exist both locally and on the target, so
        # that they won't get selected for fetch.
        for h in common_commits:
            self.sha_done.add(h)
            cmt = object_store[h]
            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
        # record tags we have as visited, too
        for t in have_tags:
            self.sha_done.add(t)

        missing_tags = want_tags.difference(have_tags)
        missing_others = want_others.difference(have_others)
        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        wants = missing_commits.union(missing_tags)
        wants = wants.union(missing_others)

        self.objects_to_send = set([(w, None, False) for w in wants])

        if progress is None:
            self.progress = lambda x: None
        else:
            self.progress = progress
        self._tagged = get_tagged and get_tagged() or {}

    def add_todo(self, entries):
        self.objects_to_send.update([e for e in entries
                                     if not e[0] in self.sha_done])

    def next(self):
        while True:
            if not self.objects_to_send:
                return None
            (sha, name, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, "", False)])
            elif isinstance(o, Tree):
                self.add_todo([(s, n, not stat.S_ISDIR(m))
                               for n, m, s in o.iteritems()
                               if not S_ISGITLINK(m)])
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, True)])
        self.sha_done.add(sha)
        self.progress(("counting objects: %d\r" %
                       len(self.sha_done)).encode('ascii'))
        return (sha, name)

    __next__ = next


class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """
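
    # Illustrative example (a sketch; ``remote_has`` is a hypothetical
    # predicate for SHAs the other side already has):
    #
    #     walker = ObjectStoreGraphWalker(local_heads, get_parents)
    #     sha = next(walker)
    #     while sha is not None:
    #         if remote_has(sha):
    #             walker.ack(sha)
    #         sha = next(walker)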

    def __init__(self, local_heads, get_parents, shallow=None):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        :param shallow: Optional set of shallow commit SHA1s
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        self.parents = {}
        if shallow is None:
            shallow = set()
        self.shallow = shallow

    def ack(self, sha):
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            raise ValueError("unexpected sha %r received" % sha)
        ancestors = set([sha])

        # stop if we run out of heads to remove
        while self.heads:
            for a in ancestors:
                if a in self.heads:
                    self.heads.remove(a)

            # collect all ancestors
            new_ancestors = set()
            for a in ancestors:
                ps = self.parents.get(a)
                if ps is not None:
                    new_ancestors.update(ps)
                self.parents[a] = None

            # no more ancestors; stop
            if not new_ancestors:
                break

            ancestors = new_ancestors

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if self.heads:
            ret = self.heads.pop()
            ps = self.get_parents(ret)
            self.parents[ret] = ps
            self.heads.update(
                [p for p in ps if p not in self.parents])
            return ret
        return None

    __next__ = next


def commit_tree_changes(object_store, tree, changes):
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. Setting the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    :param object_store: Object store to store new objects in
        and retrieve old ones from.
    :param tree: Original tree root
    :param changes: changes to apply
    :return: New tree root object
    """
    # TODO(user): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    nested_changes = {}
    for (path, new_mode, new_sha) in changes:
        try:
            (dirname, subpath) = path.split(b'/', 1)
        except ValueError:
            if new_sha is None:
                del tree[path]
            else:
                tree[path] = (new_mode, new_sha)
        else:
            nested_changes.setdefault(dirname, []).append(
                (subpath, new_mode, new_sha))
    for name, subchanges in nested_changes.items():
        try:
            orig_subtree = object_store[tree[name][1]]
        except KeyError:
            orig_subtree = Tree()
        subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
        if len(subtree) == 0:
            del tree[name]
        else:
            tree[name] = (stat.S_IFDIR, subtree.id)
    object_store.add_object(tree)
    return tree
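
# Illustrative example (a sketch; ``store``, ``tree``, and ``readme_blob``
# are hypothetical names, with ``store`` holding the referenced objects):
#
#     new_tree = commit_tree_changes(store, tree, [
#         (b"README", 0o100644, readme_blob.id),  # add or replace a blob
#         (b"old/unused.txt", None, None),        # delete a path
#     ])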


class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores."""

    def __init__(self, bases, add_store=None):
        self.bases = bases
        self.add_store = add_store
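
    # Illustrative example (a sketch): read from both stores, but write new
    # objects only to ``rw_store`` (both names hypothetical):
    #
    #     overlay = OverlayObjectStore(
    #         [ro_store, rw_store], add_store=rw_store)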

    def add_object(self, object):
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(self, objects, progress=None):
        if self.add_store is None:
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self):
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self):
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def get_raw(self, sha_id):
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha):
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha):
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False


def read_packs_file(f):
    """Yield the packs listed in a packs file."""
    for line in f.read().splitlines():
        if not line:
            continue
        (kind, name) = line.split(b" ", 1)
        if kind != b"P":
            continue
        yield name.decode(sys.getfilesystemencoding())