# object_store.py -- Object store for git objects
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#                         and others
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#


"""Git object store interfaces and implementation."""

from io import BytesIO
import errno
import os
import stat
import sys
import tempfile

from dulwich.diff_tree import (
    tree_changes,
    walk_trees,
    )
from dulwich.errors import (
    NotTreeError,
    )
from dulwich.file import GitFile
from dulwich.objects import (
    Commit,
    ShaFile,
    Tag,
    Tree,
    ZERO_SHA,
    hex_to_sha,
    sha_to_hex,
    hex_to_filename,
    S_ISGITLINK,
    object_class,
    )
from dulwich.pack import (
    Pack,
    PackData,
    PackInflater,
    PackFileDisappeared,
    iter_sha1,
    pack_objects_to_data,
    write_pack_header,
    write_pack_index_v2,
    write_pack_data,
    write_pack_object,
    compute_file_sha,
    PackIndexer,
    PackStreamCopier,
    )
from dulwich.refs import ANNOTATED_TAG_SUFFIX

INFODIR = 'info'
PACKDIR = 'pack'


class BaseObjectStore(object):
    """Object store interface."""

    def determine_wants_all(self, refs):
        return [sha for (ref, sha) in refs.items()
                if sha not in self and
                not ref.endswith(ANNOTATED_TAG_SUFFIX) and
                not sha == ZERO_SHA]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)
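
    # Illustrative sketch (not executed): ``store`` stands for any concrete
    # subclass, e.g. the MemoryObjectStore defined below, and ``sha`` for a
    # hex SHA it contains. ``__getitem__`` below builds on ``get_raw``:
    #
    #   type_num, raw = store.get_raw(sha)   # numeric type and raw contents
    #   obj = store[sha]                     # the same data as a ShaFile
    #   assert obj.as_raw_string() == raw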

    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        raise NotImplementedError(self.add_objects)

    def add_pack_data(self, count, pack_data, progress=None):
        """Add pack data to this object store.

        :param count: Number of items to add
        :param pack_data: Iterator over pack data tuples
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(f, count, pack_data, progress)
        except BaseException:
            abort()
            raise
        else:
            return commit()

    def tree_changes(self, source, target, want_unchanged=False,
                     include_trees=False, change_type_same=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :param include_trees: Whether to include trees
        :param change_type_same: Whether to report files changing
            type in the same entry.
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged,
                                   include_trees=include_trees,
                                   change_type_same=change_type_same):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        for entry, _ in walk_trees(self, tree_id, None):
            if ((entry.mode is not None and
                 not stat.S_ISDIR(entry.mode)) or include_trees):
                yield entry

    def find_missing_objects(self, haves, wants, progress=None,
                             get_tagged=None,
                             get_parents=lambda commit: commit.parents,
                             depth=None):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
                                     get_parents=get_parents)
        return iter(finder.next, None)
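
    # Rough usage sketch (``haves``/``wants`` are assumed iterables of commit
    # SHAs; ``store`` is a concrete object store):
    #
    #   for sha, path in store.find_missing_objects(haves, wants):
    #       ...  # every object the other side still needs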

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))

    def generate_pack_data(self, have, want, progress=None, ofs_delta=True):
        """Generate pack data objects for a set of wants/haves.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param ofs_delta: Whether OFS deltas can be included
        :param progress: Optional progress reporting method
        """
        # TODO(user): More efficient implementation
        return pack_objects_to_data(
            self.generate_pack_contents(have, want, progress))

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        obj = self[sha]
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            obj_class, sha = obj.object
            obj = self[sha]
        return obj

    def _collect_ancestors(self, heads, common=set(),
                           get_parents=lambda commit: commit.parents):
        """Collect all ancestors of heads up to (excluding) those in common.

        :param heads: commits to start from
        :param common: commits to end at, or empty set to walk repository
            completely
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: a tuple (A, B) where A - all commits reachable
            from heads but not present in common, B - common (shared) elements
            that are directly reachable from heads
        """
        bases = set()
        commits = set()
        queue = []
        queue.extend(heads)
        while queue:
            e = queue.pop(0)
            if e in common:
                bases.add(e)
            elif e not in commits:
                commits.add(e)
                cmt = self[e]
                queue.extend(get_parents(cmt))
        return (commits, bases)

    def close(self):
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP


class PackBasedObjectStore(BaseObjectStore):

    def __init__(self):
        self._pack_cache = {}

    @property
    def alternates(self):
        return []

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            try:
                if sha in pack:
                    return True
            except PackFileDisappeared:
                pass
        return False

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _add_cached_pack(self, base_name, pack):
        """Add a newly appeared pack to the cache by path."""
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            if prev_pack:
                prev_pack.close()

    def _clear_cached_packs(self):
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    def _iter_cached_packs(self):
        return self._pack_cache.values()

    def _update_pack_cache(self):
        raise NotImplementedError(self._update_pack_cache)

    def close(self):
        self._clear_cached_packs()

    @property
    def packs(self):
        """List with pack objects."""
        return (
            list(self._iter_cached_packs()) + list(self._update_pack_cache()))

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            for alternate_object in alternate:
                yield alternate_object

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha):
        raise NotImplementedError(self._get_loose_object)

    def _remove_loose_object(self, sha):
        raise NotImplementedError(self._remove_loose_object)

    def _remove_pack(self, name):
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        for obj, path in objects:
            self._remove_loose_object(obj.id)
        return len(objects)

    def repack(self):
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.
        """
        loose_objects = set()
        for sha in self._iter_loose_objects():
            loose_objects.add(self._get_loose_object(sha))
        objects = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update((obj, None) for obj in pack.iterobjects())

        # The name of the consolidated pack might match the name of a
        # pre-existing pack. Take care not to remove the newly created
        # consolidated pack.

        consolidated = self.add_objects(objects)
        old_packs.pop(consolidated.name(), None)

        for obj in loose_objects:
            self._remove_loose_object(obj.id)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        self._update_pack_cache()
        for pack in self._iter_cached_packs():
            try:
                for sha in pack:
                    yield sha
            except PackFileDisappeared:
                pass
        for sha in self._iter_loose_objects():
            yield sha
        for sha in self._iter_alternate_objects():
            yield sha
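
    # Maintenance sketch (illustrative only, for a concrete subclass such as
    # the DiskObjectStore defined below):
    #
    #   store.pack_loose_objects()   # roll loose objects into a new pack
    #   store.repack()               # consolidate everything into one pack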

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % name)
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the meantime?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over (object, path) tuples, should support
            __len__.
        :return: Pack object of the objects written.
        """
        return self.add_pack_data(
            *pack_objects_to_data(objects),
            progress=progress)


class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        super(DiskObjectStore, self).__init__()
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._alternates = None

    def __repr__(self):
        return "<%s(%r)>" % (self.__class__.__name__, self.path)

    @property
    def alternates(self):
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self):
        try:
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), 'rb')
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return
            raise
        with f:
            for line in f.readlines():
                line = line.rstrip(b"\n")
                if line.startswith(b"#"):
                    continue
                if os.path.isabs(line):
                    yield line.decode(sys.getfilesystemencoding())
                else:
                    yield os.path.join(self.path, line).decode(
                        sys.getfilesystemencoding())

    def add_alternate_path(self, path):
        """Add an alternate path to this object store."""
        try:
            os.mkdir(os.path.join(self.path, INFODIR))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, 'wb') as f:
            try:
                orig_f = open(alternates_path, 'rb')
            except (OSError, IOError) as e:
                if e.errno != errno.ENOENT:
                    raise
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(path.encode(sys.getfilesystemencoding()) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))
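
    # Illustrative sketch (paths are made up); objects missing from this
    # store are then looked up in the alternate as well:
    #
    #   store = DiskObjectStore("/srv/repo.git/objects")
    #   store.add_alternate_path("/srv/shared.git/objects")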

    def _update_pack_cache(self):
        """Read and iterate over new pack files and cache them."""
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except OSError as e:
            if e.errno == errno.ENOENT:
                self.close()
                return []
            raise
        pack_files = set()
        for name in pack_dir_contents:
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet
                # fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[:-len(".pack")]
                    pack_files.add(pack_name)

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = Pack(os.path.join(self.pack_dir, f))
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _get_shafile_path(self, sha):
        # Check from object dir
        return hex_to_filename(self.path, sha)

    def _iter_loose_objects(self):
        for base in os.listdir(self.path):
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                yield (base+rest).encode(sys.getfilesystemencoding())

    def _get_loose_object(self, sha):
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return None
            raise

    def _remove_loose_object(self, sha):
        os.remove(self._get_shafile_path(sha))

    def _remove_pack(self, pack):
        try:
            del self._pack_cache[os.path.basename(pack._basename)]
        except KeyError:
            pass
        pack.close()
        os.remove(pack.data.path)
        os.remove(pack.index.path)

    def _get_pack_basepath(self, entries):
        suffix = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix.decode('ascii')
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_thin_pack(self, f, path, copier, indexer):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param f: Open file object for the pack.
        :param path: Path to the pack file.
        :param copier: A PackStreamCopier to use for writing pack data.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Must flush before reading (http://bugs.python.org/issue3207)
        f.flush()

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Must reposition before writing (http://bugs.python.org/issue3207)
        f.seek(0, os.SEEK_CUR)

        # Complete the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            offset = f.tell()
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()
        f.write(pack_sha)
        f.close()

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)
        target_pack = pack_base_name + '.pack'
        if sys.platform == 'win32':
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            try:
                os.remove(target_pack)
            except (IOError, OSError) as e:
                if e.errno != errno.ENOENT:
                    raise
        os.rename(path, target_pack)

        # Write the index.
        index_file = GitFile(pack_base_name + '.idx', 'wb')
        try:
            write_pack_index_v2(index_file, entries, pack_sha)
            index_file.close()
        finally:
            index_file.abort()

        # Add the pack to the store and return it.
        final_pack = Pack(pack_base_name)
        final_pack.check_length_and_checksum()
        self._add_cached_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        :return: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
        with os.fdopen(fd, 'w+b') as f:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            return self._complete_thin_pack(f, path, copier, indexer)

    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        with PackData(path) as p:
            entries = p.sorted_entries()
            basename = self._get_pack_basepath(entries)
            with GitFile(basename+".idx", "wb") as f:
                write_pack_index_v2(f, entries, p.get_stored_checksum())
        for pack in self.packs:
            if pack._basename == basename:
                return pack
        target_pack = basename + '.pack'
        if sys.platform == 'win32':
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            try:
                os.remove(target_pack)
            except (IOError, OSError) as e:
                if e.errno != errno.ENOENT:
                    raise
        os.rename(path, target_pack)
        final_pack = Pack(basename)
        self._add_cached_pack(basename, final_pack)
        return final_pack

    def add_pack(self):
        """Add a new pack to this object store.

        :return: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')

        def commit():
            f.flush()
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) > 0:
                return self.move_in_pack(path)
            else:
                os.remove(path)
                return None

        def abort():
            f.close()
            os.remove(path)
        return f, commit, abort
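
    # Sketch of the add_pack() protocol (this mirrors what
    # BaseObjectStore.add_pack_data does; shown here for illustration):
    #
    #   f, commit, abort = store.add_pack()
    #   try:
    #       write_pack_data(f, count, pack_data)
    #   except BaseException:
    #       abort()
    #       raise
    #   else:
    #       new_pack = commit()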

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
        try:
            os.mkdir(dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        if os.path.exists(path):
            return  # Already there, no need to write again
        with GitFile(path, 'wb') as f:
            f.write(obj.as_legacy_object())

    @classmethod
    def init(cls, path):
        try:
            os.mkdir(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)


class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self):
        super(MemoryObjectStore, self).__init__()
        self._data = {}

    def _to_hexsha(self, sha):
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError("Invalid sha %r" % (sha,))

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name):
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name):
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj):
        """Add a single object to this object store."""
        self._data[obj.id] = obj.copy()

    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        f = BytesIO()

        def commit():
            p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
            f.close()
            for obj in PackInflater.for_pack_data(p, self.get_raw):
                self.add_object(obj)

        def abort():
            pass
        return f, commit, abort
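
    # Minimal in-memory usage sketch (illustrative only):
    #
    #   from dulwich.objects import Blob
    #   store = MemoryObjectStore()
    #   blob = Blob.from_string(b"hello")
    #   store.add_object(blob)
    #   assert store[blob.id].as_raw_string() == b"hello"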

    def _complete_thin_pack(self, f, indexer):
        """Complete a thin pack by adding external references.

        :param f: Open file object for the pack.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Complete the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            write_pack_object(f, type_num, data, sha=new_sha)
        pack_sha = new_sha.digest()
        f.write(pack_sha)

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        """
        f, commit, abort = self.add_pack()
        try:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            self._complete_thin_pack(f, indexer)
        except BaseException:
            abort()
            raise
        else:
            commit()


class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        raise NotImplementedError(self.iterobjects)


class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore."""

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        self.store = store
        self.sha_iter = sha_iter
        self._shas = []

    def __iter__(self):
        """Yield tuple with next object and path."""
        for sha, path in self.itershas():
            yield self.store[sha], path

    def iterobjects(self):
        """Iterate over just the objects."""
        for o, path in self:
            yield o

    def itershas(self):
        """Iterate over the SHAs."""
        for sha in self._shas:
            yield sha
        for sha in self.sha_iter:
            self._shas.append(sha)
            yield sha

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        if needle == ZERO_SHA:
            return False
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

    def __len__(self):
        """Return the number of objects."""
        return len(list(self.itershas()))

    def empty(self):
        it = self.itershas()
        try:
            next(it)
        except StopIteration:
            return True
        else:
            return False

    def __bool__(self):
        """Indicate whether this object has contents."""
        return not self.empty()


def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    """
    tree = lookup_obj(root_sha)
    if not isinstance(tree, Tree):
        raise NotTreeError(root_sha)
    return tree.lookup_path(lookup_obj, path)
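
# Illustrative sketch (``store`` and ``tree_id`` are assumed to exist; the
# path is made up):
#
#   mode, sha = tree_lookup_path(store.__getitem__, tree_id, b"docs/readme.txt")
#   blob = store[sha]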


def _collect_filetree_revs(obj_store, tree_sha, kset):
    """Collect SHA1s of files and directories for specified tree.

    :param obj_store: Object store to get objects by SHA from
    :param tree_sha: tree reference to walk
    :param kset: set to fill with references to files and directories
    """
    filetree = obj_store[tree_sha]
    for name, mode, sha in filetree.iteritems():
        if not S_ISGITLINK(mode) and sha not in kset:
            kset.add(sha)
            if stat.S_ISDIR(mode):
                _collect_filetree_revs(obj_store, sha, kset)


def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included in the commits list as well; only
    SHA1s known in this repository will get through. Unless the ignore_unknown
    argument is True, a KeyError is raised for SHA1s missing from the
    repository.

    :param obj_store: Object store to get objects by SHA1 from
    :param lst: Collection of commit and tag SHAs
    :param ignore_unknown: True to skip SHA1 missing in the repository
        silently.
    :return: A tuple of (commits, tags, others) SHA1s
    """
    commits = set()
    tags = set()
    others = set()
    for e in lst:
        try:
            o = obj_store[e]
        except KeyError:
            if not ignore_unknown:
                raise
        else:
            if isinstance(o, Commit):
                commits.add(e)
            elif isinstance(o, Tag):
                tags.add(e)
                tagged = o.object[1]
                c, t, o = _split_commits_and_tags(
                    obj_store, [tagged], ignore_unknown=ignore_unknown)
                commits |= c
                tags |= t
                others |= o
            else:
                others.add(e)
    return (commits, tags, others)
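
# Sketch (illustrative): classifying the targets of a repository's refs, where
# ``refs`` is assumed to be a ref-name -> SHA mapping:
#
#   commits, tags, others = _split_commits_and_tags(
#       store, refs.values(), ignore_unknown=True)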


class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param get_parents: Optional function for getting the parents of a commit.
    """

    def __init__(self, object_store, haves, wants, progress=None,
                 get_tagged=None, get_parents=lambda commit: commit.parents):
        self.object_store = object_store
        self._get_parents = get_parents
        # process Commits and Tags differently
        # Note, while haves may list commits/tags not available locally,
        # and such SHAs would get filtered out by _split_commits_and_tags,
        # wants shall list only known SHAs, and otherwise
        # _split_commits_and_tags fails with KeyError
        have_commits, have_tags, have_others = (
            _split_commits_and_tags(object_store, haves, True))
        want_commits, want_tags, want_others = (
            _split_commits_and_tags(object_store, wants, False))
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = object_store._collect_ancestors(
            have_commits, get_parents=self._get_parents)[0]
        # all_missing - complete set of commits between haves and wants
        # common - commits from all_ancestors we hit into while
        # traversing parent hierarchy of wants
        missing_commits, common_commits = object_store._collect_ancestors(
            want_commits, all_ancestors, get_parents=self._get_parents)
        self.sha_done = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.sha_done.add(h)
            cmt = object_store[h]
            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
        # record tags we have as visited, too
        for t in have_tags:
            self.sha_done.add(t)

        missing_tags = want_tags.difference(have_tags)
        missing_others = want_others.difference(have_others)
        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        wants = missing_commits.union(missing_tags)
        wants = wants.union(missing_others)

        self.objects_to_send = set([(w, None, False) for w in wants])

        if progress is None:
            self.progress = lambda x: None
        else:
            self.progress = progress
        self._tagged = get_tagged and get_tagged() or {}

    def add_todo(self, entries):
        self.objects_to_send.update([e for e in entries
                                     if not e[0] in self.sha_done])

    def next(self):
        while True:
            if not self.objects_to_send:
                return None
            (sha, name, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, "", False)])
            elif isinstance(o, Tree):
                self.add_todo([(s, n, not stat.S_ISDIR(m))
                               for n, m, s in o.iteritems()
                               if not S_ISGITLINK(m)])
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, True)])
        self.sha_done.add(sha)
        self.progress(("counting objects: %d\r" %
                       len(self.sha_done)).encode('ascii'))
        return (sha, name)

    __next__ = next
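
# Rough sketch of driving the finder directly (normally reached through
# BaseObjectStore.find_missing_objects); ``store``, ``old_sha`` and ``new_sha``
# are assumed to exist:
#
#   finder = MissingObjectFinder(store, haves=[old_sha], wants=[new_sha])
#   for sha, path in iter(finder.next, None):
#       ...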


class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """

    def __init__(self, local_heads, get_parents, shallow=None):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        self.parents = {}
        if shallow is None:
            shallow = set()
        self.shallow = shallow

    def ack(self, sha):
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            raise ValueError("unexpected sha %r received" % sha)
        ancestors = set([sha])

        # stop if we run out of heads to remove
        while self.heads:
            for a in ancestors:
                if a in self.heads:
                    self.heads.remove(a)

            # collect all ancestors
            new_ancestors = set()
            for a in ancestors:
                ps = self.parents.get(a)
                if ps is not None:
                    new_ancestors.update(ps)
                self.parents[a] = None

            # no more ancestors; stop
            if not new_ancestors:
                break

            ancestors = new_ancestors

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if self.heads:
            ret = self.heads.pop()
            ps = self.get_parents(ret)
            self.parents[ret] = ps
            self.heads.update(
                [p for p in ps if p not in self.parents])
            return ret
        return None

    __next__ = next
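
# Sketch of the negotiation loop a caller might drive with this walker
# (``remote_has`` is a hypothetical predicate for the other side):
#
#   walker = ObjectStoreGraphWalker(
#       local_heads, lambda sha: store[sha].parents)
#   sha = next(walker)
#   while sha is not None:
#       if remote_has(sha):
#           walker.ack(sha)
#       sha = next(walker)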


def commit_tree_changes(object_store, tree, changes):
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. Setting the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    :param object_store: Object store to store new objects in
        and retrieve old ones from.
    :param tree: Original tree root
    :param changes: changes to apply
    :return: New tree root object
    """
    # TODO(user): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    nested_changes = {}
    for (path, new_mode, new_sha) in changes:
        try:
            (dirname, subpath) = path.split(b'/', 1)
        except ValueError:
            if new_sha is None:
                del tree[path]
            else:
                tree[path] = (new_mode, new_sha)
        else:
            nested_changes.setdefault(dirname, []).append(
                (subpath, new_mode, new_sha))
    for name, subchanges in nested_changes.items():
        try:
            orig_subtree = object_store[tree[name][1]]
        except KeyError:
            orig_subtree = Tree()
        subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
        if len(subtree) == 0:
            del tree[name]
        else:
            tree[name] = (stat.S_IFDIR, subtree.id)
    object_store.add_object(tree)
    return tree


class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores."""

    def __init__(self, bases, add_store=None):
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object):
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(self, objects, progress=None):
        if self.add_store is None:
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self):
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self):
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def get_raw(self, sha_id):
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha):
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha):
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False


def read_packs_file(f):
    """Yield the packs listed in a packs file."""
    for line in f.read().splitlines():
        if not line:
            continue
        (kind, name) = line.split(b" ", 1)
        if kind != b"P":
            continue
        yield name.decode(sys.getfilesystemencoding())
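
# Sketch of applying a few changes with commit_tree_changes (names, modes and
# objects are illustrative; ``store`` holds the original tree):
#
#   new_tree = commit_tree_changes(store, store[old_tree_id], [
#       (b"README", 0o100644, readme_blob.id),
#       (b"docs/old.txt", None, None),   # a None mode/sha deletes the path
#   ])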