1# index.py 2# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors 3# 4# This module is part of GitPython and is released under 5# the BSD License: http://www.opensource.org/licenses/bsd-license.php 6import glob 7from io import BytesIO 8import os 9from stat import S_ISLNK 10import subprocess 11import tempfile 12 13from git.compat import ( 14 izip, 15 xrange, 16 string_types, 17 force_bytes, 18 defenc, 19 mviter, 20) 21from git.exc import ( 22 GitCommandError, 23 CheckoutError, 24 InvalidGitRepositoryError 25) 26from git.objects import ( 27 Blob, 28 Submodule, 29 Tree, 30 Object, 31 Commit, 32) 33from git.objects.util import Serializable 34from git.util import ( 35 LazyMixin, 36 LockedFD, 37 join_path_native, 38 file_contents_ro, 39 to_native_path_linux, 40 unbare_repo, 41 to_bin_sha 42) 43from gitdb.base import IStream 44from gitdb.db import MemoryDB 45 46import git.diff as diff 47import os.path as osp 48 49from .fun import ( 50 entry_key, 51 write_cache, 52 read_cache, 53 aggressive_tree_merge, 54 write_tree_from_cache, 55 stat_mode_to_index_mode, 56 S_IFGITLINK, 57 run_commit_hook 58) 59from .typ import ( 60 BaseIndexEntry, 61 IndexEntry, 62) 63from .util import ( 64 TemporaryFileSwap, 65 post_clear_cache, 66 default_index, 67 git_working_dir 68) 69 70 71__all__ = ('IndexFile', 'CheckoutError') 72 73 74class IndexFile(LazyMixin, diff.Diffable, Serializable): 75 76 """ 77 Implements an Index that can be manipulated using a native implementation in 78 order to save git command function calls wherever possible. 79 80 It provides custom merging facilities allowing to merge without actually changing 81 your index or your working tree. This way you can perform own test-merges based 82 on the index only without having to deal with the working copy. This is useful 83 in case of partial working trees. 84 85 ``Entries`` 86 87 The index contains an entries dict whose keys are tuples of type IndexEntry 88 to facilitate access. 89 90 You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: 91 92 index.entries[index.entry_key(index_entry_instance)] = index_entry_instance 93 94 Make sure you use index.write() once you are done manipulating the index directly 95 before operating on it using the git command""" 96 __slots__ = ("repo", "version", "entries", "_extension_data", "_file_path") 97 _VERSION = 2 # latest version we support 98 S_IFGITLINK = S_IFGITLINK # a submodule 99 100 def __init__(self, repo, file_path=None): 101 """Initialize this Index instance, optionally from the given ``file_path``. 102 If no file_path is given, we will be created from the current index file. 103 104 If a stream is not given, the stream will be initialized from the current 105 repository's index on demand.""" 106 self.repo = repo 107 self.version = self._VERSION 108 self._extension_data = b'' 109 self._file_path = file_path or self._index_path() 110 111 def _set_cache_(self, attr): 112 if attr == "entries": 113 # read the current index 114 # try memory map for speed 115 lfd = LockedFD(self._file_path) 116 ok = False 117 try: 118 fd = lfd.open(write=False, stream=False) 119 ok = True 120 except OSError: 121 # in new repositories, there may be no index, which means we are empty 122 self.entries = {} 123 return 124 finally: 125 if not ok: 126 lfd.rollback() 127 # END exception handling 128 129 stream = file_contents_ro(fd, stream=True, allow_mmap=True) 130 131 try: 132 self._deserialize(stream) 133 finally: 134 lfd.rollback() 135 # The handles will be closed on destruction 136 # END read from default index on demand 137 else: 138 super(IndexFile, self)._set_cache_(attr) 139 140 def _index_path(self): 141 return join_path_native(self.repo.git_dir, "index") 142 143 @property 144 def path(self): 145 """ :return: Path to the index file we are representing """ 146 return self._file_path 147 148 def _delete_entries_cache(self): 149 """Safely clear the entries cache so it can be recreated""" 150 try: 151 del(self.entries) 152 except AttributeError: 153 # fails in python 2.6.5 with this exception 154 pass 155 # END exception handling 156 157 #{ Serializable Interface 158 159 def _deserialize(self, stream): 160 """Initialize this instance with index values read from the given stream""" 161 self.version, self.entries, self._extension_data, conten_sha = read_cache(stream) # @UnusedVariable 162 return self 163 164 def _entries_sorted(self): 165 """:return: list of entries, in a sorted fashion, first by path, then by stage""" 166 return sorted(self.entries.values(), key=lambda e: (e.path, e.stage)) 167 168 def _serialize(self, stream, ignore_extension_data=False): 169 entries = self._entries_sorted() 170 extension_data = self._extension_data 171 if ignore_extension_data: 172 extension_data = None 173 write_cache(entries, stream, extension_data) 174 return self 175 176 #} END serializable interface 177 178 def write(self, file_path=None, ignore_extension_data=False): 179 """Write the current state to our file path or to the given one 180 181 :param file_path: 182 If None, we will write to our stored file path from which we have 183 been initialized. Otherwise we write to the given file path. 184 Please note that this will change the file_path of this index to 185 the one you gave. 186 187 :param ignore_extension_data: 188 If True, the TREE type extension data read in the index will not 189 be written to disk. NOTE that no extension data is actually written. 190 Use this if you have altered the index and 191 would like to use git-write-tree afterwards to create a tree 192 representing your written changes. 193 If this data is present in the written index, git-write-tree 194 will instead write the stored/cached tree. 195 Alternatively, use IndexFile.write_tree() to handle this case 196 automatically 197 198 :return: self""" 199 # make sure we have our entries read before getting a write lock 200 # else it would be done when streaming. This can happen 201 # if one doesn't change the index, but writes it right away 202 self.entries 203 lfd = LockedFD(file_path or self._file_path) 204 stream = lfd.open(write=True, stream=True) 205 206 ok = False 207 try: 208 self._serialize(stream, ignore_extension_data) 209 ok = True 210 finally: 211 if not ok: 212 lfd.rollback() 213 214 lfd.commit() 215 216 # make sure we represent what we have written 217 if file_path is not None: 218 self._file_path = file_path 219 220 @post_clear_cache 221 @default_index 222 def merge_tree(self, rhs, base=None): 223 """Merge the given rhs treeish into the current index, possibly taking 224 a common base treeish into account. 225 226 As opposed to the from_tree_ method, this allows you to use an already 227 existing tree as the left side of the merge 228 229 :param rhs: 230 treeish reference pointing to the 'other' side of the merge. 231 232 :param base: 233 optional treeish reference pointing to the common base of 'rhs' and 234 this index which equals lhs 235 236 :return: 237 self ( containing the merge and possibly unmerged entries in case of 238 conflicts ) 239 240 :raise GitCommandError: 241 If there is a merge conflict. The error will 242 be raised at the first conflicting path. If you want to have proper 243 merge resolution to be done by yourself, you have to commit the changed 244 index ( or make a valid tree from it ) and retry with a three-way 245 index.from_tree call. """ 246 # -i : ignore working tree status 247 # --aggressive : handle more merge cases 248 # -m : do an actual merge 249 args = ["--aggressive", "-i", "-m"] 250 if base is not None: 251 args.append(base) 252 args.append(rhs) 253 254 self.repo.git.read_tree(args) 255 return self 256 257 @classmethod 258 def new(cls, repo, *tree_sha): 259 """ Merge the given treeish revisions into a new index which is returned. 260 This method behaves like git-read-tree --aggressive when doing the merge. 261 262 :param repo: The repository treeish are located in. 263 264 :param tree_sha: 265 20 byte or 40 byte tree sha or tree objects 266 267 :return: 268 New IndexFile instance. Its path will be undefined. 269 If you intend to write such a merged Index, supply an alternate file_path 270 to its 'write' method.""" 271 base_entries = aggressive_tree_merge(repo.odb, [to_bin_sha(str(t)) for t in tree_sha]) 272 273 inst = cls(repo) 274 # convert to entries dict 275 entries = dict(izip(((e.path, e.stage) for e in base_entries), 276 (IndexEntry.from_base(e) for e in base_entries))) 277 278 inst.entries = entries 279 return inst 280 281 @classmethod 282 def from_tree(cls, repo, *treeish, **kwargs): 283 """Merge the given treeish revisions into a new index which is returned. 284 The original index will remain unaltered 285 286 :param repo: 287 The repository treeish are located in. 288 289 :param treeish: 290 One, two or three Tree Objects, Commits or 40 byte hexshas. The result 291 changes according to the amount of trees. 292 If 1 Tree is given, it will just be read into a new index 293 If 2 Trees are given, they will be merged into a new index using a 294 two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other' 295 one. It behaves like a fast-forward. 296 If 3 Trees are given, a 3-way merge will be performed with the first tree 297 being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree, 298 tree 3 is the 'other' one 299 300 :param kwargs: 301 Additional arguments passed to git-read-tree 302 303 :return: 304 New IndexFile instance. It will point to a temporary index location which 305 does not exist anymore. If you intend to write such a merged Index, supply 306 an alternate file_path to its 'write' method. 307 308 :note: 309 In the three-way merge case, --aggressive will be specified to automatically 310 resolve more cases in a commonly correct manner. Specify trivial=True as kwarg 311 to override that. 312 313 As the underlying git-read-tree command takes into account the current index, 314 it will be temporarily moved out of the way to assure there are no unsuspected 315 interferences.""" 316 if len(treeish) == 0 or len(treeish) > 3: 317 raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish)) 318 319 arg_list = [] 320 # ignore that working tree and index possibly are out of date 321 if len(treeish) > 1: 322 # drop unmerged entries when reading our index and merging 323 arg_list.append("--reset") 324 # handle non-trivial cases the way a real merge does 325 arg_list.append("--aggressive") 326 # END merge handling 327 328 # tmp file created in git home directory to be sure renaming 329 # works - /tmp/ dirs could be on another device 330 tmp_index = tempfile.mktemp('', '', repo.git_dir) 331 arg_list.append("--index-output=%s" % tmp_index) 332 arg_list.extend(treeish) 333 334 # move current index out of the way - otherwise the merge may fail 335 # as it considers existing entries. moving it essentially clears the index. 336 # Unfortunately there is no 'soft' way to do it. 337 # The TemporaryFileSwap assure the original file get put back 338 index_handler = TemporaryFileSwap(join_path_native(repo.git_dir, 'index')) 339 try: 340 repo.git.read_tree(*arg_list, **kwargs) 341 index = cls(repo, tmp_index) 342 index.entries # force it to read the file as we will delete the temp-file 343 del(index_handler) # release as soon as possible 344 finally: 345 if osp.exists(tmp_index): 346 os.remove(tmp_index) 347 # END index merge handling 348 349 return index 350 351 # UTILITIES 352 @unbare_repo 353 def _iter_expand_paths(self, paths): 354 """Expand the directories in list of paths to the corresponding paths accordingly, 355 356 Note: git will add items multiple times even if a glob overlapped 357 with manually specified paths or if paths where specified multiple 358 times - we respect that and do not prune""" 359 def raise_exc(e): 360 raise e 361 r = self.repo.working_tree_dir 362 rs = r + os.sep 363 for path in paths: 364 abs_path = path 365 if not osp.isabs(abs_path): 366 abs_path = osp.join(r, path) 367 # END make absolute path 368 369 try: 370 st = os.lstat(abs_path) # handles non-symlinks as well 371 except OSError: 372 # the lstat call may fail as the path may contain globs as well 373 pass 374 else: 375 if S_ISLNK(st.st_mode): 376 yield abs_path.replace(rs, '') 377 continue 378 # end check symlink 379 380 # resolve globs if possible 381 if '?' in path or '*' in path or '[' in path: 382 resolved_paths = glob.glob(abs_path) 383 # not abs_path in resolved_paths: 384 # a glob() resolving to the same path we are feeding it with 385 # is a glob() that failed to resolve. If we continued calling 386 # ourselves we'd endlessly recurse. If the condition below 387 # evaluates to true then we are likely dealing with a file 388 # whose name contains wildcard characters. 389 if abs_path not in resolved_paths: 390 for f in self._iter_expand_paths(glob.glob(abs_path)): 391 yield f.replace(rs, '') 392 continue 393 # END glob handling 394 try: 395 for root, dirs, files in os.walk(abs_path, onerror=raise_exc): # @UnusedVariable 396 for rela_file in files: 397 # add relative paths only 398 yield osp.join(root.replace(rs, ''), rela_file) 399 # END for each file in subdir 400 # END for each subdirectory 401 except OSError: 402 # was a file or something that could not be iterated 403 yield path.replace(rs, '') 404 # END path exception handling 405 # END for each path 406 407 def _write_path_to_stdin(self, proc, filepath, item, fmakeexc, fprogress, 408 read_from_stdout=True): 409 """Write path to proc.stdin and make sure it processes the item, including progress. 410 411 :return: stdout string 412 :param read_from_stdout: if True, proc.stdout will be read after the item 413 was sent to stdin. In that case, it will return None 414 :note: There is a bug in git-update-index that prevents it from sending 415 reports just in time. This is why we have a version that tries to 416 read stdout and one which doesn't. In fact, the stdout is not 417 important as the piped-in files are processed anyway and just in time 418 :note: Newlines are essential here, gits behaviour is somewhat inconsistent 419 on this depending on the version, hence we try our best to deal with 420 newlines carefully. Usually the last newline will not be sent, instead 421 we will close stdin to break the pipe.""" 422 423 fprogress(filepath, False, item) 424 rval = None 425 try: 426 proc.stdin.write(("%s\n" % filepath).encode(defenc)) 427 except IOError: 428 # pipe broke, usually because some error happened 429 raise fmakeexc() 430 # END write exception handling 431 proc.stdin.flush() 432 if read_from_stdout: 433 rval = proc.stdout.readline().strip() 434 fprogress(filepath, True, item) 435 return rval 436 437 def iter_blobs(self, predicate=lambda t: True): 438 """ 439 :return: Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob) 440 441 :param predicate: 442 Function(t) returning True if tuple(stage, Blob) should be yielded by the 443 iterator. A default filter, the BlobFilter, allows you to yield blobs 444 only if they match a given list of paths. """ 445 for entry in mviter(self.entries): 446 blob = entry.to_blob(self.repo) 447 blob.size = entry.size 448 output = (entry.stage, blob) 449 if predicate(output): 450 yield output 451 # END for each entry 452 453 def unmerged_blobs(self): 454 """ 455 :return: 456 Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being 457 a dictionary associating a path in the index with a list containing 458 sorted stage/blob pairs 459 460 :note: 461 Blobs that have been removed in one side simply do not exist in the 462 given stage. I.e. a file removed on the 'other' branch whose entries 463 are at stage 3 will not have a stage 3 entry. 464 """ 465 is_unmerged_blob = lambda t: t[0] != 0 466 path_map = {} 467 for stage, blob in self.iter_blobs(is_unmerged_blob): 468 path_map.setdefault(blob.path, []).append((stage, blob)) 469 # END for each unmerged blob 470 for l in mviter(path_map): 471 l.sort() 472 return path_map 473 474 @classmethod 475 def entry_key(cls, *entry): 476 return entry_key(*entry) 477 478 def resolve_blobs(self, iter_blobs): 479 """Resolve the blobs given in blob iterator. This will effectively remove the 480 index entries of the respective path at all non-null stages and add the given 481 blob as new stage null blob. 482 483 For each path there may only be one blob, otherwise a ValueError will be raised 484 claiming the path is already at stage 0. 485 486 :raise ValueError: if one of the blobs already existed at stage 0 487 :return: self 488 489 :note: 490 You will have to write the index manually once you are done, i.e. 491 index.resolve_blobs(blobs).write() 492 """ 493 for blob in iter_blobs: 494 stage_null_key = (blob.path, 0) 495 if stage_null_key in self.entries: 496 raise ValueError("Path %r already exists at stage 0" % blob.path) 497 # END assert blob is not stage 0 already 498 499 # delete all possible stages 500 for stage in (1, 2, 3): 501 try: 502 del(self.entries[(blob.path, stage)]) 503 except KeyError: 504 pass 505 # END ignore key errors 506 # END for each possible stage 507 508 self.entries[stage_null_key] = IndexEntry.from_blob(blob) 509 # END for each blob 510 511 return self 512 513 def update(self): 514 """Reread the contents of our index file, discarding all cached information 515 we might have. 516 517 :note: This is a possibly dangerious operations as it will discard your changes 518 to index.entries 519 :return: self""" 520 self._delete_entries_cache() 521 # allows to lazily reread on demand 522 return self 523 524 def write_tree(self): 525 """Writes this index to a corresponding Tree object into the repository's 526 object database and return it. 527 528 :return: Tree object representing this index 529 :note: The tree will be written even if one or more objects the tree refers to 530 does not yet exist in the object database. This could happen if you added 531 Entries to the index directly. 532 :raise ValueError: if there are no entries in the cache 533 :raise UnmergedEntriesError: """ 534 # we obtain no lock as we just flush our contents to disk as tree 535 # If we are a new index, the entries access will load our data accordingly 536 mdb = MemoryDB() 537 entries = self._entries_sorted() 538 binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries))) 539 540 # copy changed trees only 541 mdb.stream_copy(mdb.sha_iter(), self.repo.odb) 542 543 # note: additional deserialization could be saved if write_tree_from_cache 544 # would return sorted tree entries 545 root_tree = Tree(self.repo, binsha, path='') 546 root_tree._cache = tree_items 547 return root_tree 548 549 def _process_diff_args(self, args): 550 try: 551 args.pop(args.index(self)) 552 except IndexError: 553 pass 554 # END remove self 555 return args 556 557 def _to_relative_path(self, path): 558 """:return: Version of path relative to our git directory or raise ValueError 559 if it is not within our git direcotory""" 560 if not osp.isabs(path): 561 return path 562 if self.repo.bare: 563 raise InvalidGitRepositoryError("require non-bare repository") 564 if not path.startswith(self.repo.working_tree_dir): 565 raise ValueError("Absolute path %r is not in git repository at %r" % (path, self.repo.working_tree_dir)) 566 return os.path.relpath(path, self.repo.working_tree_dir) 567 568 def _preprocess_add_items(self, items): 569 """ Split the items into two lists of path strings and BaseEntries. """ 570 paths = [] 571 entries = [] 572 573 for item in items: 574 if isinstance(item, string_types): 575 paths.append(self._to_relative_path(item)) 576 elif isinstance(item, (Blob, Submodule)): 577 entries.append(BaseIndexEntry.from_blob(item)) 578 elif isinstance(item, BaseIndexEntry): 579 entries.append(item) 580 else: 581 raise TypeError("Invalid Type: %r" % item) 582 # END for each item 583 return (paths, entries) 584 585 def _store_path(self, filepath, fprogress): 586 """Store file at filepath in the database and return the base index entry 587 Needs the git_working_dir decorator active ! This must be assured in the calling code""" 588 st = os.lstat(filepath) # handles non-symlinks as well 589 if S_ISLNK(st.st_mode): 590 # in PY3, readlink is string, but we need bytes. In PY2, it's just OS encoded bytes, we assume UTF-8 591 open_stream = lambda: BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) 592 else: 593 open_stream = lambda: open(filepath, 'rb') 594 with open_stream() as stream: 595 fprogress(filepath, False, filepath) 596 istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream)) 597 fprogress(filepath, True, filepath) 598 return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode), 599 istream.binsha, 0, to_native_path_linux(filepath))) 600 601 @unbare_repo 602 @git_working_dir 603 def _entries_for_paths(self, paths, path_rewriter, fprogress, entries): 604 entries_added = [] 605 if path_rewriter: 606 for path in paths: 607 if osp.isabs(path): 608 abspath = path 609 gitrelative_path = path[len(self.repo.working_tree_dir) + 1:] 610 else: 611 gitrelative_path = path 612 abspath = osp.join(self.repo.working_tree_dir, gitrelative_path) 613 # end obtain relative and absolute paths 614 615 blob = Blob(self.repo, Blob.NULL_BIN_SHA, 616 stat_mode_to_index_mode(os.stat(abspath).st_mode), 617 to_native_path_linux(gitrelative_path)) 618 # TODO: variable undefined 619 entries.append(BaseIndexEntry.from_blob(blob)) 620 # END for each path 621 del(paths[:]) 622 # END rewrite paths 623 624 # HANDLE PATHS 625 assert len(entries_added) == 0 626 for filepath in self._iter_expand_paths(paths): 627 entries_added.append(self._store_path(filepath, fprogress)) 628 # END for each filepath 629 # END path handling 630 return entries_added 631 632 def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None, 633 write=True, write_extension_data=False): 634 """Add files from the working tree, specific blobs or BaseIndexEntries 635 to the index. 636 637 :param items: 638 Multiple types of items are supported, types can be mixed within one call. 639 Different types imply a different handling. File paths may generally be 640 relative or absolute. 641 642 - path string 643 strings denote a relative or absolute path into the repository pointing to 644 an existing file, i.e. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'. 645 646 Absolute paths must start with working tree directory of this index's repository 647 to be considered valid. For example, if it was initialized with a non-normalized path, like 648 `/root/repo/../repo`, absolute paths to be added must start with `/root/repo/../repo`. 649 650 Paths provided like this must exist. When added, they will be written 651 into the object database. 652 653 PathStrings may contain globs, such as 'lib/__init__*' or can be directories 654 like 'lib', the latter ones will add all the files within the dirctory and 655 subdirectories. 656 657 This equals a straight git-add. 658 659 They are added at stage 0 660 661 - Blob or Submodule object 662 Blobs are added as they are assuming a valid mode is set. 663 The file they refer to may or may not exist in the file system, but 664 must be a path relative to our repository. 665 666 If their sha is null ( 40*0 ), their path must exist in the file system 667 relative to the git repository as an object will be created from 668 the data at the path. 669 The handling now very much equals the way string paths are processed, except that 670 the mode you have set will be kept. This allows you to create symlinks 671 by settings the mode respectively and writing the target of the symlink 672 directly into the file. This equals a default Linux-Symlink which 673 is not dereferenced automatically, except that it can be created on 674 filesystems not supporting it as well. 675 676 Please note that globs or directories are not allowed in Blob objects. 677 678 They are added at stage 0 679 680 - BaseIndexEntry or type 681 Handling equals the one of Blob objects, but the stage may be 682 explicitly set. Please note that Index Entries require binary sha's. 683 684 :param force: 685 **CURRENTLY INEFFECTIVE** 686 If True, otherwise ignored or excluded files will be 687 added anyway. 688 As opposed to the git-add command, we enable this flag by default 689 as the API user usually wants the item to be added even though 690 they might be excluded. 691 692 :param fprogress: 693 Function with signature f(path, done=False, item=item) called for each 694 path to be added, one time once it is about to be added where done==False 695 and once after it was added where done=True. 696 item is set to the actual item we handle, either a Path or a BaseIndexEntry 697 Please note that the processed path is not guaranteed to be present 698 in the index already as the index is currently being processed. 699 700 :param path_rewriter: 701 Function with signature (string) func(BaseIndexEntry) function returning a path 702 for each passed entry which is the path to be actually recorded for the 703 object created from entry.path. This allows you to write an index which 704 is not identical to the layout of the actual files on your hard-disk. 705 If not None and ``items`` contain plain paths, these paths will be 706 converted to Entries beforehand and passed to the path_rewriter. 707 Please note that entry.path is relative to the git repository. 708 709 :param write: 710 If True, the index will be written once it was altered. Otherwise 711 the changes only exist in memory and are not available to git commands. 712 713 :param write_extension_data: 714 If True, extension data will be written back to the index. This can lead to issues in case 715 it is containing the 'TREE' extension, which will cause the `git commit` command to write an 716 old tree, instead of a new one representing the now changed index. 717 This doesn't matter if you use `IndexFile.commit()`, which ignores the `TREE` extension altogether. 718 You should set it to True if you intend to use `IndexFile.commit()` exclusively while maintaining 719 support for third-party extensions. Besides that, you can usually safely ignore the built-in 720 extensions when using GitPython on repositories that are not handled manually at all. 721 All current built-in extensions are listed here: 722 http://opensource.apple.com/source/Git/Git-26/src/git-htmldocs/technical/index-format.txt 723 724 :return: 725 List(BaseIndexEntries) representing the entries just actually added. 726 727 :raise OSError: 728 if a supplied Path did not exist. Please note that BaseIndexEntry 729 Objects that do not have a null sha will be added even if their paths 730 do not exist. 731 """ 732 # sort the entries into strings and Entries, Blobs are converted to entries 733 # automatically 734 # paths can be git-added, for everything else we use git-update-index 735 paths, entries = self._preprocess_add_items(items) 736 entries_added = [] 737 # This code needs a working tree, therefore we try not to run it unless required. 738 # That way, we are OK on a bare repository as well. 739 # If there are no paths, the rewriter has nothing to do either 740 if paths: 741 entries_added.extend(self._entries_for_paths(paths, path_rewriter, fprogress, entries)) 742 743 # HANDLE ENTRIES 744 if entries: 745 null_mode_entries = [e for e in entries if e.mode == 0] 746 if null_mode_entries: 747 raise ValueError( 748 "At least one Entry has a null-mode - please use index.remove to remove files for clarity") 749 # END null mode should be remove 750 751 # HANLDE ENTRY OBJECT CREATION 752 # create objects if required, otherwise go with the existing shas 753 null_entries_indices = [i for i, e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA] 754 if null_entries_indices: 755 @git_working_dir 756 def handle_null_entries(self): 757 for ei in null_entries_indices: 758 null_entry = entries[ei] 759 new_entry = self._store_path(null_entry.path, fprogress) 760 761 # update null entry 762 entries[ei] = BaseIndexEntry( 763 (null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path)) 764 # END for each entry index 765 # end closure 766 handle_null_entries(self) 767 # END null_entry handling 768 769 # REWRITE PATHS 770 # If we have to rewrite the entries, do so now, after we have generated 771 # all object sha's 772 if path_rewriter: 773 for i, e in enumerate(entries): 774 entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e))) 775 # END for each entry 776 # END handle path rewriting 777 778 # just go through the remaining entries and provide progress info 779 for i, entry in enumerate(entries): 780 progress_sent = i in null_entries_indices 781 if not progress_sent: 782 fprogress(entry.path, False, entry) 783 fprogress(entry.path, True, entry) 784 # END handle progress 785 # END for each enty 786 entries_added.extend(entries) 787 # END if there are base entries 788 789 # FINALIZE 790 # add the new entries to this instance 791 for entry in entries_added: 792 self.entries[(entry.path, 0)] = IndexEntry.from_base(entry) 793 794 if write: 795 self.write(ignore_extension_data=not write_extension_data) 796 # END handle write 797 798 return entries_added 799 800 def _items_to_rela_paths(self, items): 801 """Returns a list of repo-relative paths from the given items which 802 may be absolute or relative paths, entries or blobs""" 803 paths = [] 804 for item in items: 805 if isinstance(item, (BaseIndexEntry, (Blob, Submodule))): 806 paths.append(self._to_relative_path(item.path)) 807 elif isinstance(item, string_types): 808 paths.append(self._to_relative_path(item)) 809 else: 810 raise TypeError("Invalid item type: %r" % item) 811 # END for each item 812 return paths 813 814 @post_clear_cache 815 @default_index 816 def remove(self, items, working_tree=False, **kwargs): 817 """Remove the given items from the index and optionally from 818 the working tree as well. 819 820 :param items: 821 Multiple types of items are supported which may be be freely mixed. 822 823 - path string 824 Remove the given path at all stages. If it is a directory, you must 825 specify the r=True keyword argument to remove all file entries 826 below it. If absolute paths are given, they will be converted 827 to a path relative to the git repository directory containing 828 the working tree 829 830 The path string may include globs, such as *.c. 831 832 - Blob Object 833 Only the path portion is used in this case. 834 835 - BaseIndexEntry or compatible type 836 The only relevant information here Yis the path. The stage is ignored. 837 838 :param working_tree: 839 If True, the entry will also be removed from the working tree, physically 840 removing the respective file. This may fail if there are uncommitted changes 841 in it. 842 843 :param kwargs: 844 Additional keyword arguments to be passed to git-rm, such 845 as 'r' to allow recursive removal of 846 847 :return: 848 List(path_string, ...) list of repository relative paths that have 849 been removed effectively. 850 This is interesting to know in case you have provided a directory or 851 globs. Paths are relative to the repository. """ 852 args = [] 853 if not working_tree: 854 args.append("--cached") 855 args.append("--") 856 857 # preprocess paths 858 paths = self._items_to_rela_paths(items) 859 removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines() 860 861 # process output to gain proper paths 862 # rm 'path' 863 return [p[4:-1] for p in removed_paths] 864 865 @post_clear_cache 866 @default_index 867 def move(self, items, skip_errors=False, **kwargs): 868 """Rename/move the items, whereas the last item is considered the destination of 869 the move operation. If the destination is a file, the first item ( of two ) 870 must be a file as well. If the destination is a directory, it may be preceded 871 by one or more directories or files. 872 873 The working tree will be affected in non-bare repositories. 874 875 :parma items: 876 Multiple types of items are supported, please see the 'remove' method 877 for reference. 878 :param skip_errors: 879 If True, errors such as ones resulting from missing source files will 880 be skipped. 881 :param kwargs: 882 Additional arguments you would like to pass to git-mv, such as dry_run 883 or force. 884 885 :return:List(tuple(source_path_string, destination_path_string), ...) 886 A list of pairs, containing the source file moved as well as its 887 actual destination. Relative to the repository root. 888 889 :raise ValueError: If only one item was given 890 GitCommandError: If git could not handle your request""" 891 args = [] 892 if skip_errors: 893 args.append('-k') 894 895 paths = self._items_to_rela_paths(items) 896 if len(paths) < 2: 897 raise ValueError("Please provide at least one source and one destination of the move operation") 898 899 was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None)) 900 kwargs['dry_run'] = True 901 902 # first execute rename in dryrun so the command tells us what it actually does 903 # ( for later output ) 904 out = [] 905 mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines() 906 907 # parse result - first 0:n/2 lines are 'checking ', the remaining ones 908 # are the 'renaming' ones which we parse 909 for ln in xrange(int(len(mvlines) / 2), len(mvlines)): 910 tokens = mvlines[ln].split(' to ') 911 assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln] 912 913 # [0] = Renaming x 914 # [1] = y 915 out.append((tokens[0][9:], tokens[1])) 916 # END for each line to parse 917 918 # either prepare for the real run, or output the dry-run result 919 if was_dry_run: 920 return out 921 # END handle dryrun 922 923 # now apply the actual operation 924 kwargs.pop('dry_run') 925 self.repo.git.mv(args, paths, **kwargs) 926 927 return out 928 929 def commit(self, message, parent_commits=None, head=True, author=None, 930 committer=None, author_date=None, commit_date=None, 931 skip_hooks=False): 932 """Commit the current default index file, creating a commit object. 933 For more information on the arguments, see tree.commit. 934 935 :note: If you have manually altered the .entries member of this instance, 936 don't forget to write() your changes to disk beforehand. 937 Passing skip_hooks=True is the equivalent of using `-n` 938 or `--no-verify` on the command line. 939 :return: Commit object representing the new commit""" 940 if not skip_hooks: 941 run_commit_hook('pre-commit', self) 942 943 self._write_commit_editmsg(message) 944 run_commit_hook('commit-msg', self, self._commit_editmsg_filepath()) 945 message = self._read_commit_editmsg() 946 self._remove_commit_editmsg() 947 tree = self.write_tree() 948 rval = Commit.create_from_tree(self.repo, tree, message, parent_commits, 949 head, author=author, committer=committer, 950 author_date=author_date, commit_date=commit_date) 951 if not skip_hooks: 952 run_commit_hook('post-commit', self) 953 return rval 954 955 def _write_commit_editmsg(self, message): 956 with open(self._commit_editmsg_filepath(), "wb") as commit_editmsg_file: 957 commit_editmsg_file.write(message.encode(defenc)) 958 959 def _remove_commit_editmsg(self): 960 os.remove(self._commit_editmsg_filepath()) 961 962 def _read_commit_editmsg(self): 963 with open(self._commit_editmsg_filepath(), "rb") as commit_editmsg_file: 964 return commit_editmsg_file.read().decode(defenc) 965 966 def _commit_editmsg_filepath(self): 967 return osp.join(self.repo.common_dir, "COMMIT_EDITMSG") 968 969 @classmethod 970 def _flush_stdin_and_wait(cls, proc, ignore_stdout=False): 971 proc.stdin.flush() 972 proc.stdin.close() 973 stdout = '' 974 if not ignore_stdout: 975 stdout = proc.stdout.read() 976 proc.stdout.close() 977 proc.wait() 978 return stdout 979 980 @default_index 981 def checkout(self, paths=None, force=False, fprogress=lambda *args: None, **kwargs): 982 """Checkout the given paths or all files from the version known to the index into 983 the working tree. 984 985 :note: Be sure you have written pending changes using the ``write`` method 986 in case you have altered the enties dictionary directly 987 988 :param paths: 989 If None, all paths in the index will be checked out. Otherwise an iterable 990 of relative or absolute paths or a single path pointing to files or directories 991 in the index is expected. 992 993 :param force: 994 If True, existing files will be overwritten even if they contain local modifications. 995 If False, these will trigger a CheckoutError. 996 997 :param fprogress: 998 see Index.add_ for signature and explanation. 999 The provided progress information will contain None as path and item if no 1000 explicit paths are given. Otherwise progress information will be send 1001 prior and after a file has been checked out 1002 1003 :param kwargs: 1004 Additional arguments to be passed to git-checkout-index 1005 1006 :return: 1007 iterable yielding paths to files which have been checked out and are 1008 guaranteed to match the version stored in the index 1009 1010 :raise CheckoutError: 1011 If at least one file failed to be checked out. This is a summary, 1012 hence it will checkout as many files as it can anyway. 1013 If one of files or directories do not exist in the index 1014 ( as opposed to the original git command who ignores them ). 1015 Raise GitCommandError if error lines could not be parsed - this truly is 1016 an exceptional state 1017 1018 .. note:: The checkout is limited to checking out the files in the 1019 index. Files which are not in the index anymore and exist in 1020 the working tree will not be deleted. This behaviour is fundamentally 1021 different to *head.checkout*, i.e. if you want git-checkout like behaviour, 1022 use head.checkout instead of index.checkout. 1023 """ 1024 args = ["--index"] 1025 if force: 1026 args.append("--force") 1027 1028 def handle_stderr(proc, iter_checked_out_files): 1029 stderr = proc.stderr.read() 1030 if not stderr: 1031 return 1032 # line contents: 1033 stderr = stderr.decode(defenc) 1034 # git-checkout-index: this already exists 1035 failed_files = [] 1036 failed_reasons = [] 1037 unknown_lines = [] 1038 endings = (' already exists', ' is not in the cache', ' does not exist at stage', ' is unmerged') 1039 for line in stderr.splitlines(): 1040 if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "): 1041 is_a_dir = " is a directory" 1042 unlink_issue = "unable to unlink old '" 1043 already_exists_issue = ' already exists, no checkout' # created by entry.c:checkout_entry(...) 1044 if line.endswith(is_a_dir): 1045 failed_files.append(line[:-len(is_a_dir)]) 1046 failed_reasons.append(is_a_dir) 1047 elif line.startswith(unlink_issue): 1048 failed_files.append(line[len(unlink_issue):line.rfind("'")]) 1049 failed_reasons.append(unlink_issue) 1050 elif line.endswith(already_exists_issue): 1051 failed_files.append(line[:-len(already_exists_issue)]) 1052 failed_reasons.append(already_exists_issue) 1053 else: 1054 unknown_lines.append(line) 1055 continue 1056 # END special lines parsing 1057 1058 for e in endings: 1059 if line.endswith(e): 1060 failed_files.append(line[20:-len(e)]) 1061 failed_reasons.append(e) 1062 break 1063 # END if ending matches 1064 # END for each possible ending 1065 # END for each line 1066 if unknown_lines: 1067 raise GitCommandError(("git-checkout-index",), 128, stderr) 1068 if failed_files: 1069 valid_files = list(set(iter_checked_out_files) - set(failed_files)) 1070 raise CheckoutError( 1071 "Some files could not be checked out from the index due to local modifications", 1072 failed_files, valid_files, failed_reasons) 1073 # END stderr handler 1074 1075 if paths is None: 1076 args.append("--all") 1077 kwargs['as_process'] = 1 1078 fprogress(None, False, None) 1079 proc = self.repo.git.checkout_index(*args, **kwargs) 1080 proc.wait() 1081 fprogress(None, True, None) 1082 rval_iter = (e.path for e in mviter(self.entries)) 1083 handle_stderr(proc, rval_iter) 1084 return rval_iter 1085 else: 1086 if isinstance(paths, string_types): 1087 paths = [paths] 1088 1089 # make sure we have our entries loaded before we start checkout_index 1090 # which will hold a lock on it. We try to get the lock as well during 1091 # our entries initialization 1092 self.entries 1093 1094 args.append("--stdin") 1095 kwargs['as_process'] = True 1096 kwargs['istream'] = subprocess.PIPE 1097 proc = self.repo.git.checkout_index(args, **kwargs) 1098 # FIXME: Reading from GIL! 1099 make_exc = lambda: GitCommandError(("git-checkout-index",) + tuple(args), 128, proc.stderr.read()) 1100 checked_out_files = [] 1101 1102 for path in paths: 1103 co_path = to_native_path_linux(self._to_relative_path(path)) 1104 # if the item is not in the index, it could be a directory 1105 path_is_directory = False 1106 1107 try: 1108 self.entries[(co_path, 0)] 1109 except KeyError: 1110 folder = co_path 1111 if not folder.endswith('/'): 1112 folder += '/' 1113 for entry in mviter(self.entries): 1114 if entry.path.startswith(folder): 1115 p = entry.path 1116 self._write_path_to_stdin(proc, p, p, make_exc, 1117 fprogress, read_from_stdout=False) 1118 checked_out_files.append(p) 1119 path_is_directory = True 1120 # END if entry is in directory 1121 # END for each entry 1122 # END path exception handlnig 1123 1124 if not path_is_directory: 1125 self._write_path_to_stdin(proc, co_path, path, make_exc, 1126 fprogress, read_from_stdout=False) 1127 checked_out_files.append(co_path) 1128 # END path is a file 1129 # END for each path 1130 self._flush_stdin_and_wait(proc, ignore_stdout=True) 1131 1132 handle_stderr(proc, checked_out_files) 1133 return checked_out_files 1134 # END paths handling 1135 assert "Should not reach this point" 1136 1137 @default_index 1138 def reset(self, commit='HEAD', working_tree=False, paths=None, head=False, **kwargs): 1139 """Reset the index to reflect the tree at the given commit. This will not 1140 adjust our HEAD reference as opposed to HEAD.reset by default. 1141 1142 :param commit: 1143 Revision, Reference or Commit specifying the commit we should represent. 1144 If you want to specify a tree only, use IndexFile.from_tree and overwrite 1145 the default index. 1146 1147 :param working_tree: 1148 If True, the files in the working tree will reflect the changed index. 1149 If False, the working tree will not be touched 1150 Please note that changes to the working copy will be discarded without 1151 warning ! 1152 1153 :param head: 1154 If True, the head will be set to the given commit. This is False by default, 1155 but if True, this method behaves like HEAD.reset. 1156 1157 :param paths: if given as an iterable of absolute or repository-relative paths, 1158 only these will be reset to their state at the given commit'ish. 1159 The paths need to exist at the commit, otherwise an exception will be 1160 raised. 1161 1162 :param kwargs: 1163 Additional keyword arguments passed to git-reset 1164 1165 .. note:: IndexFile.reset, as opposed to HEAD.reset, will not delete anyfiles 1166 in order to maintain a consistent working tree. Instead, it will just 1167 checkout the files according to their state in the index. 1168 If you want git-reset like behaviour, use *HEAD.reset* instead. 1169 1170 :return: self """ 1171 # what we actually want to do is to merge the tree into our existing 1172 # index, which is what git-read-tree does 1173 new_inst = type(self).from_tree(self.repo, commit) 1174 if not paths: 1175 self.entries = new_inst.entries 1176 else: 1177 nie = new_inst.entries 1178 for path in paths: 1179 path = self._to_relative_path(path) 1180 try: 1181 key = entry_key(path, 0) 1182 self.entries[key] = nie[key] 1183 except KeyError: 1184 # if key is not in theirs, it musn't be in ours 1185 try: 1186 del(self.entries[key]) 1187 except KeyError: 1188 pass 1189 # END handle deletion keyerror 1190 # END handle keyerror 1191 # END for each path 1192 # END handle paths 1193 self.write() 1194 1195 if working_tree: 1196 self.checkout(paths=paths, force=True) 1197 # END handle working tree 1198 1199 if head: 1200 self.repo.head.set_commit(self.repo.commit(commit), logmsg="%s: Updating HEAD" % commit) 1201 # END handle head change 1202 1203 return self 1204 1205 @default_index 1206 def diff(self, other=diff.Diffable.Index, paths=None, create_patch=False, **kwargs): 1207 """Diff this index against the working copy or a Tree or Commit object 1208 1209 For a documentation of the parameters and return values, see 1210 Diffable.diff 1211 1212 :note: 1213 Will only work with indices that represent the default git index as 1214 they have not been initialized with a stream. 1215 """ 1216 # index against index is always empty 1217 if other is self.Index: 1218 return diff.DiffIndex() 1219 1220 # index against anything but None is a reverse diff with the respective 1221 # item. Handle existing -R flags properly. Transform strings to the object 1222 # so that we can call diff on it 1223 if isinstance(other, string_types): 1224 other = self.repo.rev_parse(other) 1225 # END object conversion 1226 1227 if isinstance(other, Object): 1228 # invert the existing R flag 1229 cur_val = kwargs.get('R', False) 1230 kwargs['R'] = not cur_val 1231 return other.diff(self.Index, paths, create_patch, **kwargs) 1232 # END diff against other item handling 1233 1234 # if other is not None here, something is wrong 1235 if other is not None: 1236 raise ValueError("other must be None, Diffable.Index, a Tree or Commit, was %r" % other) 1237 1238 # diff against working copy - can be handled by superclass natively 1239 return super(IndexFile, self).diff(other, paths, create_patch, **kwargs) 1240