# Copyright (C) 2005-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# FIXME: This refactoring of the workingtree code doesn't seem to keep
# the WorkingTree's copy of the inventory in sync with the branch.  The
# branch modifies its working inventory when it does a commit to make
# missing files permanently removed.

# TODO: Maybe also keep the full path of the entry, and the children?
# But those depend on its position within a particular inventory, and
# it would be nice not to need to hold the backpointer here.

# This should really be an id randomly assigned when the tree is
# created, but it's not for now.
ROOT_ID = b"TREE_ROOT"

from collections import deque

from ..lazy_import import lazy_import
lazy_import(globals(), """

from breezy.bzr import (
    chk_map,
    generate_ids,
    )
""")

from .. import (
    errors,
    lazy_regex,
    osutils,
    trace,
    )
from ..static_tuple import StaticTuple


class InvalidEntryName(errors.InternalBzrError):

    _fmt = "Invalid entry name: %(name)s"

    def __init__(self, name):
        errors.BzrError.__init__(self)
        self.name = name


class DuplicateFileId(errors.BzrError):

    _fmt = "File id {%(file_id)s} already exists in inventory as %(entry)s"

    def __init__(self, file_id, entry):
        errors.BzrError.__init__(self)
        self.file_id = file_id
        self.entry = entry


class InventoryEntry(object):
    """Description of a versioned file.

    An InventoryEntry has the following fields, which are also
    present in the XML inventory-entry element:

    file_id

    name
        (within the parent directory)

    parent_id
        file_id of the parent directory, or ROOT_ID

    revision
        the revision_id in which this variation of this file was
        introduced.

    executable
        Indicates that this file should be executable on systems
        that support it.

    text_sha1
        sha-1 of the text of the file

    text_size
        size in bytes of the text of the file

    (reading a version 4 tree created a text_id field.)

    >>> i = Inventory()
    >>> i.path2id('')
    b'TREE_ROOT'
    >>> i.add(InventoryDirectory(b'123', 'src', ROOT_ID))
    InventoryDirectory(b'123', 'src', parent_id=b'TREE_ROOT', revision=None)
    >>> i.add(InventoryFile(b'2323', 'hello.c', parent_id=b'123'))
    InventoryFile(b'2323', 'hello.c', parent_id=b'123', sha1=None, len=None, revision=None)
    >>> shouldbe = {0: '', 1: 'src', 2: 'src/hello.c'}
    >>> for ix, j in enumerate(i.iter_entries()):
    ...     print(j[0] == shouldbe[ix], j[1])
    ...
    True InventoryDirectory(b'TREE_ROOT', '', parent_id=None, revision=None)
    True InventoryDirectory(b'123', 'src', parent_id=b'TREE_ROOT', revision=None)
    True InventoryFile(b'2323', 'hello.c', parent_id=b'123', sha1=None, len=None, revision=None)
    >>> i.add(InventoryFile(b'2324', 'bye.c', b'123'))
    InventoryFile(b'2324', 'bye.c', parent_id=b'123', sha1=None, len=None, revision=None)
    >>> i.add(InventoryDirectory(b'2325', 'wibble', b'123'))
    InventoryDirectory(b'2325', 'wibble', parent_id=b'123', revision=None)
    >>> i.path2id('src/wibble')
    b'2325'
    >>> i.add(InventoryFile(b'2326', 'wibble.c', b'2325'))
    InventoryFile(b'2326', 'wibble.c', parent_id=b'2325', sha1=None, len=None, revision=None)
    >>> i.get_entry(b'2326')
    InventoryFile(b'2326', 'wibble.c', parent_id=b'2325', sha1=None, len=None, revision=None)
    >>> for path, entry in i.iter_entries():
    ...     print(path)
    ...
    <BLANKLINE>
    src
    src/bye.c
    src/hello.c
    src/wibble
    src/wibble/wibble.c
    >>> i.id2path(b'2326')
    'src/wibble/wibble.c'
    """

    # Constants returned by describe_change()
    #
    # TODO: These should probably move to some kind of FileChangeDescription
    # class; that's like what's inside a TreeDelta but we want to be able to
    # generate them just for one file at a time.
    RENAMED = 'renamed'
    MODIFIED_AND_RENAMED = 'modified and renamed'

    __slots__ = ['file_id', 'revision', 'parent_id', 'name']

    # Attributes that all InventoryEntry instances are expected to have, but
    # that don't vary for all kinds of entry.  (e.g. symlink_target is only
    # relevant to InventoryLink, so there's no reason to make every
    # InventoryFile instance allocate space to hold a value for it.)
    # Attributes that only vary for files: executable, text_sha1, text_size,
    # text_id
    executable = False
    text_sha1 = None
    text_size = None
    text_id = None
    # Attributes that only vary for symlinks: symlink_target
    symlink_target = None
    # Attributes that only vary for tree-references: reference_revision
    reference_revision = None

    def detect_changes(self, old_entry):
        """Return a (text_modified, meta_modified) from this to old_entry.

        _read_tree_state must have been called on self and old_entry prior to
        calling detect_changes.
        """
        return False, False

    def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
              output_to, reverse=False):
        """Perform a diff between two entries of the same kind."""

    def parent_candidates(self, previous_inventories):
        """Find possible per-file graph parents.

        This is currently defined by:
         - Select the last changed revision in the parent inventory.
         - To deal with a short-lived bug in bzr 0.8's development, two
           entries that have the same last-changed revision but different
           'x' bit settings are changed in-place.
        """
        # revision:ie mapping for each ie found in previous_inventories.
        candidates = {}
        # identify candidate head revision ids.
        for inv in previous_inventories:
            try:
                ie = inv.get_entry(self.file_id)
            except errors.NoSuchId:
                pass
            else:
                if ie.revision in candidates:
                    # same revision value in two different inventories:
                    # correct possible inconsistencies:
                    #       * there was a bug in revision updates with 'x' bit
                    #         support.
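                    # (Assumption: the AttributeError caught below arises when
                    # 'executable' is a read-only class attribute on non-file
                    # entry kinds, in which case the normalisation is skipped.)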
                    try:
                        if candidates[ie.revision].executable != ie.executable:
                            candidates[ie.revision].executable = False
                            ie.executable = False
                    except AttributeError:
                        pass
                else:
                    # add this revision as a candidate.
                    candidates[ie.revision] = ie
        return candidates

    def has_text(self):
        """Return true if the object this entry represents has textual data.

        Note that textual data includes binary content.

        Also note that all entries get weave files created for them.
        This attribute is primarily used when upgrading from old trees that
        did not have the weave index for all inventory entries.
        """
        return False

    def __init__(self, file_id, name, parent_id):
        """Create an InventoryEntry.

        The filename must be a single component, relative to the
        parent directory; it cannot be a whole path or relative name.

        >>> e = InventoryFile(b'123', 'hello.c', ROOT_ID)
        >>> e.name
        'hello.c'
        >>> e.file_id
        b'123'
        >>> e = InventoryFile(b'123', 'src/hello.c', ROOT_ID)
        Traceback (most recent call last):
        breezy.bzr.inventory.InvalidEntryName: Invalid entry name: src/hello.c
        """
        if u'/' in name:
            raise InvalidEntryName(name=name)
        if not isinstance(file_id, bytes):
            raise TypeError(file_id)
        self.file_id = file_id
        self.revision = None
        self.name = name
        self.parent_id = parent_id

    def kind_character(self):
        """Return a short kind indicator useful for appending to names."""
        raise errors.BzrError('unknown kind %r' % self.kind)

    known_kinds = ('file', 'directory', 'symlink')

    @staticmethod
    def versionable_kind(kind):
        return (kind in ('file', 'directory', 'symlink', 'tree-reference'))

    def check(self, checker, rev_id, inv):
        """Check this inventory entry is intact.

        This is a template method, override _check for kind specific
        tests.

        :param checker: Check object providing context for the checks;
            can be used to find out what parts of the repository have already
            been checked.
        :param rev_id: Revision id from which this InventoryEntry was loaded.
            Not necessarily the last-changed revision for this file.
        :param inv: Inventory from which the entry was loaded.
        """
        if self.parent_id is not None:
            if not inv.has_id(self.parent_id):
                raise errors.BzrCheckError(
                    'missing parent {%s} in inventory for revision {%s}' % (
                        self.parent_id, rev_id))
        checker._add_entry_to_text_key_references(inv, self)
        self._check(checker, rev_id)

    def _check(self, checker, rev_id):
        """Check this inventory entry for kind specific errors."""
        checker._report_items.append(
            'unknown entry kind %r in revision {%s}' % (self.kind, rev_id))

    def copy(self):
        """Clone this inventory entry."""
        raise NotImplementedError

    @staticmethod
    def describe_change(old_entry, new_entry):
        """Describe the change between old_entry and new_entry.

        This smells of being an InterInventoryEntry situation, but as it's
        the first one, we're making it a static method for now.

        An entry with a different parent, or different name is considered
        to be renamed.  Reparenting is an internal detail.
        Note that renaming the parent does not trigger a rename for the
        child entry itself.
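
        For example, an entry whose parent_id or name differs but whose
        content is unchanged is reported as 'renamed', while one that also
        has content or 'x' bit changes is reported as 'modified and renamed'.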
        """
        # TODO: Perhaps return an object rather than just a string
        if old_entry is new_entry:
            # also the case of both being None
            return 'unchanged'
        elif old_entry is None:
            return 'added'
        elif new_entry is None:
            return 'removed'
        if old_entry.kind != new_entry.kind:
            return 'modified'
        text_modified, meta_modified = new_entry.detect_changes(old_entry)
        if text_modified or meta_modified:
            modified = True
        else:
            modified = False
        # TODO 20060511 (mbp, rbc) factor out 'detect_rename' here.
        if old_entry.parent_id != new_entry.parent_id:
            renamed = True
        elif old_entry.name != new_entry.name:
            renamed = True
        else:
            renamed = False
        if renamed and not modified:
            return InventoryEntry.RENAMED
        if modified and not renamed:
            return 'modified'
        if modified and renamed:
            return InventoryEntry.MODIFIED_AND_RENAMED
        return 'unchanged'

    def __repr__(self):
        return ("%s(%r, %r, parent_id=%r, revision=%r)"
                % (self.__class__.__name__,
                   self.file_id,
                   self.name,
                   self.parent_id,
                   self.revision))

    def is_unmodified(self, other):
        other_revision = getattr(other, 'revision', None)
        if other_revision is None:
            return False
        return self.revision == other.revision

    def __eq__(self, other):
        if other is self:
            # For the case when objects are cached
            return True
        if not isinstance(other, InventoryEntry):
            return NotImplemented

        return ((self.file_id == other.file_id) and
                (self.name == other.name) and
                (other.symlink_target == self.symlink_target) and
                (self.text_sha1 == other.text_sha1) and
                (self.text_size == other.text_size) and
                (self.text_id == other.text_id) and
                (self.parent_id == other.parent_id) and
                (self.kind == other.kind) and
                (self.revision == other.revision) and
                (self.executable == other.executable) and
                (self.reference_revision == other.reference_revision)
                )

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        raise ValueError('not hashable')

    def _unchanged(self, previous_ie):
        """Has this entry changed relative to previous_ie?

        This method should be overridden in child classes.
        """
        compatible = True
        # different inv parent
        if previous_ie.parent_id != self.parent_id:
            compatible = False
        # renamed
        elif previous_ie.name != self.name:
            compatible = False
        elif previous_ie.kind != self.kind:
            compatible = False
        return compatible

    def _read_tree_state(self, path, work_tree):
        """Populate fields in the inventory entry from the given tree.

        Note that this should be modified to be a noop on virtual trees
        as all entries created there are prepopulated.
        """
        # TODO: Rather than running this manually, we should check the
        #       working sha1 and other expensive properties when they're
        #       first requested, or preload them if they're already known
        pass  # nothing to do by default

    def _forget_tree_state(self):
        pass


class InventoryDirectory(InventoryEntry):
    """A directory in an inventory."""

    __slots__ = ['children']

    kind = 'directory'

    def _check(self, checker, rev_id):
        """See InventoryEntry._check"""
        # In non rich root repositories we do not expect a file graph for the
        # root.
        if self.name == '' and not checker.rich_roots:
            return
        # Directories are stored as an empty file, but the file should exist
        # to provide a per-fileid log. The hash of every directory content is
        # "da..." below (the sha1sum of '').
        checker.add_pending_item(rev_id,
                                 ('texts', self.file_id, self.revision), b'text',
                                 b'da39a3ee5e6b4b0d3255bfef95601890afd80709')

    def copy(self):
        other = InventoryDirectory(self.file_id, self.name, self.parent_id)
        other.revision = self.revision
        # note that children are *not* copied; they're pulled across when
        # others are added
        return other

    def __init__(self, file_id, name, parent_id):
        super(InventoryDirectory, self).__init__(file_id, name, parent_id)
        self.children = {}

    def sorted_children(self):
        return sorted(self.children.items())

    def kind_character(self):
        """See InventoryEntry.kind_character."""
        return '/'


class InventoryFile(InventoryEntry):
    """A file in an inventory."""

    __slots__ = ['text_sha1', 'text_size', 'text_id', 'executable']

    kind = 'file'

    def __init__(self, file_id, name, parent_id):
        super(InventoryFile, self).__init__(file_id, name, parent_id)
        self.text_sha1 = None
        self.text_size = None
        self.text_id = None
        self.executable = False

    def _check(self, checker, tree_revision_id):
        """See InventoryEntry._check"""
        # TODO: check size too.
        checker.add_pending_item(tree_revision_id,
                                 ('texts', self.file_id, self.revision), b'text',
                                 self.text_sha1)
        if self.text_size is None:
            checker._report_items.append(
                'fileid {%s} in {%s} has None for text_size' % (self.file_id,
                                                                tree_revision_id))

    def copy(self):
        other = InventoryFile(self.file_id, self.name, self.parent_id)
        other.executable = self.executable
        other.text_id = self.text_id
        other.text_sha1 = self.text_sha1
        other.text_size = self.text_size
        other.revision = self.revision
        return other

    def detect_changes(self, old_entry):
        """See InventoryEntry.detect_changes."""
        text_modified = (self.text_sha1 != old_entry.text_sha1)
        meta_modified = (self.executable != old_entry.executable)
        return text_modified, meta_modified

    def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
              output_to, reverse=False):
        """See InventoryEntry._diff."""
        from breezy.diff import DiffText
        from_file_id = self.file_id
        if to_entry:
            to_file_id = to_entry.file_id
            to_path = to_tree.id2path(to_file_id)
        else:
            to_file_id = None
            to_path = None
        if from_file_id is not None:
            from_path = tree.id2path(from_file_id)
        else:
            from_path = None
        if reverse:
            to_file_id, from_file_id = from_file_id, to_file_id
            tree, to_tree = to_tree, tree
            from_label, to_label = to_label, from_label
        differ = DiffText(tree, to_tree, output_to, 'utf-8', '', '',
                          text_diff)
        return differ.diff_text(from_path, to_path, from_label, to_label,
                                from_file_id, to_file_id)

    def has_text(self):
        """See InventoryEntry.has_text."""
        return True

    def kind_character(self):
        """See InventoryEntry.kind_character."""
        return ''

    def _read_tree_state(self, path, work_tree):
        """See InventoryEntry._read_tree_state."""
        self.text_sha1 = work_tree.get_file_sha1(path)
        # FIXME: 20050930 probe for the text size when getting sha1
        # in _read_tree_state
        self.executable = work_tree.is_executable(path)

    def __repr__(self):
        return ("%s(%r, %r, parent_id=%r, sha1=%r, len=%s, revision=%s)"
                % (self.__class__.__name__,
                   self.file_id,
                   self.name,
                   self.parent_id,
                   self.text_sha1,
                   self.text_size,
                   self.revision))

    def _forget_tree_state(self):
        self.text_sha1 = None

    def _unchanged(self, previous_ie):
        """See InventoryEntry._unchanged."""
        compatible = super(InventoryFile, self)._unchanged(previous_ie)
        if self.text_sha1 != previous_ie.text_sha1:
            compatible = False
        else:
            # FIXME: 20050930 probe for the text size when getting sha1
            # in _read_tree_state
            self.text_size = previous_ie.text_size
        if self.executable != previous_ie.executable:
            compatible = False
        return compatible


class InventoryLink(InventoryEntry):
    """A symbolic link in an inventory."""

    __slots__ = ['symlink_target']

    kind = 'symlink'

    def __init__(self, file_id, name, parent_id):
        super(InventoryLink, self).__init__(file_id, name, parent_id)
        self.symlink_target = None

    def _check(self, checker, tree_revision_id):
        """See InventoryEntry._check"""
        if self.symlink_target is None:
            checker._report_items.append(
                'symlink {%s} has no target in revision {%s}'
                % (self.file_id, tree_revision_id))
        # Symlinks are stored as ''
        checker.add_pending_item(tree_revision_id,
                                 ('texts', self.file_id, self.revision), b'text',
                                 b'da39a3ee5e6b4b0d3255bfef95601890afd80709')

    def copy(self):
        other = InventoryLink(self.file_id, self.name, self.parent_id)
        other.symlink_target = self.symlink_target
        other.revision = self.revision
        return other

    def detect_changes(self, old_entry):
        """See InventoryEntry.detect_changes."""
        # FIXME: which _modified field should we use ? RBC 20051003
        text_modified = (self.symlink_target != old_entry.symlink_target)
        if text_modified:
            trace.mutter(" symlink target changed")
        meta_modified = False
        return text_modified, meta_modified

    def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
              output_to, reverse=False):
        """See InventoryEntry._diff."""
        from breezy.diff import DiffSymlink
        old_target = self.symlink_target
        if to_entry is not None:
            new_target = to_entry.symlink_target
        else:
            new_target = None
        if not reverse:
            old_tree = tree
            new_tree = to_tree
        else:
            old_tree = to_tree
            new_tree = tree
            new_target, old_target = old_target, new_target
        differ = DiffSymlink(old_tree, new_tree, output_to)
        return differ.diff_symlink(old_target, new_target)

    def kind_character(self):
        """See InventoryEntry.kind_character."""
        return ''

    def _read_tree_state(self, path, work_tree):
        """See InventoryEntry._read_tree_state."""
        self.symlink_target = work_tree.get_symlink_target(
            work_tree.id2path(self.file_id), self.file_id)

    def _forget_tree_state(self):
        self.symlink_target = None

    def _unchanged(self, previous_ie):
        """See InventoryEntry._unchanged."""
        compatible = super(InventoryLink, self)._unchanged(previous_ie)
        if self.symlink_target != previous_ie.symlink_target:
            compatible = False
        return compatible


class TreeReference(InventoryEntry):

    __slots__ = ['reference_revision']

    kind = 'tree-reference'

    def __init__(self, file_id, name, parent_id, revision=None,
                 reference_revision=None):
        InventoryEntry.__init__(self, file_id, name, parent_id)
        self.revision = revision
        self.reference_revision = reference_revision

    def copy(self):
        return TreeReference(self.file_id, self.name, self.parent_id,
                             self.revision, self.reference_revision)

    def _read_tree_state(self, path, work_tree):
        """Populate fields in the inventory entry from the given tree.
        """
        self.reference_revision = work_tree.get_reference_revision(
            path, self.file_id)

    def _forget_tree_state(self):
        self.reference_revision = None

    def _unchanged(self, previous_ie):
        """See InventoryEntry._unchanged."""
        compatible = super(TreeReference, self)._unchanged(previous_ie)
        if self.reference_revision != previous_ie.reference_revision:
            compatible = False
        return compatible

    def kind_character(self):
        """See InventoryEntry.kind_character."""
        return '+'


class CommonInventory(object):
    """Basic inventory logic, defined in terms of primitives like has_id.

    An inventory is the metadata about the contents of a tree.

    This is broadly a map from file_id to entries such as directories, files,
    symlinks and tree references. Each entry maintains its own metadata like
    SHA1 and length for files, or children for a directory.

    Entries can be looked up either by path or by file_id.

    InventoryEntry objects must not be modified after they are
    inserted, other than through the Inventory API.
    """

    def has_filename(self, filename):
        return bool(self.path2id(filename))

    def id2path(self, file_id):
        """Return as a string the path to file_id.

        >>> i = Inventory()
        >>> e = i.add(InventoryDirectory(b'src-id', 'src', ROOT_ID))
        >>> e = i.add(InventoryFile(b'foo-id', 'foo.c', parent_id=b'src-id'))
        >>> print(i.id2path(b'foo-id'))
        src/foo.c

        :raises NoSuchId: If file_id is not present in the inventory.
        """
        # get all names, skipping root
        return '/'.join(reversed(
            [parent.name for parent in
             self._iter_file_id_parents(file_id)][:-1]))

    def iter_entries(self, from_dir=None, recursive=True):
        """Return (path, entry) pairs, in order by name.

        :param from_dir: if None, start from the root,
            otherwise start from this directory (either file-id or entry)
        :param recursive: recurse into directories or not
        """
        if from_dir is None:
            if self.root is None:
                return
            from_dir = self.root
            yield '', self.root
        elif isinstance(from_dir, bytes):
            from_dir = self.get_entry(from_dir)

        # unrolling the recursive calls changed the time from
        # 440ms/663ms (inline/total) to 116ms/116ms
        children = sorted(from_dir.children.items())
        if not recursive:
            yield from children
            return
        children = deque(children)
        stack = [(u'', children)]
        while stack:
            from_dir_relpath, children = stack[-1]

            while children:
                name, ie = children.popleft()

                # we know that from_dir_relpath never ends in a slash
                # and 'f' doesn't begin with one, we can do a string op, rather
                # than the checks of pathjoin(), though this means that all paths
                # start with a slash
                path = from_dir_relpath + '/' + name

                yield path[1:], ie

                if ie.kind != 'directory':
                    continue

                # But do this child first
                new_children = sorted(ie.children.items())
                new_children = deque(new_children)
                stack.append((path, new_children))
                # Break out of inner loop, so that we start outer loop with child
                break
            else:
                # if we finished all children, pop it off the stack
                stack.pop()

    def _preload_cache(self):
        """Populate any caches, we are about to access all items.

        The default implementation does nothing, because CommonInventory doesn't
        have a cache.
        """
        pass

    def iter_entries_by_dir(self, from_dir=None, specific_file_ids=None):
        """Iterate over the entries in directory-first order.

        This returns all entries for a directory before returning
        the entries for children of a directory. This is not
        lexicographically sorted order, and is a hybrid between
        depth-first and breadth-first.

        :return: This yields (path, entry) pairs
        """
        if specific_file_ids and not isinstance(specific_file_ids, set):
            specific_file_ids = set(specific_file_ids)
        # TODO? Perhaps this should return the from_dir so that the root is
        # yielded? or maybe an option?
        if from_dir is None and specific_file_ids is None:
            # They are iterating from the root, and have not specified any
            # specific entries to look at. All current callers fully consume the
            # iterator, so we can safely assume we are accessing all entries
            self._preload_cache()
        if from_dir is None:
            if self.root is None:
                return
            # Optimize a common case
            if (specific_file_ids is not None
                    and len(specific_file_ids) == 1):
                file_id = list(specific_file_ids)[0]
                if file_id is not None:
                    try:
                        path = self.id2path(file_id)
                    except errors.NoSuchId:
                        pass
                    else:
                        yield path, self.get_entry(file_id)
                return
            from_dir = self.root
            if (specific_file_ids is None
                    or self.root.file_id in specific_file_ids):
                yield u'', self.root
        elif isinstance(from_dir, bytes):
            from_dir = self.get_entry(from_dir)
        else:
            raise TypeError(from_dir)

        if specific_file_ids is not None:
            # TODO: jam 20070302 This could really be done as a loop rather
            #       than a bunch of recursive calls.
            parents = set()
            byid = self

            def add_ancestors(file_id):
                if not byid.has_id(file_id):
                    return
                parent_id = byid.get_entry(file_id).parent_id
                if parent_id is None:
                    return
                if parent_id not in parents:
                    parents.add(parent_id)
                    add_ancestors(parent_id)
            for file_id in specific_file_ids:
                add_ancestors(file_id)
        else:
            parents = None

        stack = [(u'', from_dir)]
        while stack:
            cur_relpath, cur_dir = stack.pop()

            child_dirs = []
            for child_name, child_ie in sorted(cur_dir.children.items()):

                child_relpath = cur_relpath + child_name

                if (specific_file_ids is None
                        or child_ie.file_id in specific_file_ids):
                    yield child_relpath, child_ie

                if child_ie.kind == 'directory':
                    if parents is None or child_ie.file_id in parents:
                        child_dirs.append((child_relpath + '/', child_ie))
            stack.extend(reversed(child_dirs))

    def _make_delta(self, old):
        """Make an inventory delta from two inventories."""
        old_ids = set(old.iter_all_ids())
        new_ids = set(self.iter_all_ids())
        adds = new_ids - old_ids
        deletes = old_ids - new_ids
        common = old_ids.intersection(new_ids)
        delta = []
        for file_id in deletes:
            delta.append((old.id2path(file_id), None, file_id, None))
        for file_id in adds:
            delta.append((None, self.id2path(file_id),
                          file_id, self.get_entry(file_id)))
        for file_id in common:
            if old.get_entry(file_id) != self.get_entry(file_id):
                delta.append((old.id2path(file_id), self.id2path(file_id),
                              file_id, self.get_entry(file_id)))
        return delta

    def make_entry(self, kind, name, parent_id, file_id=None):
        """Simple thunk to breezy.bzr.inventory.make_entry."""
        return make_entry(kind, name, parent_id, file_id)

    def entries(self):
        """Return list of (path, ie) for all entries except the root.

        This may be faster than iter_entries.
        """
        accum = []

        def descend(dir_ie, dir_path):
            kids = sorted(dir_ie.children.items())
            for name, ie in kids:
                child_path = osutils.pathjoin(dir_path, name)
                accum.append((child_path, ie))
                if ie.kind == 'directory':
                    descend(ie, child_path)

        if self.root is not None:
            descend(self.root, u'')
        return accum

    def get_entry_by_path_partial(self, relpath):
        """Like get_entry_by_path, but return TreeReference objects.

        :param relpath: Path to resolve, either as string with / as separators,
            or as list of elements.
        :return: tuple with ie, resolved elements and elements left to resolve
        """
        if isinstance(relpath, str):
            names = osutils.splitpath(relpath)
        else:
            names = relpath

        try:
            parent = self.root
        except errors.NoSuchId:
            # root doesn't exist yet so nothing else can
            return None, None, None
        if parent is None:
            return None, None, None
        for i, f in enumerate(names):
            try:
                children = getattr(parent, 'children', None)
                if children is None:
                    return None, None, None
                cie = children[f]
                if cie.kind == 'tree-reference':
                    return cie, names[:i + 1], names[i + 1:]
                parent = cie
            except KeyError:
                # or raise an error?
                return None, None, None
        return parent, names, []

    def get_entry_by_path(self, relpath):
        """Return an inventory entry by path.

        :param relpath: may be either a list of path components, or a single
            string, in which case it is automatically split.

        This returns the entry of the last component in the path,
        which may be either a file or a directory.

        Returns None IFF the path is not found.
        """
        if isinstance(relpath, str):
            names = osutils.splitpath(relpath)
        else:
            names = relpath

        try:
            parent = self.root
        except errors.NoSuchId:
            # root doesn't exist yet so nothing else can
            return None
        if parent is None:
            return None
        for f in names:
            try:
                children = getattr(parent, 'children', None)
                if children is None:
                    return None
                cie = children[f]
                parent = cie
            except KeyError:
                # or raise an error?
                return None
        return parent

    def path2id(self, relpath):
        """Walk down through directories to return the id of the last component.

        :param relpath: may be either a list of path components, or a single
            string, in which case it is automatically split.

        This returns the file_id of the last component in the path,
        which may be either a file or a directory.

        Returns None IFF the path is not found.
        """
        ie = self.get_entry_by_path(relpath)
        if ie is None:
            return None
        return ie.file_id

    def filter(self, specific_fileids):
        """Get an inventory view filtered against a set of file-ids.

        Children of directories and parents are included.

        The result may or may not reference the underlying inventory
        so it should be treated as immutable.
        """
        interesting_parents = set()
        for fileid in specific_fileids:
            try:
                interesting_parents.update(self.get_idpath(fileid))
            except errors.NoSuchId:
                # This fileid is not in the inventory - that's ok
                pass
        entries = self.iter_entries()
        if self.root is None:
            return Inventory(root_id=None)
        other = Inventory(next(entries)[1].file_id)
        other.root.revision = self.root.revision
        other.revision_id = self.revision_id
        directories_to_expand = set()
        for path, entry in entries:
            file_id = entry.file_id
            if (file_id in specific_fileids or
                    entry.parent_id in directories_to_expand):
                if entry.kind == 'directory':
                    directories_to_expand.add(file_id)
            elif file_id not in interesting_parents:
                continue
            other.add(entry.copy())
        return other

    def get_idpath(self, file_id):
        """Return a list of file_ids for the path to an entry.

        The list contains one element for each directory followed by
        the id of the file itself. So the length of the returned list
        is equal to the depth of the file in the tree, counting the
        root directory as depth 1.
        """
        p = []
        for parent in self._iter_file_id_parents(file_id):
            p.insert(0, parent.file_id)
        return p


class Inventory(CommonInventory):
    """Mutable dict based in-memory inventory.

    We never store the full path to a file, because renaming a directory
    implicitly moves all of its contents. This class internally maintains a
    lookup tree that allows the children under a directory to be
    returned quickly.

    >>> inv = Inventory()
    >>> inv.add(InventoryFile(b'123-123', 'hello.c', ROOT_ID))
    InventoryFile(b'123-123', 'hello.c', parent_id=b'TREE_ROOT', sha1=None, len=None, revision=None)
    >>> inv.get_entry(b'123-123').name
    'hello.c'

    Id's may be looked up from paths:

    >>> inv.path2id('hello.c')
    b'123-123'
    >>> inv.has_id(b'123-123')
    True

    There are iterators over the contents:

    >>> [entry[0] for entry in inv.iter_entries()]
    ['', 'hello.c']
    """

    def __init__(self, root_id=ROOT_ID, revision_id=None):
        """Create an inventory.

        The inventory is created with a default root directory whose file id
        is root_id (ROOT_ID unless overridden); pass root_id=None to create
        an inventory with no root at all.
        """
        if root_id is not None:
            self._set_root(InventoryDirectory(root_id, u'', None))
        else:
            self.root = None
            self._byid = {}
        self.revision_id = revision_id

    def __repr__(self):
        # More than one page of output is not useful for debugging
        max_len = 2048
        closing = '...}'
        contents = repr(self._byid)
        if len(contents) > max_len:
            contents = contents[:(max_len - len(closing))] + closing
        return "<Inventory object at %x, contents=%r>" % (id(self), contents)

    def apply_delta(self, delta):
        """Apply a delta to this inventory.

        See the inventory developers documentation for the theory behind
        inventory deltas.

        If delta application fails the inventory is left in an indeterminate
        state and must not be used.

        :param delta: A list of changes to apply. After all the changes are
            applied the final inventory must be internally consistent, but it
            is ok to supply changes which, if only half-applied would have an
            invalid result - such as supplying two changes which rename two
            files, 'A' and 'B' with each other : [('A', 'B', b'A-id', a_entry),
            ('B', 'A', b'B-id', b_entry)].

            Each change is a tuple, of the form (old_path, new_path, file_id,
            new_entry).

            When new_path is None, the change indicates the removal of an entry
            from the inventory and new_entry will be ignored (using None is
            appropriate). If new_path is not None, then new_entry must be an
            InventoryEntry instance, which will be incorporated into the
            inventory (and replace any existing entry with the same file id).

            When old_path is None, the change indicates the addition of
            a new entry to the inventory.

            When neither new_path nor old_path are None, the change is a
            modification to an entry, such as a rename, reparent, kind change
            etc.

            The children attribute of new_entry is ignored. This is because
            this method preserves children automatically across alterations to
            the parent of the children, and cases where the parent id of a
            child is changing require the child to be passed in as a separate
            change regardless. E.g. in the recursive deletion of a directory -
            the directory's children must be included in the delta, or the
            final inventory will be invalid.

            Note that a file_id must only appear once within a given delta.
            An AssertionError is raised otherwise.
        """
        # Check that the delta is legal. It would be nice if this could be
        # done within the loops below but it's safer to validate the delta
        # before starting to mutate the inventory, as there isn't a rollback
        # facility.
        list(_check_delta_unique_ids(_check_delta_unique_new_paths(
            _check_delta_unique_old_paths(_check_delta_ids_match_entry(
                _check_delta_ids_are_valid(
                    _check_delta_new_path_entry_both_or_None(
                        delta)))))))

        children = {}
        # Remove all affected items which were in the original inventory,
        # starting with the longest paths, thus ensuring parents are examined
        # after their children, which means that everything we examine has no
        # modified children remaining by the time we examine it.
        for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
                                         if op is not None), reverse=True):
            # Preserve unaltered children of file_id for later reinsertion.
            file_id_children = getattr(self.get_entry(file_id), 'children', {})
            if len(file_id_children):
                children[file_id] = file_id_children
            if self.id2path(file_id) != old_path:
                raise errors.InconsistentDelta(old_path, file_id,
                                               "Entry was at wrong other path %r." % self.id2path(file_id))
            # Remove file_id and the unaltered children. If file_id is not
            # being deleted it will be reinserted back later.
            self.remove_recursive_id(file_id)
        # Insert all affected which should be in the new inventory, reattaching
        # their children if they had any. This is done from shortest path to
        # longest, ensuring that items which were modified and whose parents in
        # the resulting inventory were also modified, are inserted after their
        # parents.
        for new_path, f, new_entry in sorted((np, f, e) for op, np, f, e in
                                             delta if np is not None):
            if new_entry.kind == 'directory':
                # Pop the preserved children here so that children whose
                # parents were deleted and which were not reattached to a new
                # parent can be detected afterwards.
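                # A fresh InventoryDirectory is built so that any children
                # supplied on new_entry are ignored (see the docstring) and
                # only the preserved children are reattached.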
                replacement = InventoryDirectory(new_entry.file_id,
                                                 new_entry.name, new_entry.parent_id)
                replacement.revision = new_entry.revision
                replacement.children = children.pop(replacement.file_id, {})
                new_entry = replacement
            try:
                self.add(new_entry)
            except DuplicateFileId:
                raise errors.InconsistentDelta(new_path, new_entry.file_id,
                                               "New id is already present in target.")
            except AttributeError:
                raise errors.InconsistentDelta(new_path, new_entry.file_id,
                                               "Parent is not a directory.")
            if self.id2path(new_entry.file_id) != new_path:
                raise errors.InconsistentDelta(new_path, new_entry.file_id,
                                               "New path is not consistent with parent path.")
        if len(children):
            # Get the parent id that was deleted
            parent_id, children = children.popitem()
            raise errors.InconsistentDelta("<deleted>", parent_id,
                                           "The file id was deleted but its children were not deleted.")

    def create_by_apply_delta(self, inventory_delta, new_revision_id,
                              propagate_caches=False):
        """See CHKInventory.create_by_apply_delta()"""
        new_inv = self.copy()
        new_inv.apply_delta(inventory_delta)
        new_inv.revision_id = new_revision_id
        return new_inv

    def _set_root(self, ie):
        self.root = ie
        self._byid = {self.root.file_id: self.root}

    def copy(self):
        # TODO: jam 20051218 Should copy also copy the revision_id?
        entries = self.iter_entries()
        if self.root is None:
            return Inventory(root_id=None)
        other = Inventory(next(entries)[1].file_id)
        other.root.revision = self.root.revision
        # copy recursively so we know directories will be added before
        # their children.  There are more efficient ways than this...
        for path, entry in entries:
            other.add(entry.copy())
        return other

    def iter_all_ids(self):
        """Iterate over all file-ids."""
        return iter(self._byid)

    def iter_just_entries(self):
        """Iterate over all entries.

        Unlike iter_entries(), just the entries are returned (not (path, ie))
        and the order of entries is undefined.

        XXX: We may not want to merge this into bzr.dev.
        """
        if self.root is None:
            return ()
        return self._byid.values()

    def __len__(self):
        """Returns number of entries."""
        return len(self._byid)

    def get_entry(self, file_id):
        """Return the entry for given file_id.

        >>> inv = Inventory()
        >>> inv.add(InventoryFile(b'123123', 'hello.c', ROOT_ID))
        InventoryFile(b'123123', 'hello.c', parent_id=b'TREE_ROOT', sha1=None, len=None, revision=None)
        >>> inv.get_entry(b'123123').name
        'hello.c'
        """
        if not isinstance(file_id, bytes):
            raise TypeError(file_id)
        try:
            return self._byid[file_id]
        except KeyError:
            # really we're passing an inventory, not a tree...
            raise errors.NoSuchId(self, file_id)

    def get_file_kind(self, file_id):
        return self._byid[file_id].kind

    def get_child(self, parent_id, filename):
        return self.get_entry(parent_id).children.get(filename)

    def _add_child(self, entry):
        """Add an entry to the inventory, without adding it to its parent"""
        if entry.file_id in self._byid:
            raise errors.BzrError(
                "inventory already contains entry with id {%s}" %
                entry.file_id)
        self._byid[entry.file_id] = entry
        children = getattr(entry, 'children', {})
        if children is not None:
            for child in children.values():
                self._add_child(child)
        return entry

    def add(self, entry):
        """Add entry to inventory.

        :return: entry
        """
        if entry.file_id in self._byid:
            raise DuplicateFileId(entry.file_id, self._byid[entry.file_id])
        if entry.parent_id is None:
            self.root = entry
        else:
            try:
                parent = self._byid[entry.parent_id]
            except KeyError:
                raise errors.InconsistentDelta("<unknown>", entry.parent_id,
                                               "Parent not in inventory.")
            if entry.name in parent.children:
                raise errors.InconsistentDelta(
                    self.id2path(parent.children[entry.name].file_id),
                    entry.file_id,
                    "Path already versioned")
            parent.children[entry.name] = entry
        return self._add_child(entry)

    def add_path(self, relpath, kind, file_id=None, parent_id=None):
        """Add entry from a path.

        The immediate parent must already be versioned.

        Returns the new entry object."""

        parts = osutils.splitpath(relpath)

        if len(parts) == 0:
            if file_id is None:
                file_id = generate_ids.gen_root_id()
            self.root = InventoryDirectory(file_id, '', None)
            self._byid = {self.root.file_id: self.root}
            return self.root
        else:
            parent_path = parts[:-1]
            parent_id = self.path2id(parent_path)
            if parent_id is None:
                raise errors.NotVersionedError(path=parent_path)
        ie = make_entry(kind, parts[-1], parent_id, file_id)
        return self.add(ie)

    def delete(self, file_id):
        """Remove entry by id.

        >>> inv = Inventory()
        >>> inv.add(InventoryFile(b'123', 'foo.c', ROOT_ID))
        InventoryFile(b'123', 'foo.c', parent_id=b'TREE_ROOT', sha1=None, len=None, revision=None)
        >>> inv.has_id(b'123')
        True
        >>> inv.delete(b'123')
        >>> inv.has_id(b'123')
        False
        """
        ie = self.get_entry(file_id)
        del self._byid[file_id]
        if ie.parent_id is not None:
            del self.get_entry(ie.parent_id).children[ie.name]

    def __eq__(self, other):
        """Compare two inventories by comparing their contents.

        >>> i1 = Inventory()
        >>> i2 = Inventory()
        >>> i1 == i2
        True
        >>> i1.add(InventoryFile(b'123', 'foo', ROOT_ID))
        InventoryFile(b'123', 'foo', parent_id=b'TREE_ROOT', sha1=None, len=None, revision=None)
        >>> i1 == i2
        False
        >>> i2.add(InventoryFile(b'123', 'foo', ROOT_ID))
        InventoryFile(b'123', 'foo', parent_id=b'TREE_ROOT', sha1=None, len=None, revision=None)
        >>> i1 == i2
        True
        """
        if not isinstance(other, Inventory):
            return NotImplemented

        return self._byid == other._byid

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        raise ValueError('not hashable')

    def _iter_file_id_parents(self, file_id):
        """Yield the parents of file_id up to the root."""
        while file_id is not None:
            try:
                ie = self._byid[file_id]
            except KeyError:
                raise errors.NoSuchId(tree=None, file_id=file_id)
            yield ie
            file_id = ie.parent_id

    def has_id(self, file_id):
        return (file_id in self._byid)

    def _make_delta(self, old):
        """Make an inventory delta from two inventories."""
        old_getter = old.get_entry
        new_getter = self.get_entry
        old_ids = set(old.iter_all_ids())
        new_ids = set(self.iter_all_ids())
        adds = new_ids - old_ids
        deletes = old_ids - new_ids
        if not adds and not deletes:
            common = new_ids
        else:
            common = old_ids.intersection(new_ids)
        delta = []
        for file_id in deletes:
            delta.append((old.id2path(file_id), None, file_id, None))
        for file_id in adds:
            delta.append((None, self.id2path(file_id),
                          file_id, self.get_entry(file_id)))
        for file_id in common:
            new_ie = new_getter(file_id)
            old_ie = old_getter(file_id)
            # If xml_serializer returns the cached InventoryEntries (rather
            # than always doing .copy()), inlining the 'is' check saves 2.7M
            # calls to __eq__. Under lsprof this saves 20s => 6s.
            # It is a minor improvement without lsprof.
            if old_ie is new_ie or old_ie == new_ie:
                continue
            else:
                delta.append((old.id2path(file_id), self.id2path(file_id),
                              file_id, new_ie))
        return delta

    def remove_recursive_id(self, file_id):
        """Remove file_id, and children, from the inventory.

        :param file_id: A file_id to remove.
        """
        to_find_delete = [self._byid[file_id]]
        to_delete = []
        while to_find_delete:
            ie = to_find_delete.pop()
            to_delete.append(ie.file_id)
            if ie.kind == 'directory':
                to_find_delete.extend(ie.children.values())
        for file_id in reversed(to_delete):
            ie = self.get_entry(file_id)
            del self._byid[file_id]
            if ie.parent_id is not None:
                del self.get_entry(ie.parent_id).children[ie.name]
            else:
                self.root = None

    def rename(self, file_id, new_parent_id, new_name):
        """Move a file within the inventory.

        This can change either the name, or the parent, or both.

        This does not move the working file.
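
        :raises BzrError: if new_name is not an acceptable filename, if an
            entry called new_name already exists in the new parent, or if the
            move would put a directory inside itself.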
        """
        new_name = ensure_normalized_name(new_name)
        if not is_valid_name(new_name):
            raise errors.BzrError("not an acceptable filename: %r" % new_name)

        new_parent = self._byid[new_parent_id]
        if new_name in new_parent.children:
            raise errors.BzrError("%r already exists in %r" %
                                  (new_name, self.id2path(new_parent_id)))

        new_parent_idpath = self.get_idpath(new_parent_id)
        if file_id in new_parent_idpath:
            raise errors.BzrError(
                "cannot move directory %r into a subdirectory of itself, %r"
                % (self.id2path(file_id), self.id2path(new_parent_id)))

        file_ie = self._byid[file_id]
        old_parent = self._byid[file_ie.parent_id]

        # TODO: Don't leave things messed up if this fails

        del old_parent.children[file_ie.name]
        new_parent.children[new_name] = file_ie

        file_ie.name = new_name
        file_ie.parent_id = new_parent_id

    def is_root(self, file_id):
        return self.root is not None and file_id == self.root.file_id


class CHKInventory(CommonInventory):
    """An inventory persisted in a CHK store.

    By design, a CHKInventory is immutable so many of the methods
    supported by Inventory - add, rename, apply_delta, etc - are *not*
    supported. To create a new CHKInventory, use create_by_apply_delta()
    or from_inventory(), say.

    Internally, a CHKInventory has one or two CHKMaps:

    * id_to_entry - a map from (file_id,) => InventoryEntry as bytes
    * parent_id_basename_to_file_id - a map from (parent_id, basename_utf8)
      => file_id as bytes

    The second map is optional and not present in early CHKRepositories.

    No caching is performed: every method call or item access will perform
    requests to the storage layer. As such, keep references to objects you
    want to reuse.
    """

    def __init__(self, search_key_name):
        CommonInventory.__init__(self)
        self._fileid_to_entry_cache = {}
        self._fully_cached = False
        self._path_to_fileid_cache = {}
        self._search_key_name = search_key_name
        self.root_id = None

    def __eq__(self, other):
        """Compare two inventories by comparing their contents."""
        if not isinstance(other, CHKInventory):
            return NotImplemented

        this_key = self.id_to_entry.key()
        other_key = other.id_to_entry.key()
        this_pid_key = self.parent_id_basename_to_file_id.key()
        other_pid_key = other.parent_id_basename_to_file_id.key()
        if None in (this_key, this_pid_key, other_key, other_pid_key):
            return False
        return this_key == other_key and this_pid_key == other_pid_key

    def _entry_to_bytes(self, entry):
        """Serialise entry as a single bytestring.

        :param entry: An inventory entry.
        :return: A bytestring for the entry.

        The BNF:
        ENTRY ::= FILE | DIR | SYMLINK | TREE
        FILE ::= "file: " COMMON SEP SHA SEP SIZE SEP EXECUTABLE
        DIR ::= "dir: " COMMON
        SYMLINK ::= "symlink: " COMMON SEP TARGET_UTF8
        TREE ::= "tree: " COMMON SEP REFERENCE_REVISION
        COMMON ::= FILE_ID SEP PARENT_ID SEP NAME_UTF8 SEP REVISION
        SEP ::= "\n"
        """
        if entry.parent_id is not None:
            parent_str = entry.parent_id
        else:
            parent_str = b''
        name_str = entry.name.encode("utf8")
        if entry.kind == 'file':
            if entry.executable:
                exec_str = b"Y"
            else:
                exec_str = b"N"
            return b"file: %s\n%s\n%s\n%s\n%s\n%d\n%s" % (
                entry.file_id, parent_str, name_str, entry.revision,
                entry.text_sha1, entry.text_size, exec_str)
        elif entry.kind == 'directory':
            return b"dir: %s\n%s\n%s\n%s" % (
                entry.file_id, parent_str, name_str, entry.revision)
        elif entry.kind == 'symlink':
            return b"symlink: %s\n%s\n%s\n%s\n%s" % (
                entry.file_id, parent_str, name_str, entry.revision,
                entry.symlink_target.encode("utf8"))
        elif entry.kind == 'tree-reference':
            return b"tree: %s\n%s\n%s\n%s\n%s" % (
                entry.file_id, parent_str, name_str, entry.revision,
                entry.reference_revision)
        else:
            raise ValueError("unknown kind %r" % entry.kind)

    def _expand_fileids_to_parents_and_children(self, file_ids):
        """Give a more holistic view starting with the given file_ids.

        For any file_id which maps to a directory, we will include all children
        of that directory. We will also include all directories which are
        parents of the given file_ids, but we will not include their children.

        eg:
          /           # TREE_ROOT
          foo/        # foo-id
            baz       # baz-id
            frob/     # frob-id
              fringle # fringle-id
          bar/        # bar-id
            bing      # bing-id

        if given [foo-id] we will include
          TREE_ROOT as interesting parents
        and
          foo-id, baz-id, frob-id, fringle-id
        As interesting ids.
        """
        interesting = set()
        # TODO: Pre-pass over the list of fileids to see if anything is already
        #       deserialized in self._fileid_to_entry_cache

        directories_to_expand = set()
        children_of_parent_id = {}
        # It is okay if some of the fileids are missing
        for entry in self._getitems(file_ids):
            if entry.kind == 'directory':
                directories_to_expand.add(entry.file_id)
            interesting.add(entry.parent_id)
            children_of_parent_id.setdefault(entry.parent_id, set()
                                             ).add(entry.file_id)

        # Now, interesting has all of the direct parents, but not the
        # parents of those parents. It also may have some duplicates with
        # specific_fileids
        remaining_parents = interesting.difference(file_ids)
        # When we hit the TREE_ROOT, we'll get an interesting parent of None,
        # but we don't actually want to recurse into that
        interesting.add(None)  # this will auto-filter it in the loop
        remaining_parents.discard(None)
        while remaining_parents:
            next_parents = set()
            for entry in self._getitems(remaining_parents):
                next_parents.add(entry.parent_id)
                children_of_parent_id.setdefault(entry.parent_id, set()
                                                 ).add(entry.file_id)
            # Remove any search tips we've already processed
            remaining_parents = next_parents.difference(interesting)
            interesting.update(remaining_parents)
            # We should probably also .difference(directories_to_expand)
        interesting.update(file_ids)
        interesting.discard(None)
        while directories_to_expand:
            # Expand directories by looking in the
            # parent_id_basename_to_file_id map
            keys = [StaticTuple(f,).intern() for f in directories_to_expand]
            directories_to_expand = set()
            items = self.parent_id_basename_to_file_id.iteritems(keys)
            next_file_ids = {item[1] for item in items}
            next_file_ids = next_file_ids.difference(interesting)
            interesting.update(next_file_ids)
            for entry in self._getitems(next_file_ids):
                if entry.kind == 'directory':
                    directories_to_expand.add(entry.file_id)
                children_of_parent_id.setdefault(entry.parent_id, set()
                                                 ).add(entry.file_id)
        return interesting, children_of_parent_id

    def filter(self, specific_fileids):
        """Get an inventory view filtered against a set of file-ids.

        Children of directories and parents are included.

        The result may or may not reference the underlying inventory
        so it should be treated as immutable.
        """
        (interesting,
         parent_to_children) = self._expand_fileids_to_parents_and_children(
            specific_fileids)
        # There is some overlap here, but we assume that all interesting items
        # are in the _fileid_to_entry_cache because we had to read them to
        # determine if they were a dir we wanted to recurse, or just a file
        # This should give us all the entries we'll want to add, so start
        # adding
        other = Inventory(self.root_id)
        other.root.revision = self.root.revision
        other.revision_id = self.revision_id
        if not interesting or not parent_to_children:
            # empty filter, or filtering entries that don't exist
            # (if even 1 existed, then we would have populated
            # parent_to_children with at least the tree root.)
            return other
        cache = self._fileid_to_entry_cache
        remaining_children = deque(
            parent_to_children[self.root_id])
        while remaining_children:
            file_id = remaining_children.popleft()
            ie = cache[file_id]
            if ie.kind == 'directory':
                ie = ie.copy()  # We create a copy to depopulate the .children attribute
            # TODO: depending on the uses of 'other' we should probably always
            #       '.copy()' to prevent someone from mutating other and
            #       invalidating our internal cache
            other.add(ie)
            if file_id in parent_to_children:
                remaining_children.extend(parent_to_children[file_id])
        return other

    @staticmethod
    def _bytes_to_utf8name_key(data):
        """Get the file_id, revision_id key out of data."""
        # We don't normally care about name, except for times when we want
        # to filter out empty names because of non rich-root...
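        # Per the layout in _entry_to_bytes, sections[2] is the utf8 name and
        # sections[3] is the revision, so the key returned here is
        # (name_utf8, file_id, revision_id).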
        sections = data.split(b'\n')
        kind, file_id = sections[0].split(b': ')
        return (sections[2], file_id, sections[3])

    def _bytes_to_entry(self, bytes):
        """Deserialise a serialised entry."""
        sections = bytes.split(b'\n')
        if sections[0].startswith(b"file: "):
            result = InventoryFile(sections[0][6:],
                                   sections[2].decode('utf8'),
                                   sections[1])
            result.text_sha1 = sections[4]
            result.text_size = int(sections[5])
            result.executable = sections[6] == b"Y"
        elif sections[0].startswith(b"dir: "):
            result = CHKInventoryDirectory(sections[0][5:],
                                           sections[2].decode('utf8'),
                                           sections[1], self)
        elif sections[0].startswith(b"symlink: "):
            result = InventoryLink(sections[0][9:],
                                   sections[2].decode('utf8'),
                                   sections[1])
            result.symlink_target = sections[4].decode('utf8')
        elif sections[0].startswith(b"tree: "):
            result = TreeReference(sections[0][6:],
                                   sections[2].decode('utf8'),
                                   sections[1])
            result.reference_revision = sections[4]
        else:
            raise ValueError("Not a serialised entry %r" % bytes)
        result.file_id = result.file_id
        result.revision = sections[3]
        if result.parent_id == b'':
            result.parent_id = None
        self._fileid_to_entry_cache[result.file_id] = result
        return result

    def create_by_apply_delta(self, inventory_delta, new_revision_id,
                              propagate_caches=False):
        """Create a new CHKInventory by applying inventory_delta to this one.

        See the inventory developers documentation for the theory behind
        inventory deltas.

        :param inventory_delta: The inventory delta to apply. See
            Inventory.apply_delta for details.
        :param new_revision_id: The revision id of the resulting CHKInventory.
        :param propagate_caches: If True, the caches for this inventory are
            copied to and updated for the result.
        :return: The new CHKInventory.
        """
        split = osutils.split
        result = CHKInventory(self._search_key_name)
        if propagate_caches:
            # Just propagate the path-to-fileid cache for now
            result._path_to_fileid_cache = self._path_to_fileid_cache.copy()
        search_key_func = chk_map.search_key_registry.get(
            self._search_key_name)
        self.id_to_entry._ensure_root()
        maximum_size = self.id_to_entry._root_node.maximum_size
        result.revision_id = new_revision_id
        result.id_to_entry = chk_map.CHKMap(
            self.id_to_entry._store,
            self.id_to_entry.key(),
            search_key_func=search_key_func)
        result.id_to_entry._ensure_root()
        result.id_to_entry._root_node.set_maximum_size(maximum_size)
        # Change to apply to the parent_id_basename delta. The dict maps
        # (parent_id, basename) -> (old_key, new_value). We use a dict because
        # when a path has its id replaced (e.g. the root is changed, or someone
        # does bzr mv a b, bzr mv c a), we should output a single change to this
        # map rather than two.
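        # Each value is a mutable [old_key, new_value] pair, so a delete and
        # an add that touch the same key can be folded into a single record
        # when the dict is flattened into delta_list further down.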

    def create_by_apply_delta(self, inventory_delta, new_revision_id,
                              propagate_caches=False):
        """Create a new CHKInventory by applying inventory_delta to this one.

        See the inventory developers documentation for the theory behind
        inventory deltas.

        :param inventory_delta: The inventory delta to apply. See
            Inventory.apply_delta for details.
        :param new_revision_id: The revision id of the resulting CHKInventory.
        :param propagate_caches: If True, the caches for this inventory are
            copied to and updated for the result.
        :return: The new CHKInventory.
        """
        split = osutils.split
        result = CHKInventory(self._search_key_name)
        if propagate_caches:
            # Just propagate the path-to-fileid cache for now
            result._path_to_fileid_cache = self._path_to_fileid_cache.copy()
        search_key_func = chk_map.search_key_registry.get(
            self._search_key_name)
        self.id_to_entry._ensure_root()
        maximum_size = self.id_to_entry._root_node.maximum_size
        result.revision_id = new_revision_id
        result.id_to_entry = chk_map.CHKMap(
            self.id_to_entry._store,
            self.id_to_entry.key(),
            search_key_func=search_key_func)
        result.id_to_entry._ensure_root()
        result.id_to_entry._root_node.set_maximum_size(maximum_size)
        # Change to apply to the parent_id_basename delta. The dict maps
        # (parent_id, basename) -> (old_key, new_value). We use a dict because
        # when a path has its id replaced (e.g. the root is changed, or
        # someone does bzr mv a b, bzr mv c a), we should output a single
        # change to this map rather than two.
        parent_id_basename_delta = {}
        if self.parent_id_basename_to_file_id is not None:
            result.parent_id_basename_to_file_id = chk_map.CHKMap(
                self.parent_id_basename_to_file_id._store,
                self.parent_id_basename_to_file_id.key(),
                search_key_func=search_key_func)
            result.parent_id_basename_to_file_id._ensure_root()
            self.parent_id_basename_to_file_id._ensure_root()
            result_p_id_root = result.parent_id_basename_to_file_id._root_node
            p_id_root = self.parent_id_basename_to_file_id._root_node
            result_p_id_root.set_maximum_size(p_id_root.maximum_size)
            result_p_id_root._key_width = p_id_root._key_width
        else:
            result.parent_id_basename_to_file_id = None
        result.root_id = self.root_id
        id_to_entry_delta = []
        # inventory_delta is only traversed once, so we just update the
        # variable.
        # Check for repeated file ids
        inventory_delta = _check_delta_unique_ids(inventory_delta)
        # Repeated old paths
        inventory_delta = _check_delta_unique_old_paths(inventory_delta)
        # Check for repeated new paths
        inventory_delta = _check_delta_unique_new_paths(inventory_delta)
        # Check for entries that don't match the fileid
        inventory_delta = _check_delta_ids_match_entry(inventory_delta)
        # Check for nonsense fileids
        inventory_delta = _check_delta_ids_are_valid(inventory_delta)
        # Check for new_path <-> entry consistency
        inventory_delta = _check_delta_new_path_entry_both_or_None(
            inventory_delta)
        # All changed entries need to have their parents be directories and be
        # at the right path. This set contains (path, id) tuples.
        parents = set()
        # When we delete an item, all the children of it must be either deleted
        # or altered in their own right. As we batch process the change via
        # CHKMap.apply_delta, we build a set of things to use to validate the
        # delta.
        deletes = set()
        altered = set()
        for old_path, new_path, file_id, entry in inventory_delta:
            # file id changes
            if new_path == '':
                result.root_id = file_id
            if new_path is None:
                # Make a delete:
                new_key = None
                new_value = None
                # Update caches
                if propagate_caches:
                    try:
                        del result._path_to_fileid_cache[old_path]
                    except KeyError:
                        pass
                deletes.add(file_id)
            else:
                new_key = StaticTuple(file_id,)
                new_value = result._entry_to_bytes(entry)
                # Update caches. It's worth doing this whether
                # we're propagating the old caches or not.
                result._path_to_fileid_cache[new_path] = file_id
                parents.add((split(new_path)[0], entry.parent_id))
            if old_path is None:
                old_key = None
            else:
                old_key = StaticTuple(file_id,)
                if self.id2path(file_id) != old_path:
                    raise errors.InconsistentDelta(old_path, file_id,
                        "Entry was at wrong other path %r." %
                        self.id2path(file_id))
                altered.add(file_id)
            id_to_entry_delta.append(StaticTuple(old_key, new_key, new_value))
            if result.parent_id_basename_to_file_id is not None:
                # parent_id, basename changes
                if old_path is None:
                    old_key = None
                else:
                    old_entry = self.get_entry(file_id)
                    old_key = self._parent_id_basename_key(old_entry)
                if new_path is None:
                    new_key = None
                    new_value = None
                else:
                    new_key = self._parent_id_basename_key(entry)
                    new_value = file_id
                # If the two keys are the same, the value will be unchanged
                # as it's always the file id for this entry.
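                # Illustrative example: renaming 'a' to 'b' under the same
                # parent gives old_key == (parent_id, b'a') and
                # new_key == (parent_id, b'b'); the branch below records a
                # delete of the old basename and an add of the new one, both
                # carrying this entry's file id as the value.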
                if old_key != new_key:
                    # Transform a change into explicit delete/add preserving
                    # a possible match on the key from a different file id.
                    if old_key is not None:
                        parent_id_basename_delta.setdefault(
                            old_key, [None, None])[0] = old_key
                    if new_key is not None:
                        parent_id_basename_delta.setdefault(
                            new_key, [None, None])[1] = new_value
        # validate that deletes are complete.
        for file_id in deletes:
            entry = self.get_entry(file_id)
            if entry.kind != 'directory':
                continue
            # This loop could potentially be better by using the id_basename
            # map to just get the child file ids.
            for child in entry.children.values():
                if child.file_id not in altered:
                    raise errors.InconsistentDelta(self.id2path(child.file_id),
                        child.file_id, "Child not deleted or reparented when "
                        "parent deleted.")
        result.id_to_entry.apply_delta(id_to_entry_delta)
        if parent_id_basename_delta:
            # Transform the parent_id_basename delta data into a linear delta
            # with only one record for a given key. Optimally this would allow
            # re-keying, but it's simpler to just output that as a delete+add
            # to spend less time calculating the delta.
            delta_list = []
            for key, (old_key, value) in parent_id_basename_delta.items():
                if value is not None:
                    delta_list.append((old_key, key, value))
                else:
                    delta_list.append((old_key, None, None))
            result.parent_id_basename_to_file_id.apply_delta(delta_list)
        parents.discard(('', None))
        for parent_path, parent in parents:
            try:
                if result.get_entry(parent).kind != 'directory':
                    raise errors.InconsistentDelta(result.id2path(parent),
                        parent, 'Not a directory, but given children')
            except errors.NoSuchId:
                raise errors.InconsistentDelta("<unknown>", parent,
                    "Parent is not present in resulting inventory.")
            if result.path2id(parent_path) != parent:
                raise errors.InconsistentDelta(parent_path, parent,
                    "Parent has wrong path %r." % result.path2id(parent_path))
        return result
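
    # Hedged sketch of the records create_by_apply_delta() consumes; each item
    # is (old_path, new_path, file_id, entry) and the ids and entry objects
    # below are hypothetical:
    #
    #   delta = [
    #       (None, 'doc', b'doc-id', new_dir_entry),        # add
    #       ('old.txt', None, b'old-id', None),             # delete
    #       ('a.txt', 'doc/a.txt', b'a-id', moved_entry),   # rename/move
    #   ]
    #   new_inv = chk_inv.create_by_apply_delta(delta, b'new-rev-id')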

    @classmethod
    def deserialise(klass, chk_store, lines, expected_revision_id):
        """Deserialise a CHKInventory.

        :param chk_store: A CHK capable VersionedFiles instance.
        :param lines: The serialised inventory, as a list of byte lines.
        :param expected_revision_id: The revision ID we think this inventory
            is for.
        :return: A CHKInventory
        """
        if not lines[-1].endswith(b'\n'):
            raise ValueError("last line should have trailing eol\n")
        if lines[0] != b'chkinventory:\n':
            raise ValueError("not a serialised CHKInventory: %r" % lines)
        info = {}
        allowed_keys = frozenset((b'root_id', b'revision_id',
                                  b'parent_id_basename_to_file_id',
                                  b'search_key_name', b'id_to_entry'))
        for line in lines[1:]:
            key, value = line.rstrip(b'\n').split(b': ', 1)
            if key not in allowed_keys:
                raise errors.BzrError('Unknown key in inventory: %r\n%r'
                                      % (key, lines))
            if key in info:
                raise errors.BzrError('Duplicate key in inventory: %r\n%r'
                                      % (key, lines))
            info[key] = value
        revision_id = info[b'revision_id']
        root_id = info[b'root_id']
        search_key_name = info.get(b'search_key_name', b'plain')
        parent_id_basename_to_file_id = info.get(
            b'parent_id_basename_to_file_id', None)
        if (parent_id_basename_to_file_id is not None
                and not parent_id_basename_to_file_id.startswith(b'sha1:')):
            raise ValueError('parent_id_basename_to_file_id should be a sha1'
                             ' key not %r' % (parent_id_basename_to_file_id,))
        id_to_entry = info[b'id_to_entry']
        if not id_to_entry.startswith(b'sha1:'):
            raise ValueError('id_to_entry should be a sha1'
                             ' key not %r' % (id_to_entry,))

        result = CHKInventory(search_key_name)
        result.revision_id = revision_id
        result.root_id = root_id
        search_key_func = chk_map.search_key_registry.get(
            result._search_key_name)
        if parent_id_basename_to_file_id is not None:
            result.parent_id_basename_to_file_id = chk_map.CHKMap(
                chk_store, StaticTuple(parent_id_basename_to_file_id,),
                search_key_func=search_key_func)
        else:
            result.parent_id_basename_to_file_id = None

        result.id_to_entry = chk_map.CHKMap(chk_store,
                                            StaticTuple(id_to_entry,),
                                            search_key_func=search_key_func)
        if (result.revision_id,) != expected_revision_id:
            raise ValueError("Mismatched revision id and expected: %r, %r" %
                             (result.revision_id, expected_revision_id))
        return result
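
    # The serialised form parsed by deserialise() (and produced by to_lines(),
    # further down) is a short header naming the CHK roots; a hedged example
    # with made-up sha1 keys:
    #
    #   chkinventory:
    #   revision_id: rev-id-1
    #   root_id: TREE_ROOT
    #   parent_id_basename_to_file_id: sha1:aaaa...
    #   id_to_entry: sha1:bbbb...
    #
    # A 'search_key_name:' line is only present for non-plain search keys.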

    @classmethod
    def from_inventory(klass, chk_store, inventory, maximum_size=0,
                       search_key_name=b'plain'):
        """Create a CHKInventory from an existing inventory.

        The content of inventory is copied into the chk_store, and a
        CHKInventory referencing that is returned.

        :param chk_store: A CHK capable VersionedFiles instance.
        :param inventory: The inventory to copy.
        :param maximum_size: The CHKMap node size limit.
        :param search_key_name: The identifier for the search key function.
        """
        result = klass(search_key_name)
        result.revision_id = inventory.revision_id
        result.root_id = inventory.root.file_id

        entry_to_bytes = result._entry_to_bytes
        parent_id_basename_key = result._parent_id_basename_key
        id_to_entry_dict = {}
        parent_id_basename_dict = {}
        for path, entry in inventory.iter_entries():
            key = StaticTuple(entry.file_id,).intern()
            id_to_entry_dict[key] = entry_to_bytes(entry)
            p_id_key = parent_id_basename_key(entry)
            parent_id_basename_dict[p_id_key] = entry.file_id

        result._populate_from_dicts(chk_store, id_to_entry_dict,
                                    parent_id_basename_dict,
                                    maximum_size=maximum_size)
        return result

    def _populate_from_dicts(self, chk_store, id_to_entry_dict,
                             parent_id_basename_dict, maximum_size):
        search_key_func = chk_map.search_key_registry.get(
            self._search_key_name)
        root_key = chk_map.CHKMap.from_dict(chk_store, id_to_entry_dict,
                                            maximum_size=maximum_size,
                                            key_width=1,
                                            search_key_func=search_key_func)
        self.id_to_entry = chk_map.CHKMap(chk_store, root_key,
                                          search_key_func)
        root_key = chk_map.CHKMap.from_dict(chk_store,
                                            parent_id_basename_dict,
                                            maximum_size=maximum_size,
                                            key_width=2,
                                            search_key_func=search_key_func)
        self.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
                                                            root_key,
                                                            search_key_func)

    def _parent_id_basename_key(self, entry):
        """Create a key for an entry in a parent_id_basename_to_file_id index."""
        if entry.parent_id is not None:
            parent_id = entry.parent_id
        else:
            parent_id = b''
        return StaticTuple(parent_id, entry.name.encode('utf8')).intern()

    def get_entry(self, file_id):
        """Map a single file_id -> InventoryEntry."""
        if file_id is None:
            raise errors.NoSuchId(self, file_id)
        result = self._fileid_to_entry_cache.get(file_id, None)
        if result is not None:
            return result
        try:
            return self._bytes_to_entry(
                next(self.id_to_entry.iteritems([StaticTuple(file_id,)]))[1])
        except StopIteration:
            # really we're passing an inventory, not a tree...
            raise errors.NoSuchId(self, file_id)

    def _getitems(self, file_ids):
        """Similar to get_entry, but lets you query for multiple.

        The returned order is undefined. And currently if an item doesn't
        exist, it isn't included in the output.
        """
        result = []
        remaining = []
        for file_id in file_ids:
            entry = self._fileid_to_entry_cache.get(file_id, None)
            if entry is None:
                remaining.append(file_id)
            else:
                result.append(entry)
        file_keys = [StaticTuple(f,).intern() for f in remaining]
        for file_key, value in self.id_to_entry.iteritems(file_keys):
            entry = self._bytes_to_entry(value)
            result.append(entry)
            self._fileid_to_entry_cache[entry.file_id] = entry
        return result
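
    # Note on the lookup helpers above: get_entry() and _getitems() both fill
    # _fileid_to_entry_cache as a side effect, e.g. (hypothetical ids):
    #
    #   entries = chk_inv._getitems([b'a-id', b'missing-id'])
    #
    # returns the entries that exist, in no particular order, silently
    # omitting b'missing-id'; a later chk_inv.get_entry(b'a-id') is then
    # served from the cache.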

    def has_id(self, file_id):
        # Perhaps have an explicit 'contains' method on CHKMap ?
        if self._fileid_to_entry_cache.get(file_id, None) is not None:
            return True
        return len(list(
            self.id_to_entry.iteritems([StaticTuple(file_id,)]))) == 1

    def is_root(self, file_id):
        return file_id == self.root_id

    def _iter_file_id_parents(self, file_id):
        """Yield the parents of file_id up to the root."""
        while file_id is not None:
            try:
                ie = self.get_entry(file_id)
            except KeyError:
                raise errors.NoSuchId(tree=self, file_id=file_id)
            yield ie
            file_id = ie.parent_id

    def iter_all_ids(self):
        """Iterate over all file-ids."""
        for key, _ in self.id_to_entry.iteritems():
            yield key[-1]

    def iter_just_entries(self):
        """Iterate over all entries.

        Unlike iter_entries(), just the entries are returned (not (path, ie))
        and the order of entries is undefined.

        XXX: We may not want to merge this into bzr.dev.
        """
        for key, entry in self.id_to_entry.iteritems():
            file_id = key[0]
            ie = self._fileid_to_entry_cache.get(file_id, None)
            if ie is None:
                ie = self._bytes_to_entry(entry)
                self._fileid_to_entry_cache[file_id] = ie
            yield ie

    def _preload_cache(self):
        """Make sure all file-ids are in _fileid_to_entry_cache."""
        if self._fully_cached:
            return  # No need to do it again
        # The optimal sort order is to use iteritems() directly
        cache = self._fileid_to_entry_cache
        for key, entry in self.id_to_entry.iteritems():
            file_id = key[0]
            if file_id not in cache:
                ie = self._bytes_to_entry(entry)
                cache[file_id] = ie
            else:
                ie = cache[file_id]
        last_parent_id = last_parent_ie = None
        pid_items = self.parent_id_basename_to_file_id.iteritems()
        for key, child_file_id in pid_items:
            if key == (b'', b''):  # This is the root
                if child_file_id != self.root_id:
                    raise ValueError('Data inconsistency detected.'
                        ' We expected data with key ("","") to match'
                        ' the root id, but %s != %s'
                        % (child_file_id, self.root_id))
                continue
            parent_id, basename = key
            ie = cache[child_file_id]
            if parent_id == last_parent_id:
                parent_ie = last_parent_ie
            else:
                parent_ie = cache[parent_id]
            if parent_ie.kind != 'directory':
                raise ValueError('Data inconsistency detected.'
                    ' An entry in the parent_id_basename_to_file_id map'
                    ' has parent_id {%s} but the kind of that object'
                    ' is %r not "directory"' % (parent_id, parent_ie.kind))
            if parent_ie._children is None:
                parent_ie._children = {}
            basename = basename.decode('utf-8')
            if basename in parent_ie._children:
                existing_ie = parent_ie._children[basename]
                if existing_ie != ie:
                    raise ValueError('Data inconsistency detected.'
                        ' Two entries with basename %r were found'
                        ' in the parent entry {%s}'
                        % (basename, parent_id))
            if basename != ie.name:
                raise ValueError('Data inconsistency detected.'
                    ' In the parent_id_basename_to_file_id map, file_id'
                    ' {%s} is listed as having basename %r, but in the'
                    ' id_to_entry map it is %r'
                    % (child_file_id, basename, ie.name))
            parent_ie._children[basename] = ie
        self._fully_cached = True
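
    # Hedged usage sketch: iter_just_entries() yields bare InventoryEntry
    # objects in undefined order, which is enough for e.g. counting kinds
    # without paying for path reconstruction:
    #
    #   kinds = {}
    #   for ie in chk_inv.iter_just_entries():
    #       kinds[ie.kind] = kinds.get(ie.kind, 0) + 1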

    def iter_changes(self, basis):
        """Generate a Tree.iter_changes change list between this and basis.

        :param basis: Another CHKInventory.
        :return: An iterator over the changes between self and basis, as per
            tree.iter_changes().
        """
        # We want: (file_id, (path_in_source, path_in_target),
        #           changed_content, versioned, parent, name, kind,
        #           executable)
        for key, basis_value, self_value in \
                self.id_to_entry.iter_changes(basis.id_to_entry):
            file_id = key[0]
            if basis_value is not None:
                basis_entry = basis._bytes_to_entry(basis_value)
                path_in_source = basis.id2path(file_id)
                basis_parent = basis_entry.parent_id
                basis_name = basis_entry.name
                basis_executable = basis_entry.executable
            else:
                path_in_source = None
                basis_parent = None
                basis_name = None
                basis_executable = None
            if self_value is not None:
                self_entry = self._bytes_to_entry(self_value)
                path_in_target = self.id2path(file_id)
                self_parent = self_entry.parent_id
                self_name = self_entry.name
                self_executable = self_entry.executable
            else:
                path_in_target = None
                self_parent = None
                self_name = None
                self_executable = None
            if basis_value is None:
                # add
                kind = (None, self_entry.kind)
                versioned = (False, True)
            elif self_value is None:
                # delete
                kind = (basis_entry.kind, None)
                versioned = (True, False)
            else:
                kind = (basis_entry.kind, self_entry.kind)
                versioned = (True, True)
            changed_content = False
            if kind[0] != kind[1]:
                changed_content = True
            elif kind[0] == 'file':
                if (self_entry.text_size != basis_entry.text_size
                        or self_entry.text_sha1 != basis_entry.text_sha1):
                    changed_content = True
            elif kind[0] == 'symlink':
                if self_entry.symlink_target != basis_entry.symlink_target:
                    changed_content = True
            elif kind[0] == 'tree-reference':
                if (self_entry.reference_revision
                        != basis_entry.reference_revision):
                    changed_content = True
            parent = (basis_parent, self_parent)
            name = (basis_name, self_name)
            executable = (basis_executable, self_executable)
            if (not changed_content and
                    parent[0] == parent[1] and
                    name[0] == name[1] and
                    executable[0] == executable[1]):
                # Could happen when only the revision changed for a directory
                # for instance.
                continue
            yield (
                file_id, (path_in_source, path_in_target), changed_content,
                versioned, parent, name, kind, executable)

    def __len__(self):
        """Return the number of entries in the inventory."""
        return len(self.id_to_entry)

    def _make_delta(self, old):
        """Make an inventory delta from two inventories."""
        if not isinstance(old, CHKInventory):
            return CommonInventory._make_delta(self, old)
        delta = []
        for key, old_value, self_value in \
                self.id_to_entry.iter_changes(old.id_to_entry):
            file_id = key[0]
            if old_value is not None:
                old_path = old.id2path(file_id)
            else:
                old_path = None
            if self_value is not None:
                entry = self._bytes_to_entry(self_value)
                self._fileid_to_entry_cache[file_id] = entry
                new_path = self.id2path(file_id)
            else:
                entry = None
                new_path = None
            delta.append((old_path, new_path, file_id, entry))
        return delta
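
    # The list returned by _make_delta() uses the same record shape that
    # create_by_apply_delta() accepts, so (hedged sketch, old_inv being
    # another CHKInventory for an ancestor revision):
    #
    #   delta = new_inv._make_delta(old_inv)
    #   rebuilt = old_inv.create_by_apply_delta(delta, new_inv.revision_id)
    #
    # should reproduce new_inv's id_to_entry content.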

    def path2id(self, relpath):
        """See CommonInventory.path2id()."""
        # TODO: perhaps support negative hits?
        if isinstance(relpath, str):
            names = osutils.splitpath(relpath)
        else:
            names = relpath
            if relpath == []:
                relpath = [""]
            relpath = osutils.pathjoin(*relpath)
        result = self._path_to_fileid_cache.get(relpath, None)
        if result is not None:
            return result
        current_id = self.root_id
        if current_id is None:
            return None
        parent_id_index = self.parent_id_basename_to_file_id
        cur_path = None
        for basename in names:
            if cur_path is None:
                cur_path = basename
            else:
                cur_path = cur_path + '/' + basename
            basename_utf8 = basename.encode('utf8')
            file_id = self._path_to_fileid_cache.get(cur_path, None)
            if file_id is None:
                key_filter = [StaticTuple(current_id, basename_utf8)]
                items = parent_id_index.iteritems(key_filter)
                for (parent_id, name_utf8), file_id in items:
                    if parent_id != current_id or name_utf8 != basename_utf8:
                        raise errors.BzrError("corrupt inventory lookup! "
                            "%r %r %r %r" % (parent_id, current_id, name_utf8,
                                             basename_utf8))
                if file_id is None:
                    return None
                else:
                    self._path_to_fileid_cache[cur_path] = file_id
            current_id = file_id
        return current_id

    def to_lines(self):
        """Serialise the inventory to lines."""
        lines = [b"chkinventory:\n"]
        if self._search_key_name != b'plain':
            # custom ordering grouping things that don't change together
            lines.append(b'search_key_name: %s\n' % (
                self._search_key_name))
            lines.append(b"root_id: %s\n" % self.root_id)
            lines.append(b'parent_id_basename_to_file_id: %s\n' %
                         (self.parent_id_basename_to_file_id.key()[0],))
            lines.append(b"revision_id: %s\n" % self.revision_id)
            lines.append(b"id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
        else:
            lines.append(b"revision_id: %s\n" % self.revision_id)
            lines.append(b"root_id: %s\n" % self.root_id)
            if self.parent_id_basename_to_file_id is not None:
                lines.append(b'parent_id_basename_to_file_id: %s\n' %
                             (self.parent_id_basename_to_file_id.key()[0],))
            lines.append(b"id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
        return lines

    @property
    def root(self):
        """Get the root entry."""
        return self.get_entry(self.root_id)
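

# A hedged, illustrative sketch (not part of the original module): convert a
# plain Inventory into a CHKInventory and check that its serialised form
# round-trips through deserialise().  ``chk_store`` is assumed to be a CHK
# capable VersionedFiles instance, the inventory's revision_id and entry
# revisions are assumed to be set, and the maximum_size below is an arbitrary
# example value.
def _example_chk_round_trip(chk_store, inventory):
    chk_inv = CHKInventory.from_inventory(chk_store, inventory,
                                          maximum_size=4096)
    lines = chk_inv.to_lines()
    copy = CHKInventory.deserialise(chk_store, lines, (chk_inv.revision_id,))
    return copy.root_id == chk_inv.root_id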


class CHKInventoryDirectory(InventoryDirectory):
    """A directory in an inventory."""

    __slots__ = ['_children', '_chk_inventory']

    def __init__(self, file_id, name, parent_id, chk_inventory):
        # Don't call InventoryDirectory.__init__ - it isn't right for this
        # class.
        InventoryEntry.__init__(self, file_id, name, parent_id)
        self._children = None
        self._chk_inventory = chk_inventory

    @property
    def children(self):
        """Access the list of children of this directory.

        With a parent_id_basename_to_file_id index this loads all the
        children of the directory; without one it would have to load the
        entire id_to_entry index, which is far worse. A more sophisticated
        proxy object might be nice, to allow partial loading of children as
        well when specific names are accessed. (So path traversal can be
        written in the obvious way but not examine siblings.)
        """
        if self._children is not None:
            return self._children
        # No longer supported
        if self._chk_inventory.parent_id_basename_to_file_id is None:
            raise AssertionError("Inventories without"
                " parent_id_basename_to_file_id are no longer supported")
        result = {}
        # XXX: Todo - use proxy objects for the children rather than loading
        # all when the attribute is referenced.
        parent_id_index = self._chk_inventory.parent_id_basename_to_file_id
        child_keys = set()
        for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
                key_filter=[StaticTuple(self.file_id,)]):
            child_keys.add(StaticTuple(file_id,))
        cached = set()
        for file_id_key in child_keys:
            entry = self._chk_inventory._fileid_to_entry_cache.get(
                file_id_key[0], None)
            if entry is not None:
                result[entry.name] = entry
                cached.add(file_id_key)
        child_keys.difference_update(cached)
        # populate; todo: do by name
        id_to_entry = self._chk_inventory.id_to_entry
        for file_id_key, bytes in id_to_entry.iteritems(child_keys):
            entry = self._chk_inventory._bytes_to_entry(bytes)
            result[entry.name] = entry
            self._chk_inventory._fileid_to_entry_cache[file_id_key[0]] = entry
        self._children = result
        return result


entry_factory = {
    'directory': InventoryDirectory,
    'file': InventoryFile,
    'symlink': InventoryLink,
    'tree-reference': TreeReference
}


def make_entry(kind, name, parent_id, file_id=None):
    """Create an inventory entry.

    :param kind: the type of inventory entry to create.
    :param name: the basename of the entry.
    :param parent_id: the parent_id of the entry.
    :param file_id: the file_id to use. if None, one will be created.
    """
    if file_id is None:
        file_id = generate_ids.gen_file_id(name)
    name = ensure_normalized_name(name)
    try:
        factory = entry_factory[kind]
    except KeyError:
        raise errors.BadFileKindError(name, kind)
    return factory(file_id, name, parent_id)


def ensure_normalized_name(name):
    """Normalize name.

    :raises InvalidNormalization: When name is not normalized, and cannot be
        accessed on this platform by the normalized path.
    :return: The NFC normalised version of name.
    """
    # ------- This has been copied to breezy.dirstate.DirState.add, please
    # keep them synchronised.
    # we don't import normalized_filename directly because we want to be
    # able to change the implementation at runtime for tests.
    norm_name, can_access = osutils.normalized_filename(name)
    if norm_name != name:
        if can_access:
            return norm_name
        else:
            # TODO: jam 20060701 This would probably be more useful
            #       if the error was raised with the full path
            raise errors.InvalidNormalization(name)
    return name


_NAME_RE = lazy_regex.lazy_compile(r'^[^/\\]+$')


def is_valid_name(name):
    return bool(_NAME_RE.match(name))
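

# Hedged usage sketch for make_entry() (not called anywhere in this module):
# build a directory and a file under the tree root.  The names are
# illustrative; file ids are generated automatically.
def _example_make_entries():
    src = make_entry('directory', 'src', ROOT_ID)
    hello = make_entry('file', 'hello.c', src.file_id)
    return [entry for entry in (src, hello) if is_valid_name(entry.name)]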


def _check_delta_unique_ids(delta):
    """Decorate a delta and check that the file ids in it are unique.

    :return: A generator over delta.
    """
    ids = set()
    for item in delta:
        length = len(ids) + 1
        ids.add(item[2])
        if len(ids) != length:
            raise errors.InconsistentDelta(item[0] or item[1], item[2],
                                           "repeated file_id")
        yield item


def _check_delta_unique_new_paths(delta):
    """Decorate a delta and check that the new paths in it are unique.

    :return: A generator over delta.
    """
    paths = set()
    for item in delta:
        length = len(paths) + 1
        path = item[1]
        if path is not None:
            paths.add(path)
            if len(paths) != length:
                raise errors.InconsistentDelta(path, item[2], "repeated path")
        yield item


def _check_delta_unique_old_paths(delta):
    """Decorate a delta and check that the old paths in it are unique.

    :return: A generator over delta.
    """
    paths = set()
    for item in delta:
        length = len(paths) + 1
        path = item[0]
        if path is not None:
            paths.add(path)
            if len(paths) != length:
                raise errors.InconsistentDelta(path, item[2], "repeated path")
        yield item


def _check_delta_ids_are_valid(delta):
    """Decorate a delta and check that the ids in it are valid.

    :return: A generator over delta.
    """
    for item in delta:
        entry = item[3]
        if item[2] is None:
            raise errors.InconsistentDelta(item[0] or item[1], item[2],
                                           "entry with file_id None %r" % entry)
        if not isinstance(item[2], bytes):
            raise errors.InconsistentDelta(item[0] or item[1], item[2],
                                           "entry with non bytes file_id %r" % entry)
        yield item


def _check_delta_ids_match_entry(delta):
    """Decorate a delta and check that the ids in it match the entry.file_id.

    :return: A generator over delta.
    """
    for item in delta:
        entry = item[3]
        if entry is not None:
            if entry.file_id != item[2]:
                raise errors.InconsistentDelta(item[0] or item[1], item[2],
                                               "mismatched id with %r" % entry)
        yield item


def _check_delta_new_path_entry_both_or_None(delta):
    """Decorate a delta and check that the new_path and entry are paired.

    :return: A generator over delta.
    """
    for item in delta:
        new_path = item[1]
        entry = item[3]
        if new_path is None and entry is not None:
            raise errors.InconsistentDelta(item[0], item[2],
                                           "Entry with no new_path")
        if new_path is not None and entry is None:
            raise errors.InconsistentDelta(new_path, item[2],
                                           "new_path with no entry")
        yield item


def mutable_inventory_from_tree(tree):
    """Create a new inventory that has the same contents as a specified tree.

    :param tree: Revision tree to create inventory from
    """
    entries = tree.iter_entries_by_dir()
    inv = Inventory(None, tree.get_revision_id())
    for path, inv_entry in entries:
        inv.add(inv_entry.copy())
    return inv
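

# Hedged sketch of how the _check_delta_* helpers compose: each wraps the
# delta in a lazily evaluated generator, so problems only surface when the
# delta is actually iterated (as create_by_apply_delta() does above).  The
# function name is illustrative and not part of the original API.
def _example_checked_delta(delta):
    delta = _check_delta_unique_ids(delta)
    delta = _check_delta_unique_old_paths(delta)
    delta = _check_delta_unique_new_paths(delta)
    delta = _check_delta_ids_match_entry(delta)
    delta = _check_delta_ids_are_valid(delta)
    delta = _check_delta_new_path_entry_both_or_None(delta)
    return list(delta)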