# Copyright (C) 2006-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""DirState objects record the state of a directory and its bzr metadata.

Pseudo EBNF grammar for the state file. Fields are separated by NULLs, and
lines by NL. The field delimiters are omitted from the grammar, the line
delimiters are not - this is done for clarity of reading. All string data is
in utf8.

::

    MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";
    NL = "\\n";
    NULL = "\\0";
    WHOLE_NUMBER = {digit}, digit;
    BOOLEAN = "y" | "n";
    REVISION_ID = a non-empty utf8 string;

    dirstate format = header line, full checksum, row count, parent details,
        ghost_details, entries;
    header line = "#bazaar dirstate flat format 3", NL;
    full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;
    row count = "num_entries: ", WHOLE_NUMBER, NL;
    parent_details = WHOLE_NUMBER, {REVISION_ID}, NL;
    ghost_details = WHOLE_NUMBER, {REVISION_ID}, NL;
    entries = {entry};
    entry = entry_key, current_entry_details, {parent_entry_details};
    entry_key = dirname, basename, fileid;
    current_entry_details = common_entry_details, working_entry_details;
    parent_entry_details = common_entry_details, history_entry_details;
    common_entry_details = MINIKIND, fingerprint, size, executable;
    working_entry_details = packed_stat;
    history_entry_details = REVISION_ID;
    executable = BOOLEAN;
    size = WHOLE_NUMBER;
    fingerprint = a non-empty utf8 sequence with meaning defined by minikind;

Given this definition, the following is useful to know::

    entry (aka row) - all the data for a given key.
    entry[0]: The key (dirname, basename, fileid)
    entry[0][0]: dirname
    entry[0][1]: basename
    entry[0][2]: fileid
    entry[1]: The tree(s) data for this path and id combination.
    entry[1][0]: The current tree
    entry[1][1]: The second tree

For the data of one tree within an entry (using tree 0, the current tree, to
demonstrate)::

    entry[1][0][0]: minikind
    entry[1][0][1]: fingerprint
    entry[1][0][2]: size
    entry[1][0][3]: executable
    entry[1][0][4]: packed_stat

OR (for non tree-0)::

    entry[1][1][4]: revision_id

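For illustration only (the values below are made up rather than taken from a
real tree), a clean file present in the working tree and in one parent tree
might be represented as::

    ((b'', b'hello.c', b'hello.c-file-id'),        # entry[0]: the key
     [(b'f', b'<sha1 of content>', 30, False,      # entry[1][0]: tree 0,
       b'<packed stat>'),                          #   last field is packed_stat
      (b'f', b'<sha1 of content>', 30, False,      # entry[1][1]: tree 1,
       b'<parent revision id>')])                  #   last field is revision_id
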
There may be multiple rows at the root, one per id present in the root, so the
in-memory root row is now::

    self._dirblocks[0] -> ('', [entry ...]),

and the entries in there are::

    entries[0][0]: b''
    entries[0][1]: b''
    entries[0][2]: file_id
    entries[1][0]: The tree data for the current tree for this fileid at /
    etc.

Kinds::

    b'r' is a relocated entry: This path is not present in this tree with this
        id, but the id can be found at another location. The fingerprint is
        used to point to the target location.
    b'a' is an absent entry: In that tree the id is not present at this path.
    b'd' is a directory entry: This path in this tree is a directory with the
        current file id. There is no fingerprint for directories.
    b'f' is a file entry: As for directory, but it's a file. The fingerprint is
        the sha1 value of the file's canonical form, i.e. after any read
        filters have been applied to the convenience form stored in the working
        tree.
    b'l' is a symlink entry: As for directory, but a symlink. The fingerprint is
        the link target.
    b't' is a reference to a nested subtree; the fingerprint is the referenced
        revision.

Ordering:

The entries on disk and in memory are ordered according to the following keys::

    directory, as a list of components
    filename
    file-id

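To illustrate the effect of splitting the directory into components (this
worked example is not part of the original format description), entries under
the directory b'a/b' sort before entries under b'a-b'::

    dirname    key component used for sorting
    b'a'       (b'a',)
    b'a/b'     (b'a', b'b')
    b'a-b'     (b'a-b',)

A plain byte comparison of the unsplit dirnames would order b'a-b' before
b'a/b', because b'-' sorts before b'/'.
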
--- Format 1 had the following different definition: ---

::

    rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
        WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
        {PARENT ROW}
    PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
        basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,
        SHA1

PARENT ROWs are emitted for every parent that is not in the ghosts details
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
each row will have a PARENT ROW for foo and baz, but not for bar.


In any tree, a kind of 'moved' indicates that the fingerprint field
(which we treat as opaque data specific to the 'kind' anyway) has the
details for the id of this row in that tree.

I'm strongly tempted to add an id->path index as well, but I think that
where we need an id->path mapping we usually also read the whole file, so
I'm going to skip that for the moment: we have the ability to locate any
path in any tree via bisect, and if we look things up by path, we can
accumulate an id->path mapping as we go, which will tend to match what we
looked for.

I plan to implement this asap, so please speak up now to alter/tweak the
design - and once we stabilise on this, I'll update the wiki page for it.

The rationale for all this is that we want fast operations for the
common case (diff/status/commit/merge on all files) and extremely fast
operations for the less common, but still frequent, case of status/diff/commit
on specific files. Operations on specific files involve a scan for all
the children of a path, *in every involved tree*, which the current
format did not accommodate.
----

Design priorities:
 1. Fast end-to-end use for bzr's top 5 use cases (commit/diff/status/merge/???).
 2. Fall back to the current object model as needed.
 3. Scale usably to the largest trees known today - say 50K entries (Mozilla
    is an example of this).


Locking:

 Eventually we may reuse dirstate objects across locks IFF the dirstate file
 has not been modified, but that will require that we flush/ignore cached
 stat-hit data, because we won't want to restat all files on disk just because
 a lock was acquired, yet we cannot trust the data after the previous lock was
 released.

Memory representation::

 vector of all directories, and vector of the children?
   i.e.
     root_entries = (direntry for root, [parent_direntries_for_root]),
     dirblocks = [
         ('', ['data for achild', 'data for bchild', 'data for cchild'])
         ('dir', ['achild', 'cchild', 'echild'])
         ]
    - single bisect to find N subtrees from a path spec
    - in-order for serialisation - this is 'dirblock' grouping.
    - insertion of a file '/a' affects only the '/' child-vector; that is,
      inserting 10K elements from scratch does not generate O(N^2) memmoves of
      a single vector, rather each individual directory vector is updated,
      which tends to be limited to a manageable size. It will scale badly on
      trees with 10K entries in a single directory. Compare with
      Inventory.InventoryDirectory, which has a dictionary for the children:
      no bisect capability, it can only probe for exact matches, or grab all
      elements and sort.
    - What's the risk of error here? Once we have the base format being
      processed we should have a net win regardless of optimality. So we are
      going to go with what seems reasonable.

open questions:

Maybe we should do a test profile of the core structure - 10K simulated
searches/lookups/etc?

Objects for each row?
The lifetime of Dirstate objects is currently per lock, but see above for
possible extensions. The lifetime of a row from a dirstate is expected to be
very short in the optimistic case, which is what we are optimising for. For
instance, subtree status will determine from analysis of the disk data what
rows need to be examined at all, and will be able to determine from a single
row whether that file has altered or not, so we are aiming to process tens of
thousands of entries each second within the dirstate context, before exposing
anything to the larger codebase. This suggests we want the time for a single
file comparison to be < 0.1 milliseconds. That would give us 10000 paths per
second processed, and to scale to 100 thousand we'll need another order of
magnitude beyond that. Now, as the lifetime for all unchanged entries is the
time to parse, stat the file on disk, and then immediately discard, the
overhead of object creation becomes a significant cost.

Figures: Creating a tuple from 3 elements was profiled at 0.0625
microseconds, whereas creating an object which is subclassed from tuple was
0.500 microseconds, and creating an object with 3 elements and slots was 3
microseconds. 0.1 milliseconds is 100 microseconds, and ideally we'll get
down to 10 microseconds for the total processing - having 33% of that be
object creation is a huge overhead. There is a potential cost in using tuples
within each row, which is that the conditional code to do comparisons may be
slower than method invocation, but method invocation is known to be slow due
to stack frame creation, so avoiding methods in these tight inner loops is
unfortunately desirable. We can consider a pyrex version of this with objects
in future if desired.

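A rough way to reproduce this kind of micro-benchmark (the absolute figures
above are historical; this ``timeit`` sketch is only illustrative and was not
the tool originally used, and the numbers will vary by machine and Python
version) is::

    from timeit import timeit

    # With the default number=1000000, the result is numerically the
    # per-call cost in microseconds.
    timeit("(k, f, s)", "k = b'f'; f = b'x' * 40; s = 10")
    timeit("T((k, f, s))",
           "class T(tuple): pass\nk = b'f'; f = b'x' * 40; s = 10")
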
"""

import bisect
import contextlib
import errno
import operator
import os
from stat import S_IEXEC
import stat
import sys
import time
import zlib

from . import (
    inventory,
    )
from .. import (
    cache_utf8,
    config,
    debug,
    errors,
    lock,
    osutils,
    static_tuple,
    trace,
    urlutils,
    )
from .inventorytree import InventoryTreeChange


# This is the Windows equivalent of ENOTDIR
# It is defined in pywin32.winerror, but we don't want a strong dependency for
# just an error code.
ERROR_PATH_NOT_FOUND = 3
ERROR_DIRECTORY = 267


class DirstateCorrupt(errors.BzrError):

    _fmt = "The dirstate file (%(state)s) appears to be corrupt: %(msg)s"

    def __init__(self, state, msg):
        errors.BzrError.__init__(self)
        self.state = state
        self.msg = msg


class SHA1Provider(object):
    """An interface for getting sha1s of a file."""

    def sha1(self, abspath):
        """Return the sha1 of a file given its absolute path.

        :param abspath: May be a filesystem encoded absolute path
            or a unicode path.
        """
        raise NotImplementedError(self.sha1)

    def stat_and_sha1(self, abspath):
        """Return the stat and sha1 of a file given its absolute path.

        :param abspath: May be a filesystem encoded absolute path
            or a unicode path.

        Note: the stat should be the stat of the physical file
        while the sha may be the sha of its canonical content.
        """
        raise NotImplementedError(self.stat_and_sha1)


class DefaultSHA1Provider(SHA1Provider):
    """A SHA1Provider that reads directly from the filesystem."""

    def sha1(self, abspath):
        """Return the sha1 of a file given its absolute path."""
        return osutils.sha_file_by_name(abspath)

    def stat_and_sha1(self, abspath):
        """Return the stat and sha1 of a file given its absolute path."""
        with open(abspath, 'rb') as file_obj:
            statvalue = os.fstat(file_obj.fileno())
            sha1 = osutils.sha_file(file_obj)
        return statvalue, sha1


class DirState(object):
    """Record directory and metadata state for fast access.

    A dirstate is a specialised data structure for managing local working
    tree state information. It's not yet well defined whether it is platform
    specific, and if it is, how we detect/parameterize that.

    Dirstates use the usual lock_write, lock_read and unlock mechanisms.
    Unlike most bzr disk formats, DirStates must be locked for reading, using
    lock_read. (This is an os file lock internally.) This is necessary
    because the file can be rewritten in place.

    DirStates must be explicitly written with save() to commit changes; just
    unlocking them does not write the changes to disk.
    """

    _kind_to_minikind = {
        'absent': b'a',
        'file': b'f',
        'directory': b'd',
        'relocated': b'r',
        'symlink': b'l',
        'tree-reference': b't',
        }
    _minikind_to_kind = {
        b'a': 'absent',
        b'f': 'file',
        b'd': 'directory',
        b'l': 'symlink',
        b'r': 'relocated',
        b't': 'tree-reference',
        }
    _stat_to_minikind = {
        stat.S_IFDIR: b'd',
        stat.S_IFREG: b'f',
        stat.S_IFLNK: b'l',
        }
    _to_yesno = {True: b'y', False: b'n'}  # TODO profile the performance gain
    # of using int conversion rather than a dict here. AND BLAME ANDREW IF
    # it is faster.

    # TODO: jam 20070221 Figure out what to do if we have a record that exceeds
    #       the BISECT_PAGE_SIZE. For now, we just have to make it large enough
    #       that we are sure a single record will always fit.
348 BISECT_PAGE_SIZE = 4096 349 350 NOT_IN_MEMORY = 0 351 IN_MEMORY_UNMODIFIED = 1 352 IN_MEMORY_MODIFIED = 2 353 IN_MEMORY_HASH_MODIFIED = 3 # Only hash-cache updates 354 355 # A pack_stat (the x's) that is just noise and will never match the output 356 # of base64 encode. 357 NULLSTAT = b'x' * 32 358 NULL_PARENT_DETAILS = static_tuple.StaticTuple(b'a', b'', 0, False, b'') 359 360 HEADER_FORMAT_2 = b'#bazaar dirstate flat format 2\n' 361 HEADER_FORMAT_3 = b'#bazaar dirstate flat format 3\n' 362 363 def __init__(self, path, sha1_provider, worth_saving_limit=0, 364 use_filesystem_for_exec=True): 365 """Create a DirState object. 366 367 :param path: The path at which the dirstate file on disk should live. 368 :param sha1_provider: an object meeting the SHA1Provider interface. 369 :param worth_saving_limit: when the exact number of hash changed 370 entries is known, only bother saving the dirstate if more than 371 this count of entries have changed. 372 -1 means never save hash changes, 0 means always save hash changes. 373 :param use_filesystem_for_exec: Whether to trust the filesystem 374 for executable bit information 375 """ 376 # _header_state and _dirblock_state represent the current state 377 # of the dirstate metadata and the per-row data respectiely. 378 # NOT_IN_MEMORY indicates that no data is in memory 379 # IN_MEMORY_UNMODIFIED indicates that what we have in memory 380 # is the same as is on disk 381 # IN_MEMORY_MODIFIED indicates that we have a modified version 382 # of what is on disk. 383 # In future we will add more granularity, for instance _dirblock_state 384 # will probably support partially-in-memory as a separate variable, 385 # allowing for partially-in-memory unmodified and partially-in-memory 386 # modified states. 387 self._header_state = DirState.NOT_IN_MEMORY 388 self._dirblock_state = DirState.NOT_IN_MEMORY 389 # If true, an error has been detected while updating the dirstate, and 390 # for safety we're not going to commit to disk. 391 self._changes_aborted = False 392 self._dirblocks = [] 393 self._ghosts = [] 394 self._parents = [] 395 self._state_file = None 396 self._filename = path 397 self._lock_token = None 398 self._lock_state = None 399 self._id_index = None 400 # a map from packed_stat to sha's. 401 self._packed_stat_index = None 402 self._end_of_header = None 403 self._cutoff_time = None 404 self._split_path_cache = {} 405 self._bisect_page_size = DirState.BISECT_PAGE_SIZE 406 self._sha1_provider = sha1_provider 407 if 'hashcache' in debug.debug_flags: 408 self._sha1_file = self._sha1_file_and_mutter 409 else: 410 self._sha1_file = self._sha1_provider.sha1 411 # These two attributes provide a simple cache for lookups into the 412 # dirstate in-memory vectors. By probing respectively for the last 413 # block, and for the next entry, we save nearly 2 bisections per path 414 # during commit. 415 self._last_block_index = None 416 self._last_entry_index = None 417 # The set of known hash changes 418 self._known_hash_changes = set() 419 # How many hash changed entries can we have without saving 420 self._worth_saving_limit = worth_saving_limit 421 self._config_stack = config.LocationStack(urlutils.local_path_to_url( 422 path)) 423 self._use_filesystem_for_exec = use_filesystem_for_exec 424 425 def __repr__(self): 426 return "%s(%r)" % \ 427 (self.__class__.__name__, self._filename) 428 429 def _mark_modified(self, hash_changed_entries=None, header_modified=False): 430 """Mark this dirstate as modified. 
431 432 :param hash_changed_entries: if non-None, mark just these entries as 433 having their hash modified. 434 :param header_modified: mark the header modified as well, not just the 435 dirblocks. 436 """ 437 #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries) 438 if hash_changed_entries: 439 self._known_hash_changes.update( 440 [e[0] for e in hash_changed_entries]) 441 if self._dirblock_state in (DirState.NOT_IN_MEMORY, 442 DirState.IN_MEMORY_UNMODIFIED): 443 # If the dirstate is already marked a IN_MEMORY_MODIFIED, then 444 # that takes precedence. 445 self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED 446 else: 447 # TODO: Since we now have a IN_MEMORY_HASH_MODIFIED state, we 448 # should fail noisily if someone tries to set 449 # IN_MEMORY_MODIFIED but we don't have a write-lock! 450 # We don't know exactly what changed so disable smart saving 451 self._dirblock_state = DirState.IN_MEMORY_MODIFIED 452 if header_modified: 453 self._header_state = DirState.IN_MEMORY_MODIFIED 454 455 def _mark_unmodified(self): 456 """Mark this dirstate as unmodified.""" 457 self._header_state = DirState.IN_MEMORY_UNMODIFIED 458 self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED 459 self._known_hash_changes = set() 460 461 def add(self, path, file_id, kind, stat, fingerprint): 462 """Add a path to be tracked. 463 464 :param path: The path within the dirstate - b'' is the root, 'foo' is the 465 path foo within the root, 'foo/bar' is the path bar within foo 466 within the root. 467 :param file_id: The file id of the path being added. 468 :param kind: The kind of the path, as a string like 'file', 469 'directory', etc. 470 :param stat: The output of os.lstat for the path. 471 :param fingerprint: The sha value of the file's canonical form (i.e. 472 after any read filters have been applied), 473 or the target of a symlink, 474 or the referenced revision id for tree-references, 475 or b'' for directories. 476 """ 477 # adding a file: 478 # find the block its in. 479 # find the location in the block. 480 # check its not there 481 # add it. 482 # ------- copied from inventory.ensure_normalized_name - keep synced. 483 # --- normalized_filename wants a unicode basename only, so get one. 484 dirname, basename = osutils.split(path) 485 # we dont import normalized_filename directly because we want to be 486 # able to change the implementation at runtime for tests. 487 norm_name, can_access = osutils.normalized_filename(basename) 488 if norm_name != basename: 489 if can_access: 490 basename = norm_name 491 else: 492 raise errors.InvalidNormalization(path) 493 # you should never have files called . or ..; just add the directory 494 # in the parent, or according to the special treatment for the root 495 if basename == '.' or basename == '..': 496 raise inventory.InvalidEntryName(path) 497 # now that we've normalised, we need the correct utf8 path and 498 # dirname and basename elements. This single encode and split should be 499 # faster than three separate encodes. 
500 utf8path = (dirname + '/' + basename).strip('/').encode('utf8') 501 dirname, basename = osutils.split(utf8path) 502 # uses __class__ for speed; the check is needed for safety 503 if file_id.__class__ is not bytes: 504 raise AssertionError( 505 "must be a utf8 file_id not %s" % (type(file_id), )) 506 # Make sure the file_id does not exist in this tree 507 rename_from = None 508 file_id_entry = self._get_entry( 509 0, fileid_utf8=file_id, include_deleted=True) 510 if file_id_entry != (None, None): 511 if file_id_entry[1][0][0] == b'a': 512 if file_id_entry[0] != (dirname, basename, file_id): 513 # set the old name's current operation to rename 514 self.update_minimal(file_id_entry[0], 515 b'r', 516 path_utf8=b'', 517 packed_stat=b'', 518 fingerprint=utf8path 519 ) 520 rename_from = file_id_entry[0][0:2] 521 else: 522 path = osutils.pathjoin( 523 file_id_entry[0][0], file_id_entry[0][1]) 524 kind = DirState._minikind_to_kind[file_id_entry[1][0][0]] 525 info = '%s:%s' % (kind, path) 526 raise inventory.DuplicateFileId(file_id, info) 527 first_key = (dirname, basename, b'') 528 block_index, present = self._find_block_index_from_key(first_key) 529 if present: 530 # check the path is not in the tree 531 block = self._dirblocks[block_index][1] 532 entry_index, _ = self._find_entry_index(first_key, block) 533 while (entry_index < len(block) and 534 block[entry_index][0][0:2] == first_key[0:2]): 535 if block[entry_index][1][0][0] not in (b'a', b'r'): 536 # this path is in the dirstate in the current tree. 537 raise Exception("adding already added path!") 538 entry_index += 1 539 else: 540 # The block where we want to put the file is not present. But it 541 # might be because the directory was empty, or not loaded yet. Look 542 # for a parent entry, if not found, raise NotVersionedError 543 parent_dir, parent_base = osutils.split(dirname) 544 parent_block_idx, parent_entry_idx, _, parent_present = \ 545 self._get_block_entry_index(parent_dir, parent_base, 0) 546 if not parent_present: 547 raise errors.NotVersionedError(path, str(self)) 548 self._ensure_block(parent_block_idx, parent_entry_idx, dirname) 549 block = self._dirblocks[block_index][1] 550 entry_key = (dirname, basename, file_id) 551 if stat is None: 552 size = 0 553 packed_stat = DirState.NULLSTAT 554 else: 555 size = stat.st_size 556 packed_stat = pack_stat(stat) 557 parent_info = self._empty_parent_info() 558 minikind = DirState._kind_to_minikind[kind] 559 if rename_from is not None: 560 if rename_from[0]: 561 old_path_utf8 = b'%s/%s' % rename_from 562 else: 563 old_path_utf8 = rename_from[1] 564 parent_info[0] = (b'r', old_path_utf8, 0, False, b'') 565 if kind == 'file': 566 entry_data = entry_key, [ 567 (minikind, fingerprint, size, False, packed_stat), 568 ] + parent_info 569 elif kind == 'directory': 570 entry_data = entry_key, [ 571 (minikind, b'', 0, False, packed_stat), 572 ] + parent_info 573 elif kind == 'symlink': 574 entry_data = entry_key, [ 575 (minikind, fingerprint, size, False, packed_stat), 576 ] + parent_info 577 elif kind == 'tree-reference': 578 entry_data = entry_key, [ 579 (minikind, fingerprint, 0, False, packed_stat), 580 ] + parent_info 581 else: 582 raise errors.BzrError('unknown kind %r' % kind) 583 entry_index, present = self._find_entry_index(entry_key, block) 584 if not present: 585 block.insert(entry_index, entry_data) 586 else: 587 if block[entry_index][1][0][0] != b'a': 588 raise AssertionError(" %r(%r) already added" % 589 (basename, file_id)) 590 block[entry_index][1][0] = entry_data[1][0] 591 592 
if kind == 'directory': 593 # insert a new dirblock 594 self._ensure_block(block_index, entry_index, utf8path) 595 self._mark_modified() 596 if self._id_index: 597 self._add_to_id_index(self._id_index, entry_key) 598 599 def _bisect(self, paths): 600 """Bisect through the disk structure for specific rows. 601 602 :param paths: A list of paths to find 603 :return: A dict mapping path => entries for found entries. Missing 604 entries will not be in the map. 605 The list is not sorted, and entries will be populated 606 based on when they were read. 607 """ 608 self._requires_lock() 609 # We need the file pointer to be right after the initial header block 610 self._read_header_if_needed() 611 # If _dirblock_state was in memory, we should just return info from 612 # there, this function is only meant to handle when we want to read 613 # part of the disk. 614 if self._dirblock_state != DirState.NOT_IN_MEMORY: 615 raise AssertionError("bad dirblock state %r" % 616 self._dirblock_state) 617 618 # The disk representation is generally info + '\0\n\0' at the end. But 619 # for bisecting, it is easier to treat this as '\0' + info + '\0\n' 620 # Because it means we can sync on the '\n' 621 state_file = self._state_file 622 file_size = os.fstat(state_file.fileno()).st_size 623 # We end up with 2 extra fields, we should have a trailing '\n' to 624 # ensure that we read the whole record, and we should have a precursur 625 # b'' which ensures that we start after the previous '\n' 626 entry_field_count = self._fields_per_entry() + 1 627 628 low = self._end_of_header 629 high = file_size - 1 # Ignore the final '\0' 630 # Map from (dir, name) => entry 631 found = {} 632 633 # Avoid infinite seeking 634 max_count = 30 * len(paths) 635 count = 0 636 # pending is a list of places to look. 637 # each entry is a tuple of low, high, dir_names 638 # low -> the first byte offset to read (inclusive) 639 # high -> the last byte offset (inclusive) 640 # dir_names -> The list of (dir, name) pairs that should be found in 641 # the [low, high] range 642 pending = [(low, high, paths)] 643 644 page_size = self._bisect_page_size 645 646 fields_to_entry = self._get_fields_to_entry() 647 648 while pending: 649 low, high, cur_files = pending.pop() 650 651 if not cur_files or low >= high: 652 # Nothing to find 653 continue 654 655 count += 1 656 if count > max_count: 657 raise errors.BzrError('Too many seeks, most likely a bug.') 658 659 mid = max(low, (low + high - page_size) // 2) 660 661 state_file.seek(mid) 662 # limit the read size, so we don't end up reading data that we have 663 # already read. 664 read_size = min(page_size, (high - mid) + 1) 665 block = state_file.read(read_size) 666 667 start = mid 668 entries = block.split(b'\n') 669 670 if len(entries) < 2: 671 # We didn't find a '\n', so we cannot have found any records. 672 # So put this range back and try again. 
But we know we have to 673 # increase the page size, because a single read did not contain 674 # a record break (so records must be larger than page_size) 675 page_size *= 2 676 pending.append((low, high, cur_files)) 677 continue 678 679 # Check the first and last entries, in case they are partial, or if 680 # we don't care about the rest of this page 681 first_entry_num = 0 682 first_fields = entries[0].split(b'\0') 683 if len(first_fields) < entry_field_count: 684 # We didn't get the complete first entry 685 # so move start, and grab the next, which 686 # should be a full entry 687 start += len(entries[0]) + 1 688 first_fields = entries[1].split(b'\0') 689 first_entry_num = 1 690 691 if len(first_fields) <= 2: 692 # We didn't even get a filename here... what do we do? 693 # Try a large page size and repeat this query 694 page_size *= 2 695 pending.append((low, high, cur_files)) 696 continue 697 else: 698 # Find what entries we are looking for, which occur before and 699 # after this first record. 700 after = start 701 if first_fields[1]: 702 first_path = first_fields[1] + b'/' + first_fields[2] 703 else: 704 first_path = first_fields[2] 705 first_loc = _bisect_path_left(cur_files, first_path) 706 707 # These exist before the current location 708 pre = cur_files[:first_loc] 709 # These occur after the current location, which may be in the 710 # data we read, or might be after the last entry 711 post = cur_files[first_loc:] 712 713 if post and len(first_fields) >= entry_field_count: 714 # We have files after the first entry 715 716 # Parse the last entry 717 last_entry_num = len(entries) - 1 718 last_fields = entries[last_entry_num].split(b'\0') 719 if len(last_fields) < entry_field_count: 720 # The very last hunk was not complete, 721 # read the previous hunk 722 after = mid + len(block) - len(entries[-1]) 723 last_entry_num -= 1 724 last_fields = entries[last_entry_num].split(b'\0') 725 else: 726 after = mid + len(block) 727 728 if last_fields[1]: 729 last_path = last_fields[1] + b'/' + last_fields[2] 730 else: 731 last_path = last_fields[2] 732 last_loc = _bisect_path_right(post, last_path) 733 734 middle_files = post[:last_loc] 735 post = post[last_loc:] 736 737 if middle_files: 738 # We have files that should occur in this block 739 # (>= first, <= last) 740 # Either we will find them here, or we can mark them as 741 # missing. 742 743 if middle_files[0] == first_path: 744 # We might need to go before this location 745 pre.append(first_path) 746 if middle_files[-1] == last_path: 747 post.insert(0, last_path) 748 749 # Find out what paths we have 750 paths = {first_path: [first_fields]} 751 # last_path might == first_path so we need to be 752 # careful if we should append rather than overwrite 753 if last_entry_num != first_entry_num: 754 paths.setdefault(last_path, []).append(last_fields) 755 for num in range(first_entry_num + 1, last_entry_num): 756 # TODO: jam 20070223 We are already splitting here, so 757 # shouldn't we just split the whole thing rather 758 # than doing the split again in add_one_record? 
759 fields = entries[num].split(b'\0') 760 if fields[1]: 761 path = fields[1] + b'/' + fields[2] 762 else: 763 path = fields[2] 764 paths.setdefault(path, []).append(fields) 765 766 for path in middle_files: 767 for fields in paths.get(path, []): 768 # offset by 1 because of the opening '\0' 769 # consider changing fields_to_entry to avoid the 770 # extra list slice 771 entry = fields_to_entry(fields[1:]) 772 found.setdefault(path, []).append(entry) 773 774 # Now we have split up everything into pre, middle, and post, and 775 # we have handled everything that fell in 'middle'. 776 # We add 'post' first, so that we prefer to seek towards the 777 # beginning, so that we will tend to go as early as we need, and 778 # then only seek forward after that. 779 if post: 780 pending.append((after, high, post)) 781 if pre: 782 pending.append((low, start - 1, pre)) 783 784 # Consider that we may want to return the directory entries in sorted 785 # order. For now, we just return them in whatever order we found them, 786 # and leave it up to the caller if they care if it is ordered or not. 787 return found 788 789 def _bisect_dirblocks(self, dir_list): 790 """Bisect through the disk structure to find entries in given dirs. 791 792 _bisect_dirblocks is meant to find the contents of directories, which 793 differs from _bisect, which only finds individual entries. 794 795 :param dir_list: A sorted list of directory names ['', 'dir', 'foo']. 796 :return: A map from dir => entries_for_dir 797 """ 798 # TODO: jam 20070223 A lot of the bisecting logic could be shared 799 # between this and _bisect. It would require parameterizing the 800 # inner loop with a function, though. We should evaluate the 801 # performance difference. 802 self._requires_lock() 803 # We need the file pointer to be right after the initial header block 804 self._read_header_if_needed() 805 # If _dirblock_state was in memory, we should just return info from 806 # there, this function is only meant to handle when we want to read 807 # part of the disk. 808 if self._dirblock_state != DirState.NOT_IN_MEMORY: 809 raise AssertionError("bad dirblock state %r" % 810 self._dirblock_state) 811 # The disk representation is generally info + '\0\n\0' at the end. But 812 # for bisecting, it is easier to treat this as '\0' + info + '\0\n' 813 # Because it means we can sync on the '\n' 814 state_file = self._state_file 815 file_size = os.fstat(state_file.fileno()).st_size 816 # We end up with 2 extra fields, we should have a trailing '\n' to 817 # ensure that we read the whole record, and we should have a precursur 818 # b'' which ensures that we start after the previous '\n' 819 entry_field_count = self._fields_per_entry() + 1 820 821 low = self._end_of_header 822 high = file_size - 1 # Ignore the final '\0' 823 # Map from dir => entry 824 found = {} 825 826 # Avoid infinite seeking 827 max_count = 30 * len(dir_list) 828 count = 0 829 # pending is a list of places to look. 
830 # each entry is a tuple of low, high, dir_names 831 # low -> the first byte offset to read (inclusive) 832 # high -> the last byte offset (inclusive) 833 # dirs -> The list of directories that should be found in 834 # the [low, high] range 835 pending = [(low, high, dir_list)] 836 837 page_size = self._bisect_page_size 838 839 fields_to_entry = self._get_fields_to_entry() 840 841 while pending: 842 low, high, cur_dirs = pending.pop() 843 844 if not cur_dirs or low >= high: 845 # Nothing to find 846 continue 847 848 count += 1 849 if count > max_count: 850 raise errors.BzrError('Too many seeks, most likely a bug.') 851 852 mid = max(low, (low + high - page_size) // 2) 853 854 state_file.seek(mid) 855 # limit the read size, so we don't end up reading data that we have 856 # already read. 857 read_size = min(page_size, (high - mid) + 1) 858 block = state_file.read(read_size) 859 860 start = mid 861 entries = block.split(b'\n') 862 863 if len(entries) < 2: 864 # We didn't find a '\n', so we cannot have found any records. 865 # So put this range back and try again. But we know we have to 866 # increase the page size, because a single read did not contain 867 # a record break (so records must be larger than page_size) 868 page_size *= 2 869 pending.append((low, high, cur_dirs)) 870 continue 871 872 # Check the first and last entries, in case they are partial, or if 873 # we don't care about the rest of this page 874 first_entry_num = 0 875 first_fields = entries[0].split(b'\0') 876 if len(first_fields) < entry_field_count: 877 # We didn't get the complete first entry 878 # so move start, and grab the next, which 879 # should be a full entry 880 start += len(entries[0]) + 1 881 first_fields = entries[1].split(b'\0') 882 first_entry_num = 1 883 884 if len(first_fields) <= 1: 885 # We didn't even get a dirname here... what do we do? 886 # Try a large page size and repeat this query 887 page_size *= 2 888 pending.append((low, high, cur_dirs)) 889 continue 890 else: 891 # Find what entries we are looking for, which occur before and 892 # after this first record. 893 after = start 894 first_dir = first_fields[1] 895 first_loc = bisect.bisect_left(cur_dirs, first_dir) 896 897 # These exist before the current location 898 pre = cur_dirs[:first_loc] 899 # These occur after the current location, which may be in the 900 # data we read, or might be after the last entry 901 post = cur_dirs[first_loc:] 902 903 if post and len(first_fields) >= entry_field_count: 904 # We have records to look at after the first entry 905 906 # Parse the last entry 907 last_entry_num = len(entries) - 1 908 last_fields = entries[last_entry_num].split(b'\0') 909 if len(last_fields) < entry_field_count: 910 # The very last hunk was not complete, 911 # read the previous hunk 912 after = mid + len(block) - len(entries[-1]) 913 last_entry_num -= 1 914 last_fields = entries[last_entry_num].split(b'\0') 915 else: 916 after = mid + len(block) 917 918 last_dir = last_fields[1] 919 last_loc = bisect.bisect_right(post, last_dir) 920 921 middle_files = post[:last_loc] 922 post = post[last_loc:] 923 924 if middle_files: 925 # We have files that should occur in this block 926 # (>= first, <= last) 927 # Either we will find them here, or we can mark them as 928 # missing. 
929 930 if middle_files[0] == first_dir: 931 # We might need to go before this location 932 pre.append(first_dir) 933 if middle_files[-1] == last_dir: 934 post.insert(0, last_dir) 935 936 # Find out what paths we have 937 paths = {first_dir: [first_fields]} 938 # last_dir might == first_dir so we need to be 939 # careful if we should append rather than overwrite 940 if last_entry_num != first_entry_num: 941 paths.setdefault(last_dir, []).append(last_fields) 942 for num in range(first_entry_num + 1, last_entry_num): 943 # TODO: jam 20070223 We are already splitting here, so 944 # shouldn't we just split the whole thing rather 945 # than doing the split again in add_one_record? 946 fields = entries[num].split(b'\0') 947 paths.setdefault(fields[1], []).append(fields) 948 949 for cur_dir in middle_files: 950 for fields in paths.get(cur_dir, []): 951 # offset by 1 because of the opening '\0' 952 # consider changing fields_to_entry to avoid the 953 # extra list slice 954 entry = fields_to_entry(fields[1:]) 955 found.setdefault(cur_dir, []).append(entry) 956 957 # Now we have split up everything into pre, middle, and post, and 958 # we have handled everything that fell in 'middle'. 959 # We add 'post' first, so that we prefer to seek towards the 960 # beginning, so that we will tend to go as early as we need, and 961 # then only seek forward after that. 962 if post: 963 pending.append((after, high, post)) 964 if pre: 965 pending.append((low, start - 1, pre)) 966 967 return found 968 969 def _bisect_recursive(self, paths): 970 """Bisect for entries for all paths and their children. 971 972 This will use bisect to find all records for the supplied paths. It 973 will then continue to bisect for any records which are marked as 974 directories. (and renames?) 975 976 :param paths: A sorted list of (dir, name) pairs 977 eg: [('', b'a'), ('', b'f'), ('a/b', b'c')] 978 :return: A dictionary mapping (dir, name, file_id) => [tree_info] 979 """ 980 # Map from (dir, name, file_id) => [tree_info] 981 found = {} 982 983 found_dir_names = set() 984 985 # Directories that have been read 986 processed_dirs = set() 987 # Get the ball rolling with the first bisect for all entries. 988 newly_found = self._bisect(paths) 989 990 while newly_found: 991 # Directories that need to be read 992 pending_dirs = set() 993 paths_to_search = set() 994 for entry_list in newly_found.values(): 995 for dir_name_id, trees_info in entry_list: 996 found[dir_name_id] = trees_info 997 found_dir_names.add(dir_name_id[:2]) 998 is_dir = False 999 for tree_info in trees_info: 1000 minikind = tree_info[0] 1001 if minikind == b'd': 1002 if is_dir: 1003 # We already processed this one as a directory, 1004 # we don't need to do the extra work again. 1005 continue 1006 subdir, name, file_id = dir_name_id 1007 path = osutils.pathjoin(subdir, name) 1008 is_dir = True 1009 if path not in processed_dirs: 1010 pending_dirs.add(path) 1011 elif minikind == b'r': 1012 # Rename, we need to directly search the target 1013 # which is contained in the fingerprint column 1014 dir_name = osutils.split(tree_info[1]) 1015 if dir_name[0] in pending_dirs: 1016 # This entry will be found in the dir search 1017 continue 1018 if dir_name not in found_dir_names: 1019 paths_to_search.add(tree_info[1]) 1020 # Now we have a list of paths to look for directly, and 1021 # directory blocks that need to be read. 1022 # newly_found is mixing the keys between (dir, name) and path 1023 # entries, but that is okay, because we only really care about the 1024 # targets. 
1025 newly_found = self._bisect(sorted(paths_to_search)) 1026 newly_found.update(self._bisect_dirblocks(sorted(pending_dirs))) 1027 processed_dirs.update(pending_dirs) 1028 return found 1029 1030 def _discard_merge_parents(self): 1031 """Discard any parents trees beyond the first. 1032 1033 Note that if this fails the dirstate is corrupted. 1034 1035 After this function returns the dirstate contains 2 trees, neither of 1036 which are ghosted. 1037 """ 1038 self._read_header_if_needed() 1039 parents = self.get_parent_ids() 1040 if len(parents) < 1: 1041 return 1042 # only require all dirblocks if we are doing a full-pass removal. 1043 self._read_dirblocks_if_needed() 1044 dead_patterns = {(b'a', b'r'), (b'a', b'a'), 1045 (b'r', b'r'), (b'r', b'a')} 1046 1047 def iter_entries_removable(): 1048 for block in self._dirblocks: 1049 deleted_positions = [] 1050 for pos, entry in enumerate(block[1]): 1051 yield entry 1052 if (entry[1][0][0], entry[1][1][0]) in dead_patterns: 1053 deleted_positions.append(pos) 1054 if deleted_positions: 1055 if len(deleted_positions) == len(block[1]): 1056 del block[1][:] 1057 else: 1058 for pos in reversed(deleted_positions): 1059 del block[1][pos] 1060 # if the first parent is a ghost: 1061 if parents[0] in self.get_ghosts(): 1062 empty_parent = [DirState.NULL_PARENT_DETAILS] 1063 for entry in iter_entries_removable(): 1064 entry[1][1:] = empty_parent 1065 else: 1066 for entry in iter_entries_removable(): 1067 del entry[1][2:] 1068 1069 self._ghosts = [] 1070 self._parents = [parents[0]] 1071 self._mark_modified(header_modified=True) 1072 1073 def _empty_parent_info(self): 1074 return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) - 1075 len(self._ghosts)) 1076 1077 def _ensure_block(self, parent_block_index, parent_row_index, dirname): 1078 """Ensure a block for dirname exists. 1079 1080 This function exists to let callers which know that there is a 1081 directory dirname ensure that the block for it exists. This block can 1082 fail to exist because of demand loading, or because a directory had no 1083 children. In either case it is not an error. It is however an error to 1084 call this if there is no parent entry for the directory, and thus the 1085 function requires the coordinates of such an entry to be provided. 1086 1087 The root row is special cased and can be indicated with a parent block 1088 and row index of -1 1089 1090 :param parent_block_index: The index of the block in which dirname's row 1091 exists. 1092 :param parent_row_index: The index in the parent block where the row 1093 exists. 1094 :param dirname: The utf8 dirname to ensure there is a block for. 1095 :return: The index for the block. 1096 """ 1097 if dirname == b'' and parent_row_index == 0 and parent_block_index == 0: 1098 # This is the signature of the root row, and the 1099 # contents-of-root row is always index 1 1100 return 1 1101 # the basename of the directory must be the end of its full name. 1102 if not (parent_block_index == -1 and 1103 parent_block_index == -1 and dirname == b''): 1104 if not dirname.endswith( 1105 self._dirblocks[parent_block_index][1][parent_row_index][0][1]): 1106 raise AssertionError("bad dirname %r" % dirname) 1107 block_index, present = self._find_block_index_from_key( 1108 (dirname, b'', b'')) 1109 if not present: 1110 # In future, when doing partial parsing, this should load and 1111 # populate the entire block. 
1112 self._dirblocks.insert(block_index, (dirname, [])) 1113 return block_index 1114 1115 def _entries_to_current_state(self, new_entries): 1116 """Load new_entries into self.dirblocks. 1117 1118 Process new_entries into the current state object, making them the active 1119 state. The entries are grouped together by directory to form dirblocks. 1120 1121 :param new_entries: A sorted list of entries. This function does not sort 1122 to prevent unneeded overhead when callers have a sorted list already. 1123 :return: Nothing. 1124 """ 1125 if new_entries[0][0][0:2] != (b'', b''): 1126 raise AssertionError( 1127 "Missing root row %r" % (new_entries[0][0],)) 1128 # The two blocks here are deliberate: the root block and the 1129 # contents-of-root block. 1130 self._dirblocks = [(b'', []), (b'', [])] 1131 current_block = self._dirblocks[0][1] 1132 current_dirname = b'' 1133 root_key = (b'', b'') 1134 append_entry = current_block.append 1135 for entry in new_entries: 1136 if entry[0][0] != current_dirname: 1137 # new block - different dirname 1138 current_block = [] 1139 current_dirname = entry[0][0] 1140 self._dirblocks.append((current_dirname, current_block)) 1141 append_entry = current_block.append 1142 # append the entry to the current block 1143 append_entry(entry) 1144 self._split_root_dirblock_into_contents() 1145 1146 def _split_root_dirblock_into_contents(self): 1147 """Split the root dirblocks into root and contents-of-root. 1148 1149 After parsing by path, we end up with root entries and contents-of-root 1150 entries in the same block. This loop splits them out again. 1151 """ 1152 # The above loop leaves the "root block" entries mixed with the 1153 # "contents-of-root block". But we don't want an if check on 1154 # all entries, so instead we just fix it up here. 1155 if self._dirblocks[1] != (b'', []): 1156 raise ValueError("bad dirblock start %r" % (self._dirblocks[1],)) 1157 root_block = [] 1158 contents_of_root_block = [] 1159 for entry in self._dirblocks[0][1]: 1160 if not entry[0][1]: # This is a root entry 1161 root_block.append(entry) 1162 else: 1163 contents_of_root_block.append(entry) 1164 self._dirblocks[0] = (b'', root_block) 1165 self._dirblocks[1] = (b'', contents_of_root_block) 1166 1167 def _entries_for_path(self, path): 1168 """Return a list with all the entries that match path for all ids.""" 1169 dirname, basename = os.path.split(path) 1170 key = (dirname, basename, b'') 1171 block_index, present = self._find_block_index_from_key(key) 1172 if not present: 1173 # the block which should contain path is absent. 1174 return [] 1175 result = [] 1176 block = self._dirblocks[block_index][1] 1177 entry_index, _ = self._find_entry_index(key, block) 1178 # we may need to look at multiple entries at this path: walk while the specific_files match. 1179 while (entry_index < len(block) and 1180 block[entry_index][0][0:2] == key[0:2]): 1181 result.append(block[entry_index]) 1182 entry_index += 1 1183 return result 1184 1185 def _entry_to_line(self, entry): 1186 """Serialize entry to a NULL delimited line ready for _get_output_lines. 1187 1188 :param entry: An entry_tuple as defined in the module docstring. 1189 """ 1190 entire_entry = list(entry[0]) 1191 for tree_number, tree_data in enumerate(entry[1]): 1192 # (minikind, fingerprint, size, executable, tree_specific_string) 1193 entire_entry.extend(tree_data) 1194 # 3 for the key, 5 for the fields per tree. 
1195 tree_offset = 3 + tree_number * 5 1196 # minikind 1197 entire_entry[tree_offset + 0] = tree_data[0] 1198 # size 1199 entire_entry[tree_offset + 2] = b'%d' % tree_data[2] 1200 # executable 1201 entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]] 1202 return b'\0'.join(entire_entry) 1203 1204 def _fields_per_entry(self): 1205 """How many null separated fields should be in each entry row. 1206 1207 Each line now has an extra '\\n' field which is not used 1208 so we just skip over it 1209 1210 entry size:: 1211 3 fields for the key 1212 + number of fields per tree_data (5) * tree count 1213 + newline 1214 """ 1215 tree_count = 1 + self._num_present_parents() 1216 return 3 + 5 * tree_count + 1 1217 1218 def _find_block(self, key, add_if_missing=False): 1219 """Return the block that key should be present in. 1220 1221 :param key: A dirstate entry key. 1222 :return: The block tuple. 1223 """ 1224 block_index, present = self._find_block_index_from_key(key) 1225 if not present: 1226 if not add_if_missing: 1227 # check to see if key is versioned itself - we might want to 1228 # add it anyway, because dirs with no entries dont get a 1229 # dirblock at parse time. 1230 # This is an uncommon branch to take: most dirs have children, 1231 # and most code works with versioned paths. 1232 parent_base, parent_name = osutils.split(key[0]) 1233 if not self._get_block_entry_index(parent_base, parent_name, 0)[3]: 1234 # some parent path has not been added - its an error to add 1235 # this child 1236 raise errors.NotVersionedError(key[0:2], str(self)) 1237 self._dirblocks.insert(block_index, (key[0], [])) 1238 return self._dirblocks[block_index] 1239 1240 def _find_block_index_from_key(self, key): 1241 """Find the dirblock index for a key. 1242 1243 :return: The block index, True if the block for the key is present. 1244 """ 1245 if key[0:2] == (b'', b''): 1246 return 0, True 1247 try: 1248 if (self._last_block_index is not None and 1249 self._dirblocks[self._last_block_index][0] == key[0]): 1250 return self._last_block_index, True 1251 except IndexError: 1252 pass 1253 block_index = bisect_dirblock(self._dirblocks, key[0], 1, 1254 cache=self._split_path_cache) 1255 # _right returns one-past-where-key is so we have to subtract 1256 # one to use it. we use _right here because there are two 1257 # b'' blocks - the root, and the contents of root 1258 # we always have a minimum of 2 in self._dirblocks: root and 1259 # root-contents, and for b'', we get 2 back, so this is 1260 # simple and correct: 1261 present = (block_index < len(self._dirblocks) and 1262 self._dirblocks[block_index][0] == key[0]) 1263 self._last_block_index = block_index 1264 # Reset the entry index cache to the beginning of the block. 1265 self._last_entry_index = -1 1266 return block_index, present 1267 1268 def _find_entry_index(self, key, block): 1269 """Find the entry index for a key in a block. 1270 1271 :return: The entry index, True if the entry for the key is present. 1272 """ 1273 len_block = len(block) 1274 try: 1275 if self._last_entry_index is not None: 1276 # mini-bisect here. 1277 entry_index = self._last_entry_index + 1 1278 # A hit is when the key is after the last slot, and before or 1279 # equal to the next slot. 
1280 if ((entry_index > 0 and block[entry_index - 1][0] < key) and 1281 key <= block[entry_index][0]): 1282 self._last_entry_index = entry_index 1283 present = (block[entry_index][0] == key) 1284 return entry_index, present 1285 except IndexError: 1286 pass 1287 entry_index = bisect.bisect_left(block, (key, [])) 1288 present = (entry_index < len_block and 1289 block[entry_index][0] == key) 1290 self._last_entry_index = entry_index 1291 return entry_index, present 1292 1293 @staticmethod 1294 def from_tree(tree, dir_state_filename, sha1_provider=None): 1295 """Create a dirstate from a bzr Tree. 1296 1297 :param tree: The tree which should provide parent information and 1298 inventory ids. 1299 :param sha1_provider: an object meeting the SHA1Provider interface. 1300 If None, a DefaultSHA1Provider is used. 1301 :return: a DirState object which is currently locked for writing. 1302 (it was locked by DirState.initialize) 1303 """ 1304 result = DirState.initialize(dir_state_filename, 1305 sha1_provider=sha1_provider) 1306 try: 1307 with contextlib.ExitStack() as exit_stack: 1308 exit_stack.enter_context(tree.lock_read()) 1309 parent_ids = tree.get_parent_ids() 1310 num_parents = len(parent_ids) 1311 parent_trees = [] 1312 for parent_id in parent_ids: 1313 parent_tree = tree.branch.repository.revision_tree( 1314 parent_id) 1315 parent_trees.append((parent_id, parent_tree)) 1316 exit_stack.enter_context(parent_tree.lock_read()) 1317 result.set_parent_trees(parent_trees, []) 1318 result.set_state_from_inventory(tree.root_inventory) 1319 except: 1320 # The caller won't have a chance to unlock this, so make sure we 1321 # cleanup ourselves 1322 result.unlock() 1323 raise 1324 return result 1325 1326 def _check_delta_is_valid(self, delta): 1327 delta = list(inventory._check_delta_unique_ids( 1328 inventory._check_delta_unique_old_paths( 1329 inventory._check_delta_unique_new_paths( 1330 inventory._check_delta_ids_match_entry( 1331 inventory._check_delta_ids_are_valid( 1332 inventory._check_delta_new_path_entry_both_or_None(delta))))))) 1333 1334 def delta_key(d): 1335 (old_path, new_path, file_id, new_entry) = d 1336 if old_path is None: 1337 old_path = '' 1338 if new_path is None: 1339 new_path = '' 1340 return (old_path, new_path, file_id, new_entry) 1341 delta.sort(key=delta_key, reverse=True) 1342 return delta 1343 1344 def update_by_delta(self, delta): 1345 """Apply an inventory delta to the dirstate for tree 0 1346 1347 This is the workhorse for apply_inventory_delta in dirstate based 1348 trees. 1349 1350 :param delta: An inventory delta. See Inventory.apply_delta for 1351 details. 1352 """ 1353 self._read_dirblocks_if_needed() 1354 encode = cache_utf8.encode 1355 insertions = {} 1356 removals = {} 1357 # Accumulate parent references (path_utf8, id), to check for parentless 1358 # items or items placed under files/links/tree-references. We get 1359 # references from every item in the delta that is not a deletion and 1360 # is not itself the root. 1361 parents = set() 1362 # Added ids must not be in the dirstate already. This set holds those 1363 # ids. 1364 new_ids = set() 1365 # This loop transforms the delta to single atomic operations that can 1366 # be executed and validated. 
1367 delta = self._check_delta_is_valid(delta) 1368 for old_path, new_path, file_id, inv_entry in delta: 1369 if not isinstance(file_id, bytes): 1370 raise AssertionError( 1371 "must be a utf8 file_id not %s" % (type(file_id), )) 1372 if (file_id in insertions) or (file_id in removals): 1373 self._raise_invalid(old_path or new_path, file_id, 1374 "repeated file_id") 1375 if old_path is not None: 1376 old_path = old_path.encode('utf-8') 1377 removals[file_id] = old_path 1378 else: 1379 new_ids.add(file_id) 1380 if new_path is not None: 1381 if inv_entry is None: 1382 self._raise_invalid(new_path, file_id, 1383 "new_path with no entry") 1384 new_path = new_path.encode('utf-8') 1385 dirname_utf8, basename = osutils.split(new_path) 1386 if basename: 1387 parents.add((dirname_utf8, inv_entry.parent_id)) 1388 key = (dirname_utf8, basename, file_id) 1389 minikind = DirState._kind_to_minikind[inv_entry.kind] 1390 if minikind == b't': 1391 fingerprint = inv_entry.reference_revision or b'' 1392 else: 1393 fingerprint = b'' 1394 insertions[file_id] = (key, minikind, inv_entry.executable, 1395 fingerprint, new_path) 1396 # Transform moves into delete+add pairs 1397 if None not in (old_path, new_path): 1398 for child in self._iter_child_entries(0, old_path): 1399 if child[0][2] in insertions or child[0][2] in removals: 1400 continue 1401 child_dirname = child[0][0] 1402 child_basename = child[0][1] 1403 minikind = child[1][0][0] 1404 fingerprint = child[1][0][4] 1405 executable = child[1][0][3] 1406 old_child_path = osutils.pathjoin(child_dirname, 1407 child_basename) 1408 removals[child[0][2]] = old_child_path 1409 child_suffix = child_dirname[len(old_path):] 1410 new_child_dirname = (new_path + child_suffix) 1411 key = (new_child_dirname, child_basename, child[0][2]) 1412 new_child_path = osutils.pathjoin(new_child_dirname, 1413 child_basename) 1414 insertions[child[0][2]] = (key, minikind, executable, 1415 fingerprint, new_child_path) 1416 self._check_delta_ids_absent(new_ids, delta, 0) 1417 try: 1418 self._apply_removals(removals.items()) 1419 self._apply_insertions(insertions.values()) 1420 # Validate parents 1421 self._after_delta_check_parents(parents, 0) 1422 except errors.BzrError as e: 1423 self._changes_aborted = True 1424 if 'integrity error' not in str(e): 1425 raise 1426 # _get_entry raises BzrError when a request is inconsistent; we 1427 # want such errors to be shown as InconsistentDelta - and that 1428 # fits the behaviour we trigger. 1429 raise errors.InconsistentDeltaDelta(delta, 1430 "error from _get_entry. %s" % (e,)) 1431 1432 def _apply_removals(self, removals): 1433 for file_id, path in sorted(removals, reverse=True, 1434 key=operator.itemgetter(1)): 1435 dirname, basename = osutils.split(path) 1436 block_i, entry_i, d_present, f_present = \ 1437 self._get_block_entry_index(dirname, basename, 0) 1438 try: 1439 entry = self._dirblocks[block_i][1][entry_i] 1440 except IndexError: 1441 self._raise_invalid(path, file_id, 1442 "Wrong path for old path.") 1443 if not f_present or entry[1][0][0] in (b'a', b'r'): 1444 self._raise_invalid(path, file_id, 1445 "Wrong path for old path.") 1446 if file_id != entry[0][2]: 1447 self._raise_invalid(path, file_id, 1448 "Attempt to remove path has wrong id - found %r." 1449 % entry[0][2]) 1450 self._make_absent(entry) 1451 # See if we have a malformed delta: deleting a directory must not 1452 # leave crud behind. 
This increases the number of bisects needed 1453 # substantially, but deletion or renames of large numbers of paths 1454 # is rare enough it shouldn't be an issue (famous last words?) RBC 1455 # 20080730. 1456 block_i, entry_i, d_present, f_present = \ 1457 self._get_block_entry_index(path, b'', 0) 1458 if d_present: 1459 # The dir block is still present in the dirstate; this could 1460 # be due to it being in a parent tree, or a corrupt delta. 1461 for child_entry in self._dirblocks[block_i][1]: 1462 if child_entry[1][0][0] not in (b'r', b'a'): 1463 self._raise_invalid(path, entry[0][2], 1464 "The file id was deleted but its children were " 1465 "not deleted.") 1466 1467 def _apply_insertions(self, adds): 1468 try: 1469 for key, minikind, executable, fingerprint, path_utf8 in sorted(adds): 1470 self.update_minimal(key, minikind, executable, fingerprint, 1471 path_utf8=path_utf8) 1472 except errors.NotVersionedError: 1473 self._raise_invalid(path_utf8.decode('utf8'), key[2], 1474 "Missing parent") 1475 1476 def update_basis_by_delta(self, delta, new_revid): 1477 """Update the parents of this tree after a commit. 1478 1479 This gives the tree one parent, with revision id new_revid. The 1480 inventory delta is applied to the current basis tree to generate the 1481 inventory for the parent new_revid, and all other parent trees are 1482 discarded. 1483 1484 Note that an exception during the operation of this method will leave 1485 the dirstate in a corrupt state where it should not be saved. 1486 1487 :param new_revid: The new revision id for the trees parent. 1488 :param delta: An inventory delta (see apply_inventory_delta) describing 1489 the changes from the current left most parent revision to new_revid. 1490 """ 1491 self._read_dirblocks_if_needed() 1492 self._discard_merge_parents() 1493 if self._ghosts != []: 1494 raise NotImplementedError(self.update_basis_by_delta) 1495 if len(self._parents) == 0: 1496 # setup a blank tree, the most simple way. 1497 empty_parent = DirState.NULL_PARENT_DETAILS 1498 for entry in self._iter_entries(): 1499 entry[1].append(empty_parent) 1500 self._parents.append(new_revid) 1501 1502 self._parents[0] = new_revid 1503 1504 delta = self._check_delta_is_valid(delta) 1505 adds = [] 1506 changes = [] 1507 deletes = [] 1508 # The paths this function accepts are unicode and must be encoded as we 1509 # go. 1510 encode = cache_utf8.encode 1511 inv_to_entry = self._inv_entry_to_details 1512 # delta is now (deletes, changes), (adds) in reverse lexographical 1513 # order. 1514 # deletes in reverse lexographic order are safe to process in situ. 1515 # renames are not, as a rename from any path could go to a path 1516 # lexographically lower, so we transform renames into delete, add pairs, 1517 # expanding them recursively as needed. 1518 # At the same time, to reduce interface friction we convert the input 1519 # inventory entries to dirstate. 1520 root_only = ('', '') 1521 # Accumulate parent references (path_utf8, id), to check for parentless 1522 # items or items placed under files/links/tree-references. We get 1523 # references from every item in the delta that is not a deletion and 1524 # is not itself the root. 1525 parents = set() 1526 # Added ids must not be in the dirstate already. This set holds those 1527 # ids. 
1528 new_ids = set() 1529 for old_path, new_path, file_id, inv_entry in delta: 1530 if file_id.__class__ is not bytes: 1531 raise AssertionError( 1532 "must be a utf8 file_id not %s" % (type(file_id), )) 1533 if inv_entry is not None and file_id != inv_entry.file_id: 1534 self._raise_invalid(new_path, file_id, 1535 "mismatched entry file_id %r" % inv_entry) 1536 if new_path is None: 1537 new_path_utf8 = None 1538 else: 1539 if inv_entry is None: 1540 self._raise_invalid(new_path, file_id, 1541 "new_path with no entry") 1542 new_path_utf8 = encode(new_path) 1543 # note the parent for validation 1544 dirname_utf8, basename_utf8 = osutils.split(new_path_utf8) 1545 if basename_utf8: 1546 parents.add((dirname_utf8, inv_entry.parent_id)) 1547 if old_path is None: 1548 old_path_utf8 = None 1549 else: 1550 old_path_utf8 = encode(old_path) 1551 if old_path is None: 1552 adds.append((None, new_path_utf8, file_id, 1553 inv_to_entry(inv_entry), True)) 1554 new_ids.add(file_id) 1555 elif new_path is None: 1556 deletes.append((old_path_utf8, None, file_id, None, True)) 1557 elif (old_path, new_path) == root_only: 1558 # change things in-place 1559 # Note: the case of a parent directory changing its file_id 1560 # tends to break optimizations here, because officially 1561 # the file has actually been moved, it just happens to 1562 # end up at the same path. If we can figure out how to 1563 # handle that case, we can avoid a lot of add+delete 1564 # pairs for objects that stay put. 1565 # elif old_path == new_path: 1566 changes.append((old_path_utf8, new_path_utf8, file_id, 1567 inv_to_entry(inv_entry))) 1568 else: 1569 # Renames: 1570 # Because renames must preserve their children we must have 1571 # processed all relocations and removes before hand. The sort 1572 # order ensures we've examined the child paths, but we also 1573 # have to execute the removals, or the split to an add/delete 1574 # pair will result in the deleted item being reinserted, or 1575 # renamed items being reinserted twice - and possibly at the 1576 # wrong place. Splitting into a delete/add pair also simplifies 1577 # the handling of entries with (b'f', ...), (b'r' ...) because 1578 # the target of the b'r' is old_path here, and we add that to 1579 # deletes, meaning that the add handler does not need to check 1580 # for b'r' items on every pass. 1581 self._update_basis_apply_deletes(deletes) 1582 deletes = [] 1583 # Split into an add/delete pair recursively. 1584 adds.append((old_path_utf8, new_path_utf8, file_id, 1585 inv_to_entry(inv_entry), False)) 1586 # Expunge deletes that we've seen so that deleted/renamed 1587 # children of a rename directory are handled correctly. 1588 new_deletes = reversed(list( 1589 self._iter_child_entries(1, old_path_utf8))) 1590 # Remove the current contents of the tree at orig_path, and 1591 # reinsert at the correct new path. 
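# Illustrative sketch (hypothetical names, not from a real tree): renaming
# directory b'dir' to b'newdir' while tree 1 also holds b'dir/file.txt' has,
# at this point, already queued
#     adds:    (b'dir', b'newdir', b'dir-id', <details>, False)
# and the loop below then queues, per child,
#     adds:    (None, b'newdir/file.txt', b'file-id', <basis details>, False)
#     deletes: (b'dir/file.txt', b'newdir/file.txt', b'file-id', None, False)
# followed by the parent's own removal,
#     deletes: (b'dir', b'newdir', b'dir-id', None, False)
# so every child is re-homed under the new directory before the old records
# are expunged.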
1592 for entry in new_deletes: 1593 child_dirname, child_basename, child_file_id = entry[0] 1594 if child_dirname: 1595 source_path = child_dirname + b'/' + child_basename 1596 else: 1597 source_path = child_basename 1598 if new_path_utf8: 1599 target_path = \ 1600 new_path_utf8 + source_path[len(old_path_utf8):] 1601 else: 1602 if old_path_utf8 == b'': 1603 raise AssertionError("cannot rename directory to" 1604 " itself") 1605 target_path = source_path[len(old_path_utf8) + 1:] 1606 adds.append( 1607 (None, target_path, entry[0][2], entry[1][1], False)) 1608 deletes.append( 1609 (source_path, target_path, entry[0][2], None, False)) 1610 deletes.append( 1611 (old_path_utf8, new_path_utf8, file_id, None, False)) 1612 1613 self._check_delta_ids_absent(new_ids, delta, 1) 1614 try: 1615 # Finish expunging deletes/first half of renames. 1616 self._update_basis_apply_deletes(deletes) 1617 # Reinstate second half of renames and new paths. 1618 self._update_basis_apply_adds(adds) 1619 # Apply in-situ changes. 1620 self._update_basis_apply_changes(changes) 1621 # Validate parents 1622 self._after_delta_check_parents(parents, 1) 1623 except errors.BzrError as e: 1624 self._changes_aborted = True 1625 if 'integrity error' not in str(e): 1626 raise 1627 # _get_entry raises BzrError when a request is inconsistent; we 1628 # want such errors to be shown as InconsistentDelta - and that 1629 # fits the behaviour we trigger. 1630 raise errors.InconsistentDeltaDelta(delta, 1631 "error from _get_entry. %s" % (e,)) 1632 1633 self._mark_modified(header_modified=True) 1634 self._id_index = None 1635 return 1636 1637 def _check_delta_ids_absent(self, new_ids, delta, tree_index): 1638 """Check that none of the file_ids in new_ids are present in a tree.""" 1639 if not new_ids: 1640 return 1641 id_index = self._get_id_index() 1642 for file_id in new_ids: 1643 for key in id_index.get(file_id, ()): 1644 block_i, entry_i, d_present, f_present = \ 1645 self._get_block_entry_index(key[0], key[1], tree_index) 1646 if not f_present: 1647 # In a different tree 1648 continue 1649 entry = self._dirblocks[block_i][1][entry_i] 1650 if entry[0][2] != file_id: 1651 # Different file_id, so not what we want. 1652 continue 1653 self._raise_invalid((b"%s/%s" % key[0:2]).decode('utf8'), file_id, 1654 "This file_id is new in the delta but already present in " 1655 "the target") 1656 1657 def _raise_invalid(self, path, file_id, reason): 1658 self._changes_aborted = True 1659 raise errors.InconsistentDelta(path, file_id, reason) 1660 1661 def _update_basis_apply_adds(self, adds): 1662 """Apply a sequence of adds to tree 1 during update_basis_by_delta. 1663 1664 They may be adds, or renames that have been split into add/delete 1665 pairs. 1666 1667 :param adds: A sequence of adds. Each add is a tuple: 1668 (None, new_path_utf8, file_id, (entry_details), real_add). real_add 1669 is False when the add is the second half of a remove-and-reinsert 1670 pair created to handle renames and deletes. 1671 """ 1672 # Adds are accumulated partly from renames, so can be in any input 1673 # order - sort it. 1674 # TODO: we may want to sort in dirblocks order. That way each entry 1675 # will end up in the same directory, allowing the _get_entry 1676 # fast-path for looking up 2 items in the same dir work. 1677 adds.sort(key=lambda x: x[1]) 1678 # adds is now in lexographic order, which places all parents before 1679 # their children, so we can process it linearly. 
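# For example (hypothetical paths), sorting by new_path yields
#     b'dir', b'dir/a.txt', b'dir/sub', b'dir/sub/b.txt'
# because a parent path is always a byte-prefix of its children's paths, so
# each directory entry is applied before anything inside it.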
1680 st = static_tuple.StaticTuple 1681 for old_path, new_path, file_id, new_details, real_add in adds: 1682 dirname, basename = osutils.split(new_path) 1683 entry_key = st(dirname, basename, file_id) 1684 block_index, present = self._find_block_index_from_key(entry_key) 1685 if not present: 1686 # The block where we want to put the file is not present. 1687 # However, it might have just been an empty directory. Look for 1688 # the parent in the basis-so-far before throwing an error. 1689 parent_dir, parent_base = osutils.split(dirname) 1690 parent_block_idx, parent_entry_idx, _, parent_present = \ 1691 self._get_block_entry_index(parent_dir, parent_base, 1) 1692 if not parent_present: 1693 self._raise_invalid(new_path, file_id, 1694 "Unable to find block for this record." 1695 " Was the parent added?") 1696 self._ensure_block(parent_block_idx, parent_entry_idx, dirname) 1697 1698 block = self._dirblocks[block_index][1] 1699 entry_index, present = self._find_entry_index(entry_key, block) 1700 if real_add: 1701 if old_path is not None: 1702 self._raise_invalid(new_path, file_id, 1703 'considered a real add but still had old_path at %s' 1704 % (old_path,)) 1705 if present: 1706 entry = block[entry_index] 1707 basis_kind = entry[1][1][0] 1708 if basis_kind == b'a': 1709 entry[1][1] = new_details 1710 elif basis_kind == b'r': 1711 raise NotImplementedError() 1712 else: 1713 self._raise_invalid(new_path, file_id, 1714 "An entry was marked as a new add" 1715 " but the basis target already existed") 1716 else: 1717 # The exact key was not found in the block. However, we need to 1718 # check if there is a key next to us that would have matched. 1719 # We only need to check 2 locations, because there are only 2 1720 # trees present. 1721 for maybe_index in range(entry_index - 1, entry_index + 1): 1722 if maybe_index < 0 or maybe_index >= len(block): 1723 continue 1724 maybe_entry = block[maybe_index] 1725 if maybe_entry[0][:2] != (dirname, basename): 1726 # Just a random neighbor 1727 continue 1728 if maybe_entry[0][2] == file_id: 1729 raise AssertionError( 1730 '_find_entry_index didnt find a key match' 1731 ' but walking the data did, for %s' 1732 % (entry_key,)) 1733 basis_kind = maybe_entry[1][1][0] 1734 if basis_kind not in (b'a', b'r'): 1735 self._raise_invalid(new_path, file_id, 1736 "we have an add record for path, but the path" 1737 " is already present with another file_id %s" 1738 % (maybe_entry[0][2],)) 1739 1740 entry = (entry_key, [DirState.NULL_PARENT_DETAILS, 1741 new_details]) 1742 block.insert(entry_index, entry) 1743 1744 active_kind = entry[1][0][0] 1745 if active_kind == b'a': 1746 # The active record shows up as absent, this could be genuine, 1747 # or it could be present at some other location. We need to 1748 # verify. 1749 id_index = self._get_id_index() 1750 # The id_index may not be perfectly accurate for tree1, because 1751 # we haven't been keeping it updated. However, it should be 1752 # fine for tree0, and that gives us enough info for what we 1753 # need 1754 keys = id_index.get(file_id, ()) 1755 for key in keys: 1756 block_i, entry_i, d_present, f_present = \ 1757 self._get_block_entry_index(key[0], key[1], 0) 1758 if not f_present: 1759 continue 1760 active_entry = self._dirblocks[block_i][1][entry_i] 1761 if (active_entry[0][2] != file_id): 1762 # Some other file is at this path, we don't need to 1763 # link it. 
1764 continue 1765 real_active_kind = active_entry[1][0][0] 1766 if real_active_kind in (b'a', b'r'): 1767 # We found a record, which was not *this* record, 1768 # which matches the file_id, but is not actually 1769 # present. Something seems *really* wrong. 1770 self._raise_invalid(new_path, file_id, 1771 "We found a tree0 entry that doesnt make sense") 1772 # Now, we've found a tree0 entry which matches the file_id 1773 # but is at a different location. So update them to be 1774 # rename records. 1775 active_dir, active_name = active_entry[0][:2] 1776 if active_dir: 1777 active_path = active_dir + b'/' + active_name 1778 else: 1779 active_path = active_name 1780 active_entry[1][1] = st(b'r', new_path, 0, False, b'') 1781 entry[1][0] = st(b'r', active_path, 0, False, b'') 1782 elif active_kind == b'r': 1783 raise NotImplementedError() 1784 1785 new_kind = new_details[0] 1786 if new_kind == b'd': 1787 self._ensure_block(block_index, entry_index, new_path) 1788 1789 def _update_basis_apply_changes(self, changes): 1790 """Apply a sequence of changes to tree 1 during update_basis_by_delta. 1791 1792 :param adds: A sequence of changes. Each change is a tuple: 1793 (path_utf8, path_utf8, file_id, (entry_details)) 1794 """ 1795 for old_path, new_path, file_id, new_details in changes: 1796 # the entry for this file_id must be in tree 0. 1797 entry = self._get_entry(1, file_id, new_path) 1798 if entry[0] is None or entry[1][1][0] in (b'a', b'r'): 1799 self._raise_invalid(new_path, file_id, 1800 'changed entry considered not present') 1801 entry[1][1] = new_details 1802 1803 def _update_basis_apply_deletes(self, deletes): 1804 """Apply a sequence of deletes to tree 1 during update_basis_by_delta. 1805 1806 They may be deletes, or renames that have been split into add/delete 1807 pairs. 1808 1809 :param deletes: A sequence of deletes. Each delete is a tuple: 1810 (old_path_utf8, new_path_utf8, file_id, None, real_delete). 1811 real_delete is True when the desired outcome is an actual deletion 1812 rather than the rename handling logic temporarily deleting a path 1813 during the replacement of a parent. 1814 """ 1815 null = DirState.NULL_PARENT_DETAILS 1816 for old_path, new_path, file_id, _, real_delete in deletes: 1817 if real_delete != (new_path is None): 1818 self._raise_invalid(old_path, file_id, "bad delete delta") 1819 # the entry for this file_id must be in tree 1. 1820 dirname, basename = osutils.split(old_path) 1821 block_index, entry_index, dir_present, file_present = \ 1822 self._get_block_entry_index(dirname, basename, 1) 1823 if not file_present: 1824 self._raise_invalid(old_path, file_id, 1825 'basis tree does not contain removed entry') 1826 entry = self._dirblocks[block_index][1][entry_index] 1827 # The state of the entry in the 'active' WT 1828 active_kind = entry[1][0][0] 1829 if entry[0][2] != file_id: 1830 self._raise_invalid(old_path, file_id, 1831 'mismatched file_id in tree 1') 1832 dir_block = () 1833 old_kind = entry[1][1][0] 1834 if active_kind in b'ar': 1835 # The active tree doesn't have this file_id. 1836 # The basis tree is changing this record. If this is a 1837 # rename, then we don't want the record here at all 1838 # anymore. If it is just an in-place change, we want the 1839 # record here, but we'll add it if we need to. 
So we just 1840 # delete it 1841 if active_kind == b'r': 1842 active_path = entry[1][0][1] 1843 active_entry = self._get_entry(0, file_id, active_path) 1844 if active_entry[1][1][0] != b'r': 1845 self._raise_invalid(old_path, file_id, 1846 "Dirstate did not have matching rename entries") 1847 elif active_entry[1][0][0] in b'ar': 1848 self._raise_invalid(old_path, file_id, 1849 "Dirstate had a rename pointing at an inactive" 1850 " tree0") 1851 active_entry[1][1] = null 1852 del self._dirblocks[block_index][1][entry_index] 1853 if old_kind == b'd': 1854 # This was a directory, and the active tree says it 1855 # doesn't exist, and now the basis tree says it doesn't 1856 # exist. Remove its dirblock if present 1857 (dir_block_index, 1858 present) = self._find_block_index_from_key( 1859 (old_path, b'', b'')) 1860 if present: 1861 dir_block = self._dirblocks[dir_block_index][1] 1862 if not dir_block: 1863 # This entry is empty, go ahead and just remove it 1864 del self._dirblocks[dir_block_index] 1865 else: 1866 # There is still an active record, so just mark this 1867 # removed. 1868 entry[1][1] = null 1869 block_i, entry_i, d_present, f_present = \ 1870 self._get_block_entry_index(old_path, b'', 1) 1871 if d_present: 1872 dir_block = self._dirblocks[block_i][1] 1873 for child_entry in dir_block: 1874 child_basis_kind = child_entry[1][1][0] 1875 if child_basis_kind not in b'ar': 1876 self._raise_invalid(old_path, file_id, 1877 "The file id was deleted but its children were " 1878 "not deleted.") 1879 1880 def _after_delta_check_parents(self, parents, index): 1881 """Check that parents required by the delta are all intact. 1882 1883 :param parents: An iterable of (path_utf8, file_id) tuples which are 1884 required to be present in tree 'index' at path_utf8 with id file_id 1885 and be a directory. 1886 :param index: The column in the dirstate to check for parents in. 1887 """ 1888 for dirname_utf8, file_id in parents: 1889 # Get the entry - the ensures that file_id, dirname_utf8 exists and 1890 # has the right file id. 1891 entry = self._get_entry(index, file_id, dirname_utf8) 1892 if entry[1] is None: 1893 self._raise_invalid(dirname_utf8.decode('utf8'), 1894 file_id, "This parent is not present.") 1895 # Parents of things must be directories 1896 if entry[1][index][0] != b'd': 1897 self._raise_invalid(dirname_utf8.decode('utf8'), 1898 file_id, "This parent is not a directory.") 1899 1900 def _observed_sha1(self, entry, sha1, stat_value, 1901 _stat_to_minikind=_stat_to_minikind): 1902 """Note the sha1 of a file. 1903 1904 :param entry: The entry the sha1 is for. 1905 :param sha1: The observed sha1. 1906 :param stat_value: The os.lstat for the file. 1907 """ 1908 try: 1909 minikind = _stat_to_minikind[stat_value.st_mode & 0o170000] 1910 except KeyError: 1911 # Unhandled kind 1912 return None 1913 if minikind == b'f': 1914 if self._cutoff_time is None: 1915 self._sha_cutoff_time() 1916 if (stat_value.st_mtime < self._cutoff_time 1917 and stat_value.st_ctime < self._cutoff_time): 1918 entry[1][0] = (b'f', sha1, stat_value.st_size, entry[1][0][3], 1919 pack_stat(stat_value)) 1920 self._mark_modified([entry]) 1921 1922 def _sha_cutoff_time(self): 1923 """Return cutoff time. 1924 1925 Files modified more recently than this time are at risk of being 1926 undetectably modified and so can't be cached. 1927 """ 1928 # Cache the cutoff time as long as we hold a lock. 1929 # time.time() isn't super expensive (approx 3.38us), but 1930 # when you call it 50,000 times it adds up. 
1931 # For comparison, os.lstat() costs 7.2us if it is hot. 1932 self._cutoff_time = int(time.time()) - 3 1933 return self._cutoff_time 1934 1935 def _lstat(self, abspath, entry): 1936 """Return the os.lstat value for this path.""" 1937 return os.lstat(abspath) 1938 1939 def _sha1_file_and_mutter(self, abspath): 1940 # when -Dhashcache is turned on, this is monkey-patched in to log 1941 # file reads 1942 trace.mutter("dirstate sha1 " + abspath) 1943 return self._sha1_provider.sha1(abspath) 1944 1945 def _is_executable(self, mode, old_executable): 1946 """Is this file executable?""" 1947 if self._use_filesystem_for_exec: 1948 return bool(S_IEXEC & mode) 1949 else: 1950 return old_executable 1951 1952 def _read_link(self, abspath, old_link): 1953 """Read the target of a symlink""" 1954 # TODO: jam 200700301 On Win32, this could just return the value 1955 # already in memory. However, this really needs to be done at a 1956 # higher level, because there either won't be anything on disk, 1957 # or the thing on disk will be a file. 1958 fs_encoding = osutils._fs_enc 1959 if isinstance(abspath, str): 1960 # abspath is defined as the path to pass to lstat. readlink is 1961 # buggy in python < 2.6 (it doesn't encode unicode path into FS 1962 # encoding), so we need to encode ourselves knowing that unicode 1963 # paths are produced by UnicodeDirReader on purpose. 1964 abspath = abspath.encode(fs_encoding) 1965 target = os.readlink(abspath) 1966 if fs_encoding not in ('utf-8', 'ascii'): 1967 # Change encoding if needed 1968 target = target.decode(fs_encoding).encode('UTF-8') 1969 return target 1970 1971 def get_ghosts(self): 1972 """Return a list of the parent tree revision ids that are ghosts.""" 1973 self._read_header_if_needed() 1974 return self._ghosts 1975 1976 def get_lines(self): 1977 """Serialise the entire dirstate to a sequence of lines.""" 1978 if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and 1979 self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED): 1980 # read what's on disk. 1981 self._state_file.seek(0) 1982 return self._state_file.readlines() 1983 lines = [] 1984 lines.append(self._get_parents_line(self.get_parent_ids())) 1985 lines.append(self._get_ghosts_line(self._ghosts)) 1986 lines.extend(self._iter_entry_lines()) 1987 return self._get_output_lines(lines) 1988 1989 def _get_ghosts_line(self, ghost_ids): 1990 """Create a line for the state file for ghost information.""" 1991 return b'\0'.join([b'%d' % len(ghost_ids)] + ghost_ids) 1992 1993 def _get_parents_line(self, parent_ids): 1994 """Create a line for the state file for parents information.""" 1995 return b'\0'.join([b'%d' % len(parent_ids)] + parent_ids) 1996 1997 def _iter_entry_lines(self): 1998 """Create lines for entries.""" 1999 return map(self._entry_to_line, self._iter_entries()) 2000 2001 def _get_fields_to_entry(self): 2002 """Get a function which converts entry fields into a entry record. 2003 2004 This handles size and executable, as well as parent records. 2005 2006 :return: A function which takes a list of fields, and returns an 2007 appropriate record for storing in memory. 
2008 """ 2009 # This is intentionally unrolled for performance 2010 num_present_parents = self._num_present_parents() 2011 if num_present_parents == 0: 2012 def fields_to_entry_0_parents(fields, _int=int): 2013 path_name_file_id_key = (fields[0], fields[1], fields[2]) 2014 return (path_name_file_id_key, [ 2015 ( # Current tree 2016 fields[3], # minikind 2017 fields[4], # fingerprint 2018 _int(fields[5]), # size 2019 fields[6] == b'y', # executable 2020 fields[7], # packed_stat or revision_id 2021 )]) 2022 return fields_to_entry_0_parents 2023 elif num_present_parents == 1: 2024 def fields_to_entry_1_parent(fields, _int=int): 2025 path_name_file_id_key = (fields[0], fields[1], fields[2]) 2026 return (path_name_file_id_key, [ 2027 ( # Current tree 2028 fields[3], # minikind 2029 fields[4], # fingerprint 2030 _int(fields[5]), # size 2031 fields[6] == b'y', # executable 2032 fields[7], # packed_stat or revision_id 2033 ), 2034 ( # Parent 1 2035 fields[8], # minikind 2036 fields[9], # fingerprint 2037 _int(fields[10]), # size 2038 fields[11] == b'y', # executable 2039 fields[12], # packed_stat or revision_id 2040 ), 2041 ]) 2042 return fields_to_entry_1_parent 2043 elif num_present_parents == 2: 2044 def fields_to_entry_2_parents(fields, _int=int): 2045 path_name_file_id_key = (fields[0], fields[1], fields[2]) 2046 return (path_name_file_id_key, [ 2047 ( # Current tree 2048 fields[3], # minikind 2049 fields[4], # fingerprint 2050 _int(fields[5]), # size 2051 fields[6] == b'y', # executable 2052 fields[7], # packed_stat or revision_id 2053 ), 2054 ( # Parent 1 2055 fields[8], # minikind 2056 fields[9], # fingerprint 2057 _int(fields[10]), # size 2058 fields[11] == b'y', # executable 2059 fields[12], # packed_stat or revision_id 2060 ), 2061 ( # Parent 2 2062 fields[13], # minikind 2063 fields[14], # fingerprint 2064 _int(fields[15]), # size 2065 fields[16] == b'y', # executable 2066 fields[17], # packed_stat or revision_id 2067 ), 2068 ]) 2069 return fields_to_entry_2_parents 2070 else: 2071 def fields_to_entry_n_parents(fields, _int=int): 2072 path_name_file_id_key = (fields[0], fields[1], fields[2]) 2073 trees = [(fields[cur], # minikind 2074 fields[cur + 1], # fingerprint 2075 _int(fields[cur + 2]), # size 2076 fields[cur + 3] == b'y', # executable 2077 fields[cur + 4], # stat or revision_id 2078 ) for cur in range(3, len(fields) - 1, 5)] 2079 return path_name_file_id_key, trees 2080 return fields_to_entry_n_parents 2081 2082 def get_parent_ids(self): 2083 """Return a list of the parent tree ids for the directory state.""" 2084 self._read_header_if_needed() 2085 return list(self._parents) 2086 2087 def _get_block_entry_index(self, dirname, basename, tree_index): 2088 """Get the coordinates for a path in the state structure. 2089 2090 :param dirname: The utf8 dirname to lookup. 2091 :param basename: The utf8 basename to lookup. 2092 :param tree_index: The index of the tree for which this lookup should 2093 be attempted. 2094 :return: A tuple describing where the path is located, or should be 2095 inserted. The tuple contains four fields: the block index, the row 2096 index, the directory is present (boolean), the entire path is 2097 present (boolean). There is no guarantee that either 2098 coordinate is currently reachable unless the found field for it is 2099 True. For instance, a directory not present in the searched tree 2100 may be returned with a value one greater than the current highest 2101 block offset. 
The directory present field will always be True when 2102 the path present field is True. The directory present field does 2103 NOT indicate that the directory is present in the searched tree, 2104 rather it indicates that there are at least some files in some 2105 tree present there. 2106 """ 2107 self._read_dirblocks_if_needed() 2108 key = dirname, basename, b'' 2109 block_index, present = self._find_block_index_from_key(key) 2110 if not present: 2111 # no such directory - return the dir index and 0 for the row. 2112 return block_index, 0, False, False 2113 block = self._dirblocks[block_index][1] # access the entries only 2114 entry_index, present = self._find_entry_index(key, block) 2115 # linear search through entries at this path to find the one 2116 # requested. 2117 while entry_index < len(block) and block[entry_index][0][1] == basename: 2118 if block[entry_index][1][tree_index][0] not in (b'a', b'r'): 2119 # neither absent or relocated 2120 return block_index, entry_index, True, True 2121 entry_index += 1 2122 return block_index, entry_index, True, False 2123 2124 def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None, 2125 include_deleted=False): 2126 """Get the dirstate entry for path in tree tree_index. 2127 2128 If either file_id or path is supplied, it is used as the key to lookup. 2129 If both are supplied, the fastest lookup is used, and an error is 2130 raised if they do not both point at the same row. 2131 2132 :param tree_index: The index of the tree we wish to locate this path 2133 in. If the path is present in that tree, the entry containing its 2134 details is returned, otherwise (None, None) is returned 2135 0 is the working tree, higher indexes are successive parent 2136 trees. 2137 :param fileid_utf8: A utf8 file_id to look up. 2138 :param path_utf8: An utf8 path to be looked up. 2139 :param include_deleted: If True, and performing a lookup via 2140 fileid_utf8 rather than path_utf8, return an entry for deleted 2141 (absent) paths. 2142 :return: The dirstate entry tuple for path, or (None, None) 2143 """ 2144 self._read_dirblocks_if_needed() 2145 if path_utf8 is not None: 2146 if not isinstance(path_utf8, bytes): 2147 raise errors.BzrError('path_utf8 is not bytes: %s %r' 2148 % (type(path_utf8), path_utf8)) 2149 # path lookups are faster 2150 dirname, basename = osutils.split(path_utf8) 2151 block_index, entry_index, dir_present, file_present = \ 2152 self._get_block_entry_index(dirname, basename, tree_index) 2153 if not file_present: 2154 return None, None 2155 entry = self._dirblocks[block_index][1][entry_index] 2156 if not (entry[0][2] and entry[1][tree_index][0] not in (b'a', b'r')): 2157 raise AssertionError('unversioned entry?') 2158 if fileid_utf8: 2159 if entry[0][2] != fileid_utf8: 2160 self._changes_aborted = True 2161 raise errors.BzrError('integrity error ? : mismatching' 2162 ' tree_index, file_id and path') 2163 return entry 2164 else: 2165 possible_keys = self._get_id_index().get(fileid_utf8, ()) 2166 if not possible_keys: 2167 return None, None 2168 for key in possible_keys: 2169 block_index, present = \ 2170 self._find_block_index_from_key(key) 2171 # strange, probably indicates an out of date 2172 # id index - for now, allow this. 2173 if not present: 2174 continue 2175 # WARNING: DO not change this code to use _get_block_entry_index 2176 # as that function is not suitable: it does not use the key 2177 # to lookup, and thus the wrong coordinates are returned. 
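# Illustration (hypothetical key and paths): a stale id-index key such as
#     (b'old-dir', b'name.txt', b'the-file-id')
# may resolve, for this tree, to a relocation record like
#     (b'r', b'new-dir/name.txt', 0, False, b'')
# in which case the code below recurses on that fingerprint to reach the
# entry's real home, while a b'a' record means the id has no home in this
# tree at all (returned only when include_deleted is set).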
2178 block = self._dirblocks[block_index][1] 2179 entry_index, present = self._find_entry_index(key, block) 2180 if present: 2181 entry = self._dirblocks[block_index][1][entry_index] 2182 # TODO: We might want to assert that entry[0][2] == 2183 # fileid_utf8. 2184 # GZ 2017-06-09: Hoist set of minkinds somewhere 2185 if entry[1][tree_index][0] in {b'f', b'd', b'l', b't'}: 2186 # this is the result we are looking for: the 2187 # real home of this file_id in this tree. 2188 return entry 2189 if entry[1][tree_index][0] == b'a': 2190 # there is no home for this entry in this tree 2191 if include_deleted: 2192 return entry 2193 return None, None 2194 if entry[1][tree_index][0] != b'r': 2195 raise AssertionError( 2196 "entry %r has invalid minikind %r for tree %r" 2197 % (entry, 2198 entry[1][tree_index][0], 2199 tree_index)) 2200 real_path = entry[1][tree_index][1] 2201 return self._get_entry(tree_index, fileid_utf8=fileid_utf8, 2202 path_utf8=real_path) 2203 return None, None 2204 2205 @classmethod 2206 def initialize(cls, path, sha1_provider=None): 2207 """Create a new dirstate on path. 2208 2209 The new dirstate will be an empty tree - that is it has no parents, 2210 and only a root node - which has id ROOT_ID. 2211 2212 :param path: The name of the file for the dirstate. 2213 :param sha1_provider: an object meeting the SHA1Provider interface. 2214 If None, a DefaultSHA1Provider is used. 2215 :return: A write-locked DirState object. 2216 """ 2217 # This constructs a new DirState object on a path, sets the _state_file 2218 # to a new empty file for that path. It then calls _set_data() with our 2219 # stock empty dirstate information - a root with ROOT_ID, no children, 2220 # and no parents. Finally it calls save() to ensure that this data will 2221 # persist. 2222 if sha1_provider is None: 2223 sha1_provider = DefaultSHA1Provider() 2224 result = cls(path, sha1_provider) 2225 # root dir and root dir contents with no children. 2226 empty_tree_dirblocks = [(b'', []), (b'', [])] 2227 # a new root directory, with a NULLSTAT. 2228 empty_tree_dirblocks[0][1].append( 2229 ((b'', b'', inventory.ROOT_ID), [ 2230 (b'd', b'', 0, False, DirState.NULLSTAT), 2231 ])) 2232 result.lock_write() 2233 try: 2234 result._set_data([], empty_tree_dirblocks) 2235 result.save() 2236 except: 2237 result.unlock() 2238 raise 2239 return result 2240 2241 @staticmethod 2242 def _inv_entry_to_details(inv_entry): 2243 """Convert an inventory entry (from a revision tree) to state details. 2244 2245 :param inv_entry: An inventory entry whose sha1 and link targets can be 2246 relied upon, and which has a revision set. 2247 :return: A details tuple - the details for a single tree at a path + 2248 id. 
2249 """ 2250 kind = inv_entry.kind 2251 minikind = DirState._kind_to_minikind[kind] 2252 tree_data = inv_entry.revision 2253 if kind == 'directory': 2254 fingerprint = b'' 2255 size = 0 2256 executable = False 2257 elif kind == 'symlink': 2258 if inv_entry.symlink_target is None: 2259 fingerprint = b'' 2260 else: 2261 fingerprint = inv_entry.symlink_target.encode('utf8') 2262 size = 0 2263 executable = False 2264 elif kind == 'file': 2265 fingerprint = inv_entry.text_sha1 or b'' 2266 size = inv_entry.text_size or 0 2267 executable = inv_entry.executable 2268 elif kind == 'tree-reference': 2269 fingerprint = inv_entry.reference_revision or b'' 2270 size = 0 2271 executable = False 2272 else: 2273 raise Exception("can't pack %s" % inv_entry) 2274 return static_tuple.StaticTuple(minikind, fingerprint, size, 2275 executable, tree_data) 2276 2277 def _iter_child_entries(self, tree_index, path_utf8): 2278 """Iterate over all the entries that are children of path_utf. 2279 2280 This only returns entries that are present (not in b'a', b'r') in 2281 tree_index. tree_index data is not refreshed, so if tree 0 is used, 2282 results may differ from that obtained if paths were statted to 2283 determine what ones were directories. 2284 2285 Asking for the children of a non-directory will return an empty 2286 iterator. 2287 """ 2288 pending_dirs = [] 2289 next_pending_dirs = [path_utf8] 2290 absent = (b'a', b'r') 2291 while next_pending_dirs: 2292 pending_dirs = next_pending_dirs 2293 next_pending_dirs = [] 2294 for path in pending_dirs: 2295 block_index, present = self._find_block_index_from_key( 2296 (path, b'', b'')) 2297 if block_index == 0: 2298 block_index = 1 2299 if len(self._dirblocks) == 1: 2300 # asked for the children of the root with no other 2301 # contents. 2302 return 2303 if not present: 2304 # children of a non-directory asked for. 2305 continue 2306 block = self._dirblocks[block_index] 2307 for entry in block[1]: 2308 kind = entry[1][tree_index][0] 2309 if kind not in absent: 2310 yield entry 2311 if kind == b'd': 2312 if entry[0][0]: 2313 path = entry[0][0] + b'/' + entry[0][1] 2314 else: 2315 path = entry[0][1] 2316 next_pending_dirs.append(path) 2317 2318 def _iter_entries(self): 2319 """Iterate over all the entries in the dirstate. 2320 2321 Each yelt item is an entry in the standard format described in the 2322 docstring of breezy.dirstate. 2323 """ 2324 self._read_dirblocks_if_needed() 2325 for directory in self._dirblocks: 2326 for entry in directory[1]: 2327 yield entry 2328 2329 def _get_id_index(self): 2330 """Get an id index of self._dirblocks. 2331 2332 This maps from file_id => [(directory, name, file_id)] entries where 2333 that file_id appears in one of the trees. 2334 """ 2335 if self._id_index is None: 2336 id_index = {} 2337 for key, tree_details in self._iter_entries(): 2338 self._add_to_id_index(id_index, key) 2339 self._id_index = id_index 2340 return self._id_index 2341 2342 def _add_to_id_index(self, id_index, entry_key): 2343 """Add this entry to the _id_index mapping.""" 2344 # This code used to use a set for every entry in the id_index. However, 2345 # it is *rare* to have more than one entry. So a set is a large 2346 # overkill. And even when we do, we won't ever have more than the 2347 # number of parent trees. Which is still a small number (rarely >2). As 2348 # such, we use a simple tuple, and do our own uniqueness checks. While 2349 # the 'in' check is O(N) since N is nicely bounded it shouldn't ever 2350 # cause quadratic failure. 
2351 file_id = entry_key[2] 2352 entry_key = static_tuple.StaticTuple.from_sequence(entry_key) 2353 if file_id not in id_index: 2354 id_index[file_id] = static_tuple.StaticTuple(entry_key,) 2355 else: 2356 entry_keys = id_index[file_id] 2357 if entry_key not in entry_keys: 2358 id_index[file_id] = entry_keys + (entry_key,) 2359 2360 def _remove_from_id_index(self, id_index, entry_key): 2361 """Remove this entry from the _id_index mapping. 2362 2363 It is an programming error to call this when the entry_key is not 2364 already present. 2365 """ 2366 file_id = entry_key[2] 2367 entry_keys = list(id_index[file_id]) 2368 entry_keys.remove(entry_key) 2369 id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys) 2370 2371 def _get_output_lines(self, lines): 2372 """Format lines for final output. 2373 2374 :param lines: A sequence of lines containing the parents list and the 2375 path lines. 2376 """ 2377 output_lines = [DirState.HEADER_FORMAT_3] 2378 lines.append(b'') # a final newline 2379 inventory_text = b'\0\n\0'.join(lines) 2380 output_lines.append(b'crc32: %d\n' % (zlib.crc32(inventory_text),)) 2381 # -3, 1 for num parents, 1 for ghosts, 1 for final newline 2382 num_entries = len(lines) - 3 2383 output_lines.append(b'num_entries: %d\n' % (num_entries,)) 2384 output_lines.append(inventory_text) 2385 return output_lines 2386 2387 def _make_deleted_row(self, fileid_utf8, parents): 2388 """Return a deleted row for fileid_utf8.""" 2389 return (b'/', b'RECYCLED.BIN', b'file', fileid_utf8, 0, DirState.NULLSTAT, 2390 b''), parents 2391 2392 def _num_present_parents(self): 2393 """The number of parent entries in each record row.""" 2394 return len(self._parents) - len(self._ghosts) 2395 2396 @classmethod 2397 def on_file(cls, path, sha1_provider=None, worth_saving_limit=0, 2398 use_filesystem_for_exec=True): 2399 """Construct a DirState on the file at path "path". 2400 2401 :param path: The path at which the dirstate file on disk should live. 2402 :param sha1_provider: an object meeting the SHA1Provider interface. 2403 If None, a DefaultSHA1Provider is used. 2404 :param worth_saving_limit: when the exact number of hash changed 2405 entries is known, only bother saving the dirstate if more than 2406 this count of entries have changed. -1 means never save. 2407 :param use_filesystem_for_exec: Whether to trust the filesystem 2408 for executable bit information 2409 :return: An unlocked DirState object, associated with the given path. 2410 """ 2411 if sha1_provider is None: 2412 sha1_provider = DefaultSHA1Provider() 2413 result = cls(path, sha1_provider, 2414 worth_saving_limit=worth_saving_limit, 2415 use_filesystem_for_exec=use_filesystem_for_exec) 2416 return result 2417 2418 def _read_dirblocks_if_needed(self): 2419 """Read in all the dirblocks from the file if they are not in memory. 2420 2421 This populates self._dirblocks, and sets self._dirblock_state to 2422 IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block 2423 loading. 2424 """ 2425 self._read_header_if_needed() 2426 if self._dirblock_state == DirState.NOT_IN_MEMORY: 2427 _read_dirblocks(self) 2428 2429 def _read_header(self): 2430 """This reads in the metadata header, and the parent ids. 2431 2432 After reading in, the file should be positioned at the null 2433 just before the start of the first record in the file. 
2434 2435 :return: (expected crc checksum, number of entries, parent list) 2436 """ 2437 self._read_prelude() 2438 parent_line = self._state_file.readline() 2439 info = parent_line.split(b'\0') 2440 num_parents = int(info[0]) 2441 self._parents = info[1:-1] 2442 ghost_line = self._state_file.readline() 2443 info = ghost_line.split(b'\0') 2444 num_ghosts = int(info[1]) 2445 self._ghosts = info[2:-1] 2446 self._header_state = DirState.IN_MEMORY_UNMODIFIED 2447 self._end_of_header = self._state_file.tell() 2448 2449 def _read_header_if_needed(self): 2450 """Read the header of the dirstate file if needed.""" 2451 # inline this as it will be called a lot 2452 if not self._lock_token: 2453 raise errors.ObjectNotLocked(self) 2454 if self._header_state == DirState.NOT_IN_MEMORY: 2455 self._read_header() 2456 2457 def _read_prelude(self): 2458 """Read in the prelude header of the dirstate file. 2459 2460 This only reads in the stuff that is not connected to the crc 2461 checksum. The position will be correct to read in the rest of 2462 the file and check the checksum after this point. 2463 The next entry in the file should be the number of parents, 2464 and their ids. Followed by a newline. 2465 """ 2466 header = self._state_file.readline() 2467 if header != DirState.HEADER_FORMAT_3: 2468 raise errors.BzrError( 2469 'invalid header line: %r' % (header,)) 2470 crc_line = self._state_file.readline() 2471 if not crc_line.startswith(b'crc32: '): 2472 raise errors.BzrError('missing crc32 checksum: %r' % crc_line) 2473 self.crc_expected = int(crc_line[len(b'crc32: '):-1]) 2474 num_entries_line = self._state_file.readline() 2475 if not num_entries_line.startswith(b'num_entries: '): 2476 raise errors.BzrError('missing num_entries line') 2477 self._num_entries = int(num_entries_line[len(b'num_entries: '):-1]) 2478 2479 def sha1_from_stat(self, path, stat_result): 2480 """Find a sha1 given a stat lookup.""" 2481 return self._get_packed_stat_index().get(pack_stat(stat_result), None) 2482 2483 def _get_packed_stat_index(self): 2484 """Get a packed_stat index of self._dirblocks.""" 2485 if self._packed_stat_index is None: 2486 index = {} 2487 for key, tree_details in self._iter_entries(): 2488 if tree_details[0][0] == b'f': 2489 index[tree_details[0][4]] = tree_details[0][1] 2490 self._packed_stat_index = index 2491 return self._packed_stat_index 2492 2493 def save(self): 2494 """Save any pending changes created during this session. 2495 2496 We reuse the existing file, because that prevents race conditions with 2497 file creation, and use oslocks on it to prevent concurrent modification 2498 and reads - because dirstate's incremental data aggregation is not 2499 compatible with reading a modified file, and replacing a file in use by 2500 another process is impossible on Windows. 2501 2502 A dirstate in read only mode should be smart enough though to validate 2503 that the file has not changed, and otherwise discard its cache and 2504 start over, to allow for fine grained read lock duration, so 'status' 2505 wont block 'commit' - for example. 2506 """ 2507 if self._changes_aborted: 2508 # Should this be a warning? For now, I'm expecting that places that 2509 # mark it inconsistent will warn, making a warning here redundant. 
2510 trace.mutter('Not saving DirState because ' 2511 '_changes_aborted is set.') 2512 return 2513 # TODO: Since we now distinguish IN_MEMORY_MODIFIED from 2514 # IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail 2515 # to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we 2516 # fail to save IN_MEMORY_MODIFIED 2517 if not self._worth_saving(): 2518 return 2519 2520 grabbed_write_lock = False 2521 if self._lock_state != 'w': 2522 grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock() 2523 # Switch over to the new lock, as the old one may be closed. 2524 # TODO: jam 20070315 We should validate the disk file has 2525 # not changed contents, since temporary_write_lock may 2526 # not be an atomic operation. 2527 self._lock_token = new_lock 2528 self._state_file = new_lock.f 2529 if not grabbed_write_lock: 2530 # We couldn't grab a write lock, so we switch back to a read one 2531 return 2532 try: 2533 lines = self.get_lines() 2534 self._state_file.seek(0) 2535 self._state_file.writelines(lines) 2536 self._state_file.truncate() 2537 self._state_file.flush() 2538 self._maybe_fdatasync() 2539 self._mark_unmodified() 2540 finally: 2541 if grabbed_write_lock: 2542 self._lock_token = self._lock_token.restore_read_lock() 2543 self._state_file = self._lock_token.f 2544 # TODO: jam 20070315 We should validate the disk file has 2545 # not changed contents. Since restore_read_lock may 2546 # not be an atomic operation. 2547 2548 def _maybe_fdatasync(self): 2549 """Flush to disk if possible and if not configured off.""" 2550 if self._config_stack.get('dirstate.fdatasync'): 2551 osutils.fdatasync(self._state_file.fileno()) 2552 2553 def _worth_saving(self): 2554 """Is it worth saving the dirstate or not?""" 2555 if (self._header_state == DirState.IN_MEMORY_MODIFIED 2556 or self._dirblock_state == DirState.IN_MEMORY_MODIFIED): 2557 return True 2558 if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED: 2559 if self._worth_saving_limit == -1: 2560 # We never save hash changes when the limit is -1 2561 return False 2562 # If we're using smart saving and only a small number of 2563 # entries have changed their hash, don't bother saving. John has 2564 # suggested using a heuristic here based on the size of the 2565 # changed files and/or tree. For now, we go with a configurable 2566 # number of changes, keeping the calculation time 2567 # as low overhead as possible. (This also keeps all existing 2568 # tests passing as the default is 0, i.e. always save.) 2569 if len(self._known_hash_changes) >= self._worth_saving_limit: 2570 return True 2571 return False 2572 2573 def _set_data(self, parent_ids, dirblocks): 2574 """Set the full dirstate data in memory. 2575 2576 This is an internal function used to completely replace the objects 2577 in memory state. It puts the dirstate into state 'full-dirty'. 2578 2579 :param parent_ids: A list of parent tree revision ids. 2580 :param dirblocks: A list containing one tuple for each directory in the 2581 tree. Each tuple contains the directory path and a list of entries 2582 found in that directory. 2583 """ 2584 # our memory copy is now authoritative. 2585 self._dirblocks = dirblocks 2586 self._mark_modified(header_modified=True) 2587 self._parents = list(parent_ids) 2588 self._id_index = None 2589 self._packed_stat_index = None 2590 2591 def set_path_id(self, path, new_id): 2592 """Change the id of path to new_id in the current working tree. 
2593 2594 :param path: The path inside the tree to set - b'' is the root, 'foo' 2595 is the path foo in the root. 2596 :param new_id: The new id to assign to the path. This must be a utf8 2597 file id (not unicode, and not None). 2598 """ 2599 self._read_dirblocks_if_needed() 2600 if len(path): 2601 # TODO: logic not written 2602 raise NotImplementedError(self.set_path_id) 2603 # TODO: check new id is unique 2604 entry = self._get_entry(0, path_utf8=path) 2605 if entry[0][2] == new_id: 2606 # Nothing to change. 2607 return 2608 if new_id.__class__ != bytes: 2609 raise AssertionError( 2610 "must be a utf8 file_id not %s" % (type(new_id), )) 2611 # mark the old path absent, and insert a new root path 2612 self._make_absent(entry) 2613 self.update_minimal((b'', b'', new_id), b'd', 2614 path_utf8=b'', packed_stat=entry[1][0][4]) 2615 self._mark_modified() 2616 2617 def set_parent_trees(self, trees, ghosts): 2618 """Set the parent trees for the dirstate. 2619 2620 :param trees: A list of revision_id, tree tuples. tree must be provided 2621 even if the revision_id refers to a ghost: supply an empty tree in 2622 this case. 2623 :param ghosts: A list of the revision_ids that are ghosts at the time 2624 of setting. 2625 """ 2626 # TODO: generate a list of parent indexes to preserve to save 2627 # processing specific parent trees. In the common case one tree will 2628 # be preserved - the left most parent. 2629 # TODO: if the parent tree is a dirstate, we might want to walk them 2630 # all by path in parallel for 'optimal' common-case performance. 2631 # generate new root row. 2632 self._read_dirblocks_if_needed() 2633 # TODO future sketch: Examine the existing parents to generate a change 2634 # map and then walk the new parent trees only, mapping them into the 2635 # dirstate. Walk the dirstate at the same time to remove unreferenced 2636 # entries. 2637 # for now: 2638 # sketch: loop over all entries in the dirstate, cherry picking 2639 # entries from the parent trees, if they are not ghost trees. 2640 # after we finish walking the dirstate, all entries not in the dirstate 2641 # are deletes, so we want to append them to the end as per the design 2642 # discussions. So do a set difference on ids with the parents to 2643 # get deletes, and add them to the end. 2644 # During the update process we need to answer the following questions: 2645 # - find other keys containing a fileid in order to create cross-path 2646 # links. We dont't trivially use the inventory from other trees 2647 # because this leads to either double touching, or to accessing 2648 # missing keys, 2649 # - find other keys containing a path 2650 # We accumulate each entry via this dictionary, including the root 2651 by_path = {} 2652 id_index = {} 2653 # we could do parallel iterators, but because file id data may be 2654 # scattered throughout, we dont save on index overhead: we have to look 2655 # at everything anyway. We can probably save cycles by reusing parent 2656 # data and doing an incremental update when adding an additional 2657 # parent, but for now the common cases are adding a new parent (merge), 2658 # and replacing completely (commit), and commit is more common: so 2659 # optimise merge later. 2660 2661 # ---- start generation of full tree mapping data 2662 # what trees should we use? 
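# Illustration of the structure built here (hypothetical values): with one
# real parent tree, every by_path value is a row holding one column per
# tree, e.g.
#     by_path[(b'', b'a.txt', b'file-id')] == [
#         (b'f', b'<sha1>', 12, False, b'<packed stat>'),  # working tree
#         (b'f', b'<sha1>', 12, False, b'parent-rev-id'),  # parent 1
#     ]
# ghost parents are filtered out below, so they contribute no column.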
2663 parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts] 2664 # how many trees do we end up with 2665 parent_count = len(parent_trees) 2666 st = static_tuple.StaticTuple 2667 2668 # one: the current tree 2669 for entry in self._iter_entries(): 2670 # skip entries not in the current tree 2671 if entry[1][0][0] in (b'a', b'r'): # absent, relocated 2672 continue 2673 by_path[entry[0]] = [entry[1][0]] + \ 2674 [DirState.NULL_PARENT_DETAILS] * parent_count 2675 # TODO: Possibly inline this, since we know it isn't present yet 2676 # id_index[entry[0][2]] = (entry[0],) 2677 self._add_to_id_index(id_index, entry[0]) 2678 2679 # now the parent trees: 2680 for tree_index, tree in enumerate(parent_trees): 2681 # the index is off by one, adjust it. 2682 tree_index = tree_index + 1 2683 # when we add new locations for a fileid we need these ranges for 2684 # any fileid in this tree as we set the by_path[id] to: 2685 # already_processed_tree_details + new_details + new_location_suffix 2686 # the suffix is from tree_index+1:parent_count+1. 2687 new_location_suffix = [ 2688 DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index) 2689 # now stitch in all the entries from this tree 2690 last_dirname = None 2691 for path, entry in tree.iter_entries_by_dir(): 2692 # here we process each trees details for each item in the tree. 2693 # we first update any existing entries for the id at other paths, 2694 # then we either create or update the entry for the id at the 2695 # right path, and finally we add (if needed) a mapping from 2696 # file_id to this path. We do it in this order to allow us to 2697 # avoid checking all known paths for the id when generating a 2698 # new entry at this path: by adding the id->path mapping last, 2699 # all the mappings are valid and have correct relocation 2700 # records where needed. 2701 file_id = entry.file_id 2702 path_utf8 = path.encode('utf8') 2703 dirname, basename = osutils.split(path_utf8) 2704 if dirname == last_dirname: 2705 # Try to re-use objects as much as possible 2706 dirname = last_dirname 2707 else: 2708 last_dirname = dirname 2709 new_entry_key = st(dirname, basename, file_id) 2710 # tree index consistency: All other paths for this id in this tree 2711 # index must point to the correct path. 2712 entry_keys = id_index.get(file_id, ()) 2713 for entry_key in entry_keys: 2714 # TODO:PROFILING: It might be faster to just update 2715 # rather than checking if we need to, and then overwrite 2716 # the one we are located at. 2717 if entry_key != new_entry_key: 2718 # this file id is at a different path in one of the 2719 # other trees, so put absent pointers there 2720 # This is the vertical axis in the matrix, all pointing 2721 # to the real path. 2722 by_path[entry_key][tree_index] = st(b'r', path_utf8, 0, 2723 False, b'') 2724 # by path consistency: Insert into an existing path record 2725 # (trivial), or add a new one with relocation pointers for the 2726 # other tree indexes. 2727 if new_entry_key in entry_keys: 2728 # there is already an entry where this data belongs, just 2729 # insert it. 2730 by_path[new_entry_key][tree_index] = \ 2731 self._inv_entry_to_details(entry) 2732 else: 2733 # add relocated entries to the horizontal axis - this row 2734 # mapping from path,id. 
We need to look up the correct path 2735 # for the indexes from 0 to tree_index -1 2736 new_details = [] 2737 for lookup_index in range(tree_index): 2738 # boundary case: this is the first occurence of file_id 2739 # so there are no id_indexes, possibly take this out of 2740 # the loop? 2741 if not len(entry_keys): 2742 new_details.append(DirState.NULL_PARENT_DETAILS) 2743 else: 2744 # grab any one entry, use it to find the right path. 2745 a_key = next(iter(entry_keys)) 2746 if by_path[a_key][lookup_index][0] in (b'r', b'a'): 2747 # its a pointer or missing statement, use it as 2748 # is. 2749 new_details.append( 2750 by_path[a_key][lookup_index]) 2751 else: 2752 # we have the right key, make a pointer to it. 2753 real_path = (b'/'.join(a_key[0:2])).strip(b'/') 2754 new_details.append(st(b'r', real_path, 0, False, 2755 b'')) 2756 new_details.append(self._inv_entry_to_details(entry)) 2757 new_details.extend(new_location_suffix) 2758 by_path[new_entry_key] = new_details 2759 self._add_to_id_index(id_index, new_entry_key) 2760 # --- end generation of full tree mappings 2761 2762 # sort and output all the entries 2763 new_entries = self._sort_entries(by_path.items()) 2764 self._entries_to_current_state(new_entries) 2765 self._parents = [rev_id for rev_id, tree in trees] 2766 self._ghosts = list(ghosts) 2767 self._mark_modified(header_modified=True) 2768 self._id_index = id_index 2769 2770 def _sort_entries(self, entry_list): 2771 """Given a list of entries, sort them into the right order. 2772 2773 This is done when constructing a new dirstate from trees - normally we 2774 try to keep everything in sorted blocks all the time, but sometimes 2775 it's easier to sort after the fact. 2776 """ 2777 # When sorting, we usually have 10x more entries than directories. (69k 2778 # total entries, 4k directories). So cache the results of splitting. 2779 # Saving time and objects. Also, use StaticTuple to avoid putting all 2780 # of these object into python's garbage collector. 2781 split_dirs = {} 2782 2783 def _key(entry, _split_dirs=split_dirs, _st=static_tuple.StaticTuple): 2784 # sort by: directory parts, file name, file id 2785 dirpath, fname, file_id = entry[0] 2786 try: 2787 split = _split_dirs[dirpath] 2788 except KeyError: 2789 split = _st.from_sequence(dirpath.split(b'/')) 2790 _split_dirs[dirpath] = split 2791 return _st(split, fname, file_id) 2792 return sorted(entry_list, key=_key) 2793 2794 def set_state_from_inventory(self, new_inv): 2795 """Set new_inv as the current state. 2796 2797 This API is called by tree transform, and will usually occur with 2798 existing parent trees. 2799 2800 :param new_inv: The inventory object to set current state from. 2801 """ 2802 if 'evil' in debug.debug_flags: 2803 trace.mutter_callsite(1, 2804 "set_state_from_inventory called; please mutate the tree instead") 2805 tracing = 'dirstate' in debug.debug_flags 2806 if tracing: 2807 trace.mutter("set_state_from_inventory trace:") 2808 self._read_dirblocks_if_needed() 2809 # sketch: 2810 # Two iterators: current data and new data, both in dirblock order. 2811 # We zip them together, which tells about entries that are new in the 2812 # inventory, or removed in the inventory, or present in both and 2813 # possibly changed. 2814 # 2815 # You might think we could just synthesize a new dirstate directly 2816 # since we're processing it in the right order. However, we need to 2817 # also consider there may be any number of parent trees and relocation 2818 # pointers, and we don't want to duplicate that here. 
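# Worked example (hypothetical paths): suppose the dirstate currently holds
# b'a.txt' and b'b.txt' while new_inv holds b'a.txt' and b'c.txt'. The loop
# below first sees (a.txt, a.txt) and updates in place only if the kind or
# executable bit changed; then it compares old b'b.txt' against new
# b'c.txt', where the old key sorts first, so b'b.txt' is made absent;
# finally the old side is exhausted and b'c.txt' is appended from the new
# inventory.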
2819 new_iterator = new_inv.iter_entries_by_dir() 2820 # we will be modifying the dirstate, so we need a stable iterator. In 2821 # future we might write one, for now we just clone the state into a 2822 # list using a copy so that we see every original item and don't have 2823 # to adjust the position when items are inserted or deleted in the 2824 # underlying dirstate. 2825 old_iterator = iter(list(self._iter_entries())) 2826 # both must have roots so this is safe: 2827 current_new = next(new_iterator) 2828 current_old = next(old_iterator) 2829 2830 def advance(iterator): 2831 try: 2832 return next(iterator) 2833 except StopIteration: 2834 return None 2835 while current_new or current_old: 2836 # skip entries in old that are not really there 2837 if current_old and current_old[1][0][0] in (b'a', b'r'): 2838 # relocated or absent 2839 current_old = advance(old_iterator) 2840 continue 2841 if current_new: 2842 # convert new into dirblock style 2843 new_path_utf8 = current_new[0].encode('utf8') 2844 new_dirname, new_basename = osutils.split(new_path_utf8) 2845 new_id = current_new[1].file_id 2846 new_entry_key = (new_dirname, new_basename, new_id) 2847 current_new_minikind = \ 2848 DirState._kind_to_minikind[current_new[1].kind] 2849 if current_new_minikind == b't': 2850 fingerprint = current_new[1].reference_revision or b'' 2851 else: 2852 # We normally only insert or remove records, or update 2853 # them when it has significantly changed. Then we want to 2854 # erase its fingerprint. Unaffected records should 2855 # normally not be updated at all. 2856 fingerprint = b'' 2857 else: 2858 # for safety disable variables 2859 new_path_utf8 = new_dirname = new_basename = new_id = \ 2860 new_entry_key = None 2861 # 5 cases, we dont have a value that is strictly greater than everything, so 2862 # we make both end conditions explicit 2863 if not current_old: 2864 # old is finished: insert current_new into the state. 2865 if tracing: 2866 trace.mutter("Appending from new '%s'.", 2867 new_path_utf8.decode('utf8')) 2868 self.update_minimal(new_entry_key, current_new_minikind, 2869 executable=current_new[1].executable, 2870 path_utf8=new_path_utf8, fingerprint=fingerprint, 2871 fullscan=True) 2872 current_new = advance(new_iterator) 2873 elif not current_new: 2874 # new is finished 2875 if tracing: 2876 trace.mutter("Truncating from old '%s/%s'.", 2877 current_old[0][0].decode('utf8'), 2878 current_old[0][1].decode('utf8')) 2879 self._make_absent(current_old) 2880 current_old = advance(old_iterator) 2881 elif new_entry_key == current_old[0]: 2882 # same - common case 2883 # We're looking at the same path and id in both the dirstate 2884 # and inventory, so just need to update the fields in the 2885 # dirstate from the one in the inventory. 2886 # TODO: update the record if anything significant has changed. 2887 # the minimal required trigger is if the execute bit or cached 2888 # kind has changed. 
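# For instance (hypothetical entry), a path whose cached minikind is b'f'
# but whose inventory entry is now a symlink, or a file whose executable
# bit flipped, takes the update_minimal() branch below; entries with
# neither change are left alone so their cached stat and sha1 data survive.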
2889 if (current_old[1][0][3] != current_new[1].executable or 2890 current_old[1][0][0] != current_new_minikind): 2891 if tracing: 2892 trace.mutter("Updating in-place change '%s'.", 2893 new_path_utf8.decode('utf8')) 2894 self.update_minimal(current_old[0], current_new_minikind, 2895 executable=current_new[1].executable, 2896 path_utf8=new_path_utf8, fingerprint=fingerprint, 2897 fullscan=True) 2898 # both sides are dealt with, move on 2899 current_old = advance(old_iterator) 2900 current_new = advance(new_iterator) 2901 elif (lt_by_dirs(new_dirname, current_old[0][0]) 2902 or (new_dirname == current_old[0][0] and 2903 new_entry_key[1:] < current_old[0][1:])): 2904 # new comes before: 2905 # add a entry for this and advance new 2906 if tracing: 2907 trace.mutter("Inserting from new '%s'.", 2908 new_path_utf8.decode('utf8')) 2909 self.update_minimal(new_entry_key, current_new_minikind, 2910 executable=current_new[1].executable, 2911 path_utf8=new_path_utf8, fingerprint=fingerprint, 2912 fullscan=True) 2913 current_new = advance(new_iterator) 2914 else: 2915 # we've advanced past the place where the old key would be, 2916 # without seeing it in the new list. so it must be gone. 2917 if tracing: 2918 trace.mutter("Deleting from old '%s/%s'.", 2919 current_old[0][0].decode('utf8'), 2920 current_old[0][1].decode('utf8')) 2921 self._make_absent(current_old) 2922 current_old = advance(old_iterator) 2923 self._mark_modified() 2924 self._id_index = None 2925 self._packed_stat_index = None 2926 if tracing: 2927 trace.mutter("set_state_from_inventory complete.") 2928 2929 def set_state_from_scratch(self, working_inv, parent_trees, parent_ghosts): 2930 """Wipe the currently stored state and set it to something new. 2931 2932 This is a hard-reset for the data we are working with. 2933 """ 2934 # Technically, we really want a write lock, but until we write, we 2935 # don't really need it. 2936 self._requires_lock() 2937 # root dir and root dir contents with no children. We have to have a 2938 # root for set_state_from_inventory to work correctly. 2939 empty_root = ((b'', b'', inventory.ROOT_ID), 2940 [(b'd', b'', 0, False, DirState.NULLSTAT)]) 2941 empty_tree_dirblocks = [(b'', [empty_root]), (b'', [])] 2942 self._set_data([], empty_tree_dirblocks) 2943 self.set_state_from_inventory(working_inv) 2944 self.set_parent_trees(parent_trees, parent_ghosts) 2945 2946 def _make_absent(self, current_old): 2947 """Mark current_old - an entry - as absent for tree 0. 2948 2949 :return: True if this was the last details entry for the entry key: 2950 that is, if the underlying block has had the entry removed, thus 2951 shrinking in length. 2952 """ 2953 # build up paths that this id will be left at after the change is made, 2954 # so we can update their cross references in tree 0 2955 all_remaining_keys = set() 2956 # Dont check the working tree, because it's going. 2957 for details in current_old[1][1:]: 2958 if details[0] not in (b'a', b'r'): # absent, relocated 2959 all_remaining_keys.add(current_old[0]) 2960 elif details[0] == b'r': # relocated 2961 # record the key for the real path. 2962 all_remaining_keys.add( 2963 tuple(osutils.split(details[1])) + (current_old[0][2],)) 2964 # absent rows are not present at any path. 2965 last_reference = current_old[0] not in all_remaining_keys 2966 if last_reference: 2967 # the current row consists entire of the current item (being marked 2968 # absent), and relocated or absent entries for the other trees: 2969 # Remove it, its meaningless. 
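# Illustration (hypothetical paths and ids): for an entry keyed
# (b'new', b'f.txt', b'fid') whose only parent record is the relocation
# (b'r', b'old/f.txt', 0, False, b''), all_remaining_keys holds just
# (b'old', b'f.txt', b'fid'); the key being removed is not in that set, so
# the row is popped here and the surviving row at b'old/f.txt' has its
# tree-0 column set to NULL_PARENT_DETAILS further down.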
2970 block = self._find_block(current_old[0]) 2971 entry_index, present = self._find_entry_index( 2972 current_old[0], block[1]) 2973 if not present: 2974 raise AssertionError( 2975 'could not find entry for %s' % (current_old,)) 2976 block[1].pop(entry_index) 2977 # if we have an id_index in use, remove this key from it for this id. 2978 if self._id_index is not None: 2979 self._remove_from_id_index(self._id_index, current_old[0]) 2980 # update all remaining keys for this id to record it as absent. The 2981 # existing details may either be the record we are marking as deleted 2982 # (if there were other trees with the id present at this path), or may 2983 # be relocations. 2984 for update_key in all_remaining_keys: 2985 update_block_index, present = \ 2986 self._find_block_index_from_key(update_key) 2987 if not present: 2988 raise AssertionError( 2989 'could not find block for %s' % (update_key,)) 2990 update_entry_index, present = \ 2991 self._find_entry_index( 2992 update_key, self._dirblocks[update_block_index][1]) 2993 if not present: 2994 raise AssertionError( 2995 'could not find entry for %s' % (update_key,)) 2996 update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1] 2997 # it must not be absent at the moment 2998 if update_tree_details[0][0] == b'a': # absent 2999 raise AssertionError('bad row %r' % (update_tree_details,)) 3000 update_tree_details[0] = DirState.NULL_PARENT_DETAILS 3001 self._mark_modified() 3002 return last_reference 3003 3004 def update_minimal(self, key, minikind, executable=False, fingerprint=b'', 3005 packed_stat=None, size=0, path_utf8=None, fullscan=False): 3006 """Update an entry to the state in tree 0. 3007 3008 This will either create a new entry at 'key' or update an existing one. 3009 It also makes sure that any other records which might mention this are 3010 updated as well. 3011 3012 :param key: (dir, name, file_id) for the new entry 3013 :param minikind: The type for the entry (b'f' == 'file', b'd' == 3014 'directory'), etc. 3015 :param executable: Should the executable bit be set? 3016 :param fingerprint: Simple fingerprint for new entry: canonical-form 3017 sha1 for files, referenced revision id for subtrees, etc. 3018 :param packed_stat: Packed stat value for new entry. 3019 :param size: Size information for new entry 3020 :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing 3021 extra computation. 3022 :param fullscan: If True then a complete scan of the dirstate is being 3023 done and checking for duplicate rows should not be done. This 3024 should only be set by set_state_from_inventory and similar methods. 3025 3026 If packed_stat and fingerprint are not given, they're invalidated in 3027 the entry. 3028 """ 3029 block = self._find_block(key)[1] 3030 if packed_stat is None: 3031 packed_stat = DirState.NULLSTAT 3032 # XXX: Some callers pass b'' as the packed_stat, and it seems to be 3033 # sometimes present in the dirstate - this seems oddly inconsistent. 3034 # mbp 20071008 3035 entry_index, present = self._find_entry_index(key, block) 3036 new_details = (minikind, fingerprint, size, executable, packed_stat) 3037 id_index = self._get_id_index() 3038 if not present: 3039 # New record. Check there isn't a entry at this path already. 
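# Illustrative sketch (hypothetical data): with key = (b'dir', b'name',
# b'id-new'), the loop below walks the entries in this block that share the
# (b'dir', b'name') path; if one of them, say (b'dir', b'name', b'id-old'),
# is still live in tree 0 (minikind not b'a'/b'r'), two ids would claim the
# same path and _raise_invalid is called.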
3040 if not fullscan: 3041 low_index, _ = self._find_entry_index(key[0:2] + (b'',), block) 3042 while low_index < len(block): 3043 entry = block[low_index] 3044 if entry[0][0:2] == key[0:2]: 3045 if entry[1][0][0] not in (b'a', b'r'): 3046 # This entry has the same path (but a different id) as 3047 # the new entry we're adding, and is present in ths 3048 # tree. 3049 self._raise_invalid( 3050 (b"%s/%s" % key[0:2]).decode('utf8'), key[2], 3051 "Attempt to add item at path already occupied by " 3052 "id %r" % entry[0][2]) 3053 low_index += 1 3054 else: 3055 break 3056 # new entry, synthesis cross reference here, 3057 existing_keys = id_index.get(key[2], ()) 3058 if not existing_keys: 3059 # not currently in the state, simplest case 3060 new_entry = key, [new_details] + self._empty_parent_info() 3061 else: 3062 # present at one or more existing other paths. 3063 # grab one of them and use it to generate parent 3064 # relocation/absent entries. 3065 new_entry = key, [new_details] 3066 # existing_keys can be changed as we iterate. 3067 for other_key in tuple(existing_keys): 3068 # change the record at other to be a pointer to this new 3069 # record. The loop looks similar to the change to 3070 # relocations when updating an existing record but its not: 3071 # the test for existing kinds is different: this can be 3072 # factored out to a helper though. 3073 other_block_index, present = self._find_block_index_from_key( 3074 other_key) 3075 if not present: 3076 raise AssertionError('could not find block for %s' % ( 3077 other_key,)) 3078 other_block = self._dirblocks[other_block_index][1] 3079 other_entry_index, present = self._find_entry_index( 3080 other_key, other_block) 3081 if not present: 3082 raise AssertionError( 3083 'update_minimal: could not find other entry for %s' 3084 % (other_key,)) 3085 if path_utf8 is None: 3086 raise AssertionError('no path') 3087 # Turn this other location into a reference to the new 3088 # location. This also updates the aliased iterator 3089 # (current_old in set_state_from_inventory) so that the old 3090 # entry, if not already examined, is skipped over by that 3091 # loop. 3092 other_entry = other_block[other_entry_index] 3093 other_entry[1][0] = (b'r', path_utf8, 0, False, b'') 3094 if self._maybe_remove_row(other_block, other_entry_index, 3095 id_index): 3096 # If the row holding this was removed, we need to 3097 # recompute where this entry goes 3098 entry_index, _ = self._find_entry_index(key, block) 3099 3100 # This loop: 3101 # adds a tuple to the new details for each column 3102 # - either by copying an existing relocation pointer inside that column 3103 # - or by creating a new pointer to the right row inside that column 3104 num_present_parents = self._num_present_parents() 3105 if num_present_parents: 3106 # TODO: This re-evaluates the existing_keys set, do we need 3107 # to do that ourselves? 3108 other_key = list(existing_keys)[0] 3109 for lookup_index in range(1, num_present_parents + 1): 3110 # grab any one entry, use it to find the right path. 3111 # TODO: optimise this to reduce memory use in highly 3112 # fragmented situations by reusing the relocation 3113 # records. 
3114 update_block_index, present = \ 3115 self._find_block_index_from_key(other_key) 3116 if not present: 3117 raise AssertionError( 3118 'could not find block for %s' % (other_key,)) 3119 update_entry_index, present = \ 3120 self._find_entry_index( 3121 other_key, self._dirblocks[update_block_index][1]) 3122 if not present: 3123 raise AssertionError( 3124 'update_minimal: could not find entry for %s' % (other_key,)) 3125 update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index] 3126 if update_details[0] in (b'a', b'r'): # relocated, absent 3127 # its a pointer or absent in lookup_index's tree, use 3128 # it as is. 3129 new_entry[1].append(update_details) 3130 else: 3131 # we have the right key, make a pointer to it. 3132 pointer_path = osutils.pathjoin(*other_key[0:2]) 3133 new_entry[1].append( 3134 (b'r', pointer_path, 0, False, b'')) 3135 block.insert(entry_index, new_entry) 3136 self._add_to_id_index(id_index, key) 3137 else: 3138 # Does the new state matter? 3139 block[entry_index][1][0] = new_details 3140 # parents cannot be affected by what we do. 3141 # other occurences of this id can be found 3142 # from the id index. 3143 # --- 3144 # tree index consistency: All other paths for this id in this tree 3145 # index must point to the correct path. We have to loop here because 3146 # we may have passed entries in the state with this file id already 3147 # that were absent - where parent entries are - and they need to be 3148 # converted to relocated. 3149 if path_utf8 is None: 3150 raise AssertionError('no path') 3151 existing_keys = id_index.get(key[2], ()) 3152 if key not in existing_keys: 3153 raise AssertionError('We found the entry in the blocks, but' 3154 ' the key is not in the id_index.' 3155 ' key: %s, existing_keys: %s' % (key, existing_keys)) 3156 for entry_key in existing_keys: 3157 # TODO:PROFILING: It might be faster to just update 3158 # rather than checking if we need to, and then overwrite 3159 # the one we are located at. 3160 if entry_key != key: 3161 # this file id is at a different path in one of the 3162 # other trees, so put absent pointers there 3163 # This is the vertical axis in the matrix, all pointing 3164 # to the real path. 3165 block_index, present = self._find_block_index_from_key( 3166 entry_key) 3167 if not present: 3168 raise AssertionError('not present: %r', entry_key) 3169 entry_index, present = self._find_entry_index( 3170 entry_key, self._dirblocks[block_index][1]) 3171 if not present: 3172 raise AssertionError('not present: %r', entry_key) 3173 self._dirblocks[block_index][1][entry_index][1][0] = \ 3174 (b'r', path_utf8, 0, False, b'') 3175 # add a containing dirblock if needed. 3176 if new_details[0] == b'd': 3177 # GZ 2017-06-09: Using pathjoin why? 3178 subdir_key = (osutils.pathjoin(*key[0:2]), b'', b'') 3179 block_index, present = self._find_block_index_from_key(subdir_key) 3180 if not present: 3181 self._dirblocks.insert(block_index, (subdir_key[0], [])) 3182 3183 self._mark_modified() 3184 3185 def _maybe_remove_row(self, block, index, id_index): 3186 """Remove index if it is absent or relocated across the row. 3187 3188 id_index is updated accordingly. 
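For example (illustrative values only): a row such as
((b'dir', b'name', b'file-id'),
 [(b'a', b'', 0, False, b''), (b'r', b'other/name', 0, False, b'')])
is absent or relocated in every column, so it is popped from the block and
b'file-id' is dropped from id_index; a row with any b'f'/b'd'/b'l'/b't'
column is left in place.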
3189 :return: True if we removed the row, False otherwise 3190 """ 3191 present_in_row = False 3192 entry = block[index] 3193 for column in entry[1]: 3194 if column[0] not in (b'a', b'r'): 3195 present_in_row = True 3196 break 3197 if not present_in_row: 3198 block.pop(index) 3199 self._remove_from_id_index(id_index, entry[0]) 3200 return True 3201 return False 3202 3203 def _validate(self): 3204 """Check that invariants on the dirblock are correct. 3205 3206 This can be useful in debugging; it shouldn't be necessary in 3207 normal code. 3208 3209 This must be called with a lock held. 3210 """ 3211 # NOTE: This must always raise AssertionError not just assert, 3212 # otherwise it may not behave properly under python -O 3213 # 3214 # TODO: All entries must have some content that's not b'a' or b'r', 3215 # otherwise it could just be removed. 3216 # 3217 # TODO: All relocations must point directly to a real entry. 3218 # 3219 # TODO: No repeated keys. 3220 # 3221 # -- mbp 20070325 3222 from pprint import pformat 3223 self._read_dirblocks_if_needed() 3224 if len(self._dirblocks) > 0: 3225 if not self._dirblocks[0][0] == b'': 3226 raise AssertionError( 3227 "dirblocks don't start with root block:\n" 3228 + pformat(self._dirblocks)) 3229 if len(self._dirblocks) > 1: 3230 if not self._dirblocks[1][0] == b'': 3231 raise AssertionError( 3232 "dirblocks missing root directory:\n" 3233 + pformat(self._dirblocks)) 3234 # the dirblocks are sorted by their path components, name, and dir id 3235 dir_names = [d[0].split(b'/') 3236 for d in self._dirblocks[1:]] 3237 if dir_names != sorted(dir_names): 3238 raise AssertionError( 3239 "dir names are not in sorted order:\n" + 3240 pformat(self._dirblocks) + 3241 "\nkeys:\n" + 3242 pformat(dir_names)) 3243 for dirblock in self._dirblocks: 3244 # within each dirblock, the entries are sorted by filename and 3245 # then by id. 3246 for entry in dirblock[1]: 3247 if dirblock[0] != entry[0][0]: 3248 raise AssertionError( 3249 "entry key for %r" 3250 "doesn't match directory name in\n%r" % 3251 (entry, pformat(dirblock))) 3252 if dirblock[1] != sorted(dirblock[1]): 3253 raise AssertionError( 3254 "dirblock for %r is not sorted:\n%s" % 3255 (dirblock[0], pformat(dirblock))) 3256 3257 def check_valid_parent(): 3258 """Check that the current entry has a valid parent. 3259 3260 This makes sure that the parent has a record, 3261 and that the parent isn't marked as "absent" in the 3262 current tree. (It is invalid to have a non-absent file in an absent 3263 directory.) 3264 """ 3265 if entry[0][0:2] == (b'', b''): 3266 # There should be no parent for the root row 3267 return 3268 parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0]) 3269 if parent_entry == (None, None): 3270 raise AssertionError( 3271 "no parent entry for: %s in tree %s" 3272 % (this_path, tree_index)) 3273 if parent_entry[1][tree_index][0] != b'd': 3274 raise AssertionError( 3275 "Parent entry for %s is not marked as a valid" 3276 " directory. %s" % (this_path, parent_entry,)) 3277 3278 # For each file id, for each tree: either 3279 # the file id is not present at all; all rows with that id in the 3280 # key have it marked as 'absent' 3281 # OR the file id is present under exactly one name; any other entries 3282 # that mention that id point to the correct name. 3283 # 3284 # We check this with a dict per tree pointing either to the present 3285 # name, or None if absent. 
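# Illustrative sketch (hypothetical values): each per-tree map ends up
# shaped as
#   id_path_maps[tree_index][file_id] = (expected_path_or_None, first_seen_path)
# e.g. {b'file-x': (b'dir/name', b'dir/name')} for a present file or
# {b'file-y': (None, b'gone/name')} for an absent one; later rows that
# mention the same id must agree with that first record.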
3286 tree_count = self._num_present_parents() + 1 3287 id_path_maps = [{} for _ in range(tree_count)] 3288 # Make sure that all renamed entries point to the correct location. 3289 for entry in self._iter_entries(): 3290 file_id = entry[0][2] 3291 this_path = osutils.pathjoin(entry[0][0], entry[0][1]) 3292 if len(entry[1]) != tree_count: 3293 raise AssertionError( 3294 "wrong number of entry details for row\n%s" 3295 ",\nexpected %d" % 3296 (pformat(entry), tree_count)) 3297 absent_positions = 0 3298 for tree_index, tree_state in enumerate(entry[1]): 3299 this_tree_map = id_path_maps[tree_index] 3300 minikind = tree_state[0] 3301 if minikind in (b'a', b'r'): 3302 absent_positions += 1 3303 # have we seen this id before in this column? 3304 if file_id in this_tree_map: 3305 previous_path, previous_loc = this_tree_map[file_id] 3306 # any later mention of this file must be consistent with 3307 # what was said before 3308 if minikind == b'a': 3309 if previous_path is not None: 3310 raise AssertionError( 3311 "file %s is absent in row %r but also present " 3312 "at %r" % 3313 (file_id.decode('utf-8'), entry, previous_path)) 3314 elif minikind == b'r': 3315 target_location = tree_state[1] 3316 if previous_path != target_location: 3317 raise AssertionError( 3318 "file %s relocation in row %r but also at %r" 3319 % (file_id, entry, previous_path)) 3320 else: 3321 # a file, directory, etc - may have been previously 3322 # pointed to by a relocation, which must point here 3323 if previous_path != this_path: 3324 raise AssertionError( 3325 "entry %r inconsistent with previous path %r " 3326 "seen at %r" % 3327 (entry, previous_path, previous_loc)) 3328 check_valid_parent() 3329 else: 3330 if minikind == b'a': 3331 # absent; should not occur anywhere else 3332 this_tree_map[file_id] = None, this_path 3333 elif minikind == b'r': 3334 # relocation, must occur at expected location 3335 this_tree_map[file_id] = tree_state[1], this_path 3336 else: 3337 this_tree_map[file_id] = this_path, this_path 3338 check_valid_parent() 3339 if absent_positions == tree_count: 3340 raise AssertionError( 3341 "entry %r has no data for any tree." 
% (entry,)) 3342 if self._id_index is not None: 3343 for file_id, entry_keys in self._id_index.items(): 3344 for entry_key in entry_keys: 3345 # Check that the entry in the map is pointing to the same 3346 # file_id 3347 if entry_key[2] != file_id: 3348 raise AssertionError( 3349 'file_id %r did not match entry key %s' 3350 % (file_id, entry_key)) 3351 # And that from this entry key, we can look up the original 3352 # record 3353 block_index, present = self._find_block_index_from_key( 3354 entry_key) 3355 if not present: 3356 raise AssertionError( 3357 'missing block for entry key: %r', entry_key) 3358 entry_index, present = self._find_entry_index( 3359 entry_key, self._dirblocks[block_index][1]) 3360 if not present: 3361 raise AssertionError( 3362 'missing entry for key: %r', entry_key) 3363 if len(entry_keys) != len(set(entry_keys)): 3364 raise AssertionError( 3365 'id_index contained non-unique data for %s' 3366 % (entry_keys,)) 3367 3368 def _wipe_state(self): 3369 """Forget all state information about the dirstate.""" 3370 self._header_state = DirState.NOT_IN_MEMORY 3371 self._dirblock_state = DirState.NOT_IN_MEMORY 3372 self._changes_aborted = False 3373 self._parents = [] 3374 self._ghosts = [] 3375 self._dirblocks = [] 3376 self._id_index = None 3377 self._packed_stat_index = None 3378 self._end_of_header = None 3379 self._cutoff_time = None 3380 self._split_path_cache = {} 3381 3382 def lock_read(self): 3383 """Acquire a read lock on the dirstate.""" 3384 if self._lock_token is not None: 3385 raise errors.LockContention(self._lock_token) 3386 # TODO: jam 20070301 Rather than wiping completely, if the blocks are 3387 # already in memory, we could read just the header and check for 3388 # any modification. If not modified, we can just leave things 3389 # alone 3390 self._lock_token = lock.ReadLock(self._filename) 3391 self._lock_state = 'r' 3392 self._state_file = self._lock_token.f 3393 self._wipe_state() 3394 return lock.LogicalLockResult(self.unlock) 3395 3396 def lock_write(self): 3397 """Acquire a write lock on the dirstate.""" 3398 if self._lock_token is not None: 3399 raise errors.LockContention(self._lock_token) 3400 # TODO: jam 20070301 Rather than wiping completely, if the blocks are 3401 # already in memory, we could read just the header and check for 3402 # any modification. If not modified, we can just leave things 3403 # alone 3404 self._lock_token = lock.WriteLock(self._filename) 3405 self._lock_state = 'w' 3406 self._state_file = self._lock_token.f 3407 self._wipe_state() 3408 return lock.LogicalLockResult(self.unlock, self._lock_token) 3409 3410 def unlock(self): 3411 """Drop any locks held on the dirstate.""" 3412 if self._lock_token is None: 3413 raise errors.LockNotHeld(self) 3414 # TODO: jam 20070301 Rather than wiping completely, if the blocks are 3415 # already in memory, we could read just the header and check for 3416 # any modification. If not modified, we can just leave things 3417 # alone 3418 self._state_file = None 3419 self._lock_state = None 3420 self._lock_token.unlock() 3421 self._lock_token = None 3422 self._split_path_cache = {} 3423 3424 def _requires_lock(self): 3425 """Check that a lock is currently held by someone on the dirstate.""" 3426 if not self._lock_token: 3427 raise errors.ObjectNotLocked(self) 3428 3429 3430def py_update_entry(state, entry, abspath, stat_value, 3431 _stat_to_minikind=DirState._stat_to_minikind): 3432 """Update the entry based on what is actually on disk. 
3433 3434 This function only calculates the sha if it needs to - if the entry is 3435 uncachable, or clearly different to the first parent's entry, no sha 3436 is calculated, and None is returned. 3437 3438 :param state: The dirstate this entry is in. 3439 :param entry: This is the dirblock entry for the file in question. 3440 :param abspath: The path on disk for this file. 3441 :param stat_value: The stat value done on the path. 3442 :return: None, or The sha1 hexdigest of the file (40 bytes) or link 3443 target of a symlink. 3444 """ 3445 try: 3446 minikind = _stat_to_minikind[stat_value.st_mode & 0o170000] 3447 except KeyError: 3448 # Unhandled kind 3449 return None 3450 packed_stat = pack_stat(stat_value) 3451 (saved_minikind, saved_link_or_sha1, saved_file_size, 3452 saved_executable, saved_packed_stat) = entry[1][0] 3453 if not isinstance(saved_minikind, bytes): 3454 raise TypeError(saved_minikind) 3455 3456 if minikind == b'd' and saved_minikind == b't': 3457 minikind = b't' 3458 if (minikind == saved_minikind 3459 and packed_stat == saved_packed_stat): 3460 # The stat hasn't changed since we saved, so we can re-use the 3461 # saved sha hash. 3462 if minikind == b'd': 3463 return None 3464 3465 # size should also be in packed_stat 3466 if saved_file_size == stat_value.st_size: 3467 return saved_link_or_sha1 3468 3469 # If we have gotten this far, that means that we need to actually 3470 # process this entry. 3471 link_or_sha1 = None 3472 worth_saving = True 3473 if minikind == b'f': 3474 executable = state._is_executable(stat_value.st_mode, 3475 saved_executable) 3476 if state._cutoff_time is None: 3477 state._sha_cutoff_time() 3478 if (stat_value.st_mtime < state._cutoff_time 3479 and stat_value.st_ctime < state._cutoff_time 3480 and len(entry[1]) > 1 3481 and entry[1][1][0] != b'a'): 3482 # Could check for size changes for further optimised 3483 # avoidance of sha1's. However the most prominent case of 3484 # over-shaing is during initial add, which this catches. 3485 # Besides, if content filtering happens, size and sha 3486 # are calculated at the same time, so checking just the size 3487 # gains nothing w.r.t. performance. 3488 link_or_sha1 = state._sha1_file(abspath) 3489 entry[1][0] = (b'f', link_or_sha1, stat_value.st_size, 3490 executable, packed_stat) 3491 else: 3492 entry[1][0] = (b'f', b'', stat_value.st_size, 3493 executable, DirState.NULLSTAT) 3494 worth_saving = False 3495 elif minikind == b'd': 3496 link_or_sha1 = None 3497 entry[1][0] = (b'd', b'', 0, False, packed_stat) 3498 if saved_minikind != b'd': 3499 # This changed from something into a directory. Make sure we 3500 # have a directory block for it. This doesn't happen very 3501 # often, so this doesn't have to be super fast. 
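# Illustrative sketch (hypothetical path): if entry[0] is (b'parent',
# b'child', b'id-1') and b'parent/child' used to be a file, there is no
# dirblock keyed b'parent/child' yet; the _ensure_block call below creates
# one so that children discovered later have a block to be inserted into.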
3502 block_index, entry_index, dir_present, file_present = \ 3503 state._get_block_entry_index(entry[0][0], entry[0][1], 0) 3504 state._ensure_block(block_index, entry_index, 3505 osutils.pathjoin(entry[0][0], entry[0][1])) 3506 else: 3507 worth_saving = False 3508 elif minikind == b'l': 3509 if saved_minikind == b'l': 3510 worth_saving = False 3511 link_or_sha1 = state._read_link(abspath, saved_link_or_sha1) 3512 if state._cutoff_time is None: 3513 state._sha_cutoff_time() 3514 if (stat_value.st_mtime < state._cutoff_time 3515 and stat_value.st_ctime < state._cutoff_time): 3516 entry[1][0] = (b'l', link_or_sha1, stat_value.st_size, 3517 False, packed_stat) 3518 else: 3519 entry[1][0] = (b'l', b'', stat_value.st_size, 3520 False, DirState.NULLSTAT) 3521 if worth_saving: 3522 state._mark_modified([entry]) 3523 return link_or_sha1 3524 3525 3526class ProcessEntryPython(object): 3527 3528 __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", 3529 "last_source_parent", "last_target_parent", "include_unchanged", 3530 "partial", "use_filesystem_for_exec", "utf8_decode", 3531 "searched_specific_files", "search_specific_files", 3532 "searched_exact_paths", "search_specific_file_parents", "seen_ids", 3533 "state", "source_index", "target_index", "want_unversioned", "tree"] 3534 3535 def __init__(self, include_unchanged, use_filesystem_for_exec, 3536 search_specific_files, state, source_index, target_index, 3537 want_unversioned, tree): 3538 self.old_dirname_to_file_id = {} 3539 self.new_dirname_to_file_id = {} 3540 # Are we doing a partial iter_changes? 3541 self.partial = search_specific_files != {''} 3542 # Using a list so that we can access the values and change them in 3543 # nested scope. Each one is [path, file_id, entry] 3544 self.last_source_parent = [None, None] 3545 self.last_target_parent = [None, None] 3546 self.include_unchanged = include_unchanged 3547 self.use_filesystem_for_exec = use_filesystem_for_exec 3548 self.utf8_decode = cache_utf8._utf8_decode 3549 # for all search_indexs in each path at or under each element of 3550 # search_specific_files, if the detail is relocated: add the id, and 3551 # add the relocated path as one to search if its not searched already. 3552 # If the detail is not relocated, add the id. 3553 self.searched_specific_files = set() 3554 # When we search exact paths without expanding downwards, we record 3555 # that here. 3556 self.searched_exact_paths = set() 3557 self.search_specific_files = search_specific_files 3558 # The parents up to the root of the paths we are searching. 3559 # After all normal paths are returned, these specific items are returned. 3560 self.search_specific_file_parents = set() 3561 # The ids we've sent out in the delta. 3562 self.seen_ids = set() 3563 self.state = state 3564 self.source_index = source_index 3565 self.target_index = target_index 3566 if target_index != 0: 3567 # A lot of code in here depends on target_index == 0 3568 raise errors.BzrError('unsupported target index') 3569 self.want_unversioned = want_unversioned 3570 self.tree = tree 3571 3572 def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin): 3573 """Compare an entry and real disk to generate delta information. 3574 3575 :param path_info: top_relpath, basename, kind, lstat, abspath for 3576 the path of entry. If None, then the path is considered absent in 3577 the target (Perhaps we should pass in a concrete entry for this ?) 
3578 Basename is returned as a utf8 string because we expect this 3579 tuple will be ignored, and don't want to take the time to 3580 decode. 3581 :return: (iter_changes_result, changed). If the entry has not been 3582 handled then changed is None. Otherwise it is False if no content 3583 or metadata changes have occurred, and True if any content or 3584 metadata change has occurred. If self.include_unchanged is True then 3585 if changed is not None, iter_changes_result will always be a result 3586 tuple. Otherwise, iter_changes_result is None unless changed is 3587 True. 3588 """ 3589 if self.source_index is None: 3590 source_details = DirState.NULL_PARENT_DETAILS 3591 else: 3592 source_details = entry[1][self.source_index] 3593 # GZ 2017-06-09: Eck, more sets. 3594 _fdltr = {b'f', b'd', b'l', b't', b'r'} 3595 _fdlt = {b'f', b'd', b'l', b't'} 3596 _ra = (b'r', b'a') 3597 target_details = entry[1][self.target_index] 3598 target_minikind = target_details[0] 3599 if path_info is not None and target_minikind in _fdlt: 3600 if not (self.target_index == 0): 3601 raise AssertionError() 3602 link_or_sha1 = update_entry(self.state, entry, 3603 abspath=path_info[4], stat_value=path_info[3]) 3604 # The entry may have been modified by update_entry 3605 target_details = entry[1][self.target_index] 3606 target_minikind = target_details[0] 3607 else: 3608 link_or_sha1 = None 3609 file_id = entry[0][2] 3610 source_minikind = source_details[0] 3611 if source_minikind in _fdltr and target_minikind in _fdlt: 3612 # claimed content in both: diff 3613 # r | fdlt | | add source to search, add id path move and perform 3614 # | | | diff check on source-target 3615 # r | fdlt | a | dangling file that was present in the basis. 3616 # | | | ??? 3617 if source_minikind == b'r': 3618 # add the source to the search path to find any children it 3619 # has. TODO ? : only add if it is a container ? 3620 if not osutils.is_inside_any(self.searched_specific_files, 3621 source_details[1]): 3622 self.search_specific_files.add(source_details[1]) 3623 # generate the old path; this is needed for stating later 3624 # as well. 3625 old_path = source_details[1] 3626 old_dirname, old_basename = os.path.split(old_path) 3627 path = pathjoin(entry[0][0], entry[0][1]) 3628 old_entry = self.state._get_entry(self.source_index, 3629 path_utf8=old_path) 3630 # update the source details variable to be the real 3631 # location. 3632 if old_entry == (None, None): 3633 raise DirstateCorrupt(self.state._filename, 3634 "entry '%s/%s' is considered renamed from %r" 3635 " but source does not exist\n" 3636 "entry: %s" % (entry[0][0], entry[0][1], old_path, entry)) 3637 source_details = old_entry[1][self.source_index] 3638 source_minikind = source_details[0] 3639 else: 3640 old_dirname = entry[0][0] 3641 old_basename = entry[0][1] 3642 old_path = path = None 3643 if path_info is None: 3644 # the file is missing on disk, show as removed. 3645 content_change = True 3646 target_kind = None 3647 target_exec = False 3648 else: 3649 # source and target are both versioned and disk file is present. 
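# Illustrative reminder (shape only): path_info is the 5-tuple described in
# the docstring, roughly (relpath_utf8, basename_utf8, kind, lstat_result,
# abspath), so path_info[2] is the on-disk kind, path_info[3] the lstat
# result and path_info[4] the absolute path used for sha1/stat work below.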
3650 target_kind = path_info[2] 3651 if target_kind == 'directory': 3652 if path is None: 3653 old_path = path = pathjoin(old_dirname, old_basename) 3654 self.new_dirname_to_file_id[path] = file_id 3655 if source_minikind != b'd': 3656 content_change = True 3657 else: 3658 # directories have no fingerprint 3659 content_change = False 3660 target_exec = False 3661 elif target_kind == 'file': 3662 if source_minikind != b'f': 3663 content_change = True 3664 else: 3665 # Check the sha. We can't just rely on the size as 3666 # content filtering may mean differ sizes actually 3667 # map to the same content 3668 if link_or_sha1 is None: 3669 # Stat cache miss: 3670 statvalue, link_or_sha1 = \ 3671 self.state._sha1_provider.stat_and_sha1( 3672 path_info[4]) 3673 self.state._observed_sha1(entry, link_or_sha1, 3674 statvalue) 3675 content_change = (link_or_sha1 != source_details[1]) 3676 # Target details is updated at update_entry time 3677 if self.use_filesystem_for_exec: 3678 # We don't need S_ISREG here, because we are sure 3679 # we are dealing with a file. 3680 target_exec = bool(stat.S_IEXEC & path_info[3].st_mode) 3681 else: 3682 target_exec = target_details[3] 3683 elif target_kind == 'symlink': 3684 if source_minikind != b'l': 3685 content_change = True 3686 else: 3687 content_change = (link_or_sha1 != source_details[1]) 3688 target_exec = False 3689 elif target_kind == 'tree-reference': 3690 if source_minikind != b't': 3691 content_change = True 3692 else: 3693 content_change = False 3694 target_exec = False 3695 else: 3696 if path is None: 3697 path = pathjoin(old_dirname, old_basename) 3698 raise errors.BadFileKindError(path, path_info[2]) 3699 if source_minikind == b'd': 3700 if path is None: 3701 old_path = path = pathjoin(old_dirname, old_basename) 3702 self.old_dirname_to_file_id[old_path] = file_id 3703 # parent id is the entry for the path in the target tree 3704 if old_basename and old_dirname == self.last_source_parent[0]: 3705 source_parent_id = self.last_source_parent[1] 3706 else: 3707 try: 3708 source_parent_id = self.old_dirname_to_file_id[old_dirname] 3709 except KeyError: 3710 source_parent_entry = self.state._get_entry(self.source_index, 3711 path_utf8=old_dirname) 3712 source_parent_id = source_parent_entry[0][2] 3713 if source_parent_id == entry[0][2]: 3714 # This is the root, so the parent is None 3715 source_parent_id = None 3716 else: 3717 self.last_source_parent[0] = old_dirname 3718 self.last_source_parent[1] = source_parent_id 3719 new_dirname = entry[0][0] 3720 if entry[0][1] and new_dirname == self.last_target_parent[0]: 3721 target_parent_id = self.last_target_parent[1] 3722 else: 3723 try: 3724 target_parent_id = self.new_dirname_to_file_id[new_dirname] 3725 except KeyError: 3726 # TODO: We don't always need to do the lookup, because the 3727 # parent entry will be the same as the source entry. 
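# Note: this is the slow path of a three-step parent lookup - the
# last_target_parent pair, then the new_dirname_to_file_id cache, and only
# then this full dirstate query for new_dirname - so consecutive entries
# under the same directory normally skip this _get_entry call.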
3728 target_parent_entry = self.state._get_entry(self.target_index, 3729 path_utf8=new_dirname) 3730 if target_parent_entry == (None, None): 3731 raise AssertionError( 3732 "Could not find target parent in wt: %s\nparent of: %s" 3733 % (new_dirname, entry)) 3734 target_parent_id = target_parent_entry[0][2] 3735 if target_parent_id == entry[0][2]: 3736 # This is the root, so the parent is None 3737 target_parent_id = None 3738 else: 3739 self.last_target_parent[0] = new_dirname 3740 self.last_target_parent[1] = target_parent_id 3741 3742 source_exec = source_details[3] 3743 changed = (content_change 3744 or source_parent_id != target_parent_id 3745 or old_basename != entry[0][1] 3746 or source_exec != target_exec 3747 ) 3748 if not changed and not self.include_unchanged: 3749 return None, False 3750 else: 3751 if old_path is None: 3752 old_path = path = pathjoin(old_dirname, old_basename) 3753 old_path_u = self.utf8_decode(old_path)[0] 3754 path_u = old_path_u 3755 else: 3756 old_path_u = self.utf8_decode(old_path)[0] 3757 if old_path == path: 3758 path_u = old_path_u 3759 else: 3760 path_u = self.utf8_decode(path)[0] 3761 source_kind = DirState._minikind_to_kind[source_minikind] 3762 return InventoryTreeChange( 3763 entry[0][2], 3764 (old_path_u, path_u), 3765 content_change, 3766 (True, True), 3767 (source_parent_id, target_parent_id), 3768 (self.utf8_decode(old_basename)[ 3769 0], self.utf8_decode(entry[0][1])[0]), 3770 (source_kind, target_kind), 3771 (source_exec, target_exec)), changed 3772 elif source_minikind in b'a' and target_minikind in _fdlt: 3773 # looks like a new file 3774 path = pathjoin(entry[0][0], entry[0][1]) 3775 # parent id is the entry for the path in the target tree 3776 # TODO: these are the same for an entire directory: cache em. 3777 parent_id = self.state._get_entry(self.target_index, 3778 path_utf8=entry[0][0])[0][2] 3779 if parent_id == entry[0][2]: 3780 parent_id = None 3781 if path_info is not None: 3782 # Present on disk: 3783 if self.use_filesystem_for_exec: 3784 # We need S_ISREG here, because we aren't sure if this 3785 # is a file or not. 3786 target_exec = bool( 3787 stat.S_ISREG(path_info[3].st_mode) 3788 and stat.S_IEXEC & path_info[3].st_mode) 3789 else: 3790 target_exec = target_details[3] 3791 return InventoryTreeChange( 3792 entry[0][2], 3793 (None, self.utf8_decode(path)[0]), 3794 True, 3795 (False, True), 3796 (None, parent_id), 3797 (None, self.utf8_decode(entry[0][1])[0]), 3798 (None, path_info[2]), 3799 (None, target_exec)), True 3800 else: 3801 # Its a missing file, report it as such. 3802 return InventoryTreeChange( 3803 entry[0][2], 3804 (None, self.utf8_decode(path)[0]), 3805 False, 3806 (False, True), 3807 (None, parent_id), 3808 (None, self.utf8_decode(entry[0][1])[0]), 3809 (None, None), 3810 (None, False)), True 3811 elif source_minikind in _fdlt and target_minikind in b'a': 3812 # unversioned, possibly, or possibly not deleted: we dont care. 3813 # if its still on disk, *and* theres no other entry at this 3814 # path [we dont know this in this routine at the moment - 3815 # perhaps we should change this - then it would be an unknown. 
3816 old_path = pathjoin(entry[0][0], entry[0][1]) 3817 # parent id is the entry for the path in the target tree 3818 parent_id = self.state._get_entry( 3819 self.source_index, path_utf8=entry[0][0])[0][2] 3820 if parent_id == entry[0][2]: 3821 parent_id = None 3822 return InventoryTreeChange( 3823 entry[0][2], 3824 (self.utf8_decode(old_path)[0], None), 3825 True, 3826 (True, False), 3827 (parent_id, None), 3828 (self.utf8_decode(entry[0][1])[0], None), 3829 (DirState._minikind_to_kind[source_minikind], None), 3830 (source_details[3], None)), True 3831 elif source_minikind in _fdlt and target_minikind in b'r': 3832 # a rename; could be a true rename, or a rename inherited from 3833 # a renamed parent. TODO: handle this efficiently. Its not 3834 # common case to rename dirs though, so a correct but slow 3835 # implementation will do. 3836 if not osutils.is_inside_any(self.searched_specific_files, 3837 target_details[1]): 3838 self.search_specific_files.add(target_details[1]) 3839 elif source_minikind in _ra and target_minikind in _ra: 3840 # neither of the selected trees contain this file, 3841 # so skip over it. This is not currently directly tested, but 3842 # is indirectly via test_too_much.TestCommands.test_conflicts. 3843 pass 3844 else: 3845 raise AssertionError("don't know how to compare " 3846 "source_minikind=%r, target_minikind=%r" 3847 % (source_minikind, target_minikind)) 3848 return None, None 3849 3850 def __iter__(self): 3851 return self 3852 3853 def _gather_result_for_consistency(self, result): 3854 """Check a result we will yield to make sure we are consistent later. 3855 3856 This gathers result's parents into a set to output later. 3857 3858 :param result: A result tuple. 3859 """ 3860 if not self.partial or not result.file_id: 3861 return 3862 self.seen_ids.add(result.file_id) 3863 new_path = result.path[1] 3864 if new_path: 3865 # Not the root and not a delete: queue up the parents of the path. 3866 self.search_specific_file_parents.update( 3867 p.encode('utf8') for p in osutils.parent_directories(new_path)) 3868 # Add the root directory which parent_directories does not 3869 # provide. 3870 self.search_specific_file_parents.add(b'') 3871 3872 def iter_changes(self): 3873 """Iterate over the changes.""" 3874 utf8_decode = cache_utf8._utf8_decode 3875 _lt_by_dirs = lt_by_dirs 3876 _process_entry = self._process_entry 3877 search_specific_files = self.search_specific_files 3878 searched_specific_files = self.searched_specific_files 3879 splitpath = osutils.splitpath 3880 # sketch: 3881 # compare source_index and target_index at or under each element of search_specific_files. 3882 # follow the following comparison table. Note that we only want to do diff operations when 3883 # the target is fdl because thats when the walkdirs logic will have exposed the pathinfo 3884 # for the target. 3885 # cases: 3886 # 3887 # Source | Target | disk | action 3888 # r | fdlt | | add source to search, add id path move and perform 3889 # | | | diff check on source-target 3890 # r | fdlt | a | dangling file that was present in the basis. 3891 # | | | ??? 3892 # r | a | | add source to search 3893 # r | a | a | 3894 # r | r | | this path is present in a non-examined tree, skip. 3895 # r | r | a | this path is present in a non-examined tree, skip. 
3896 # a | fdlt | | add new id 3897 # a | fdlt | a | dangling locally added file, skip 3898 # a | a | | not present in either tree, skip 3899 # a | a | a | not present in any tree, skip 3900 # a | r | | not present in either tree at this path, skip as it 3901 # | | | may not be selected by the users list of paths. 3902 # a | r | a | not present in either tree at this path, skip as it 3903 # | | | may not be selected by the users list of paths. 3904 # fdlt | fdlt | | content in both: diff them 3905 # fdlt | fdlt | a | deleted locally, but not unversioned - show as deleted ? 3906 # fdlt | a | | unversioned: output deleted id for now 3907 # fdlt | a | a | unversioned and deleted: output deleted id 3908 # fdlt | r | | relocated in this tree, so add target to search. 3909 # | | | Dont diff, we will see an r,fd; pair when we reach 3910 # | | | this id at the other path. 3911 # fdlt | r | a | relocated in this tree, so add target to search. 3912 # | | | Dont diff, we will see an r,fd; pair when we reach 3913 # | | | this id at the other path. 3914 3915 # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by 3916 # keeping a cache of directories that we have seen. 3917 3918 while search_specific_files: 3919 # TODO: the pending list should be lexically sorted? the 3920 # interface doesn't require it. 3921 current_root = search_specific_files.pop() 3922 current_root_unicode = current_root.decode('utf8') 3923 searched_specific_files.add(current_root) 3924 # process the entries for this containing directory: the rest will be 3925 # found by their parents recursively. 3926 root_entries = self.state._entries_for_path(current_root) 3927 root_abspath = self.tree.abspath(current_root_unicode) 3928 try: 3929 root_stat = os.lstat(root_abspath) 3930 except OSError as e: 3931 if e.errno == errno.ENOENT: 3932 # the path does not exist: let _process_entry know that. 3933 root_dir_info = None 3934 else: 3935 # some other random error: hand it up. 3936 raise 3937 else: 3938 root_dir_info = (b'', current_root, 3939 osutils.file_kind_from_stat_mode( 3940 root_stat.st_mode), root_stat, 3941 root_abspath) 3942 if root_dir_info[2] == 'directory': 3943 if self.tree._directory_is_tree_reference( 3944 current_root.decode('utf8')): 3945 root_dir_info = root_dir_info[:2] + \ 3946 ('tree-reference',) + root_dir_info[3:] 3947 3948 if not root_entries and not root_dir_info: 3949 # this specified path is not present at all, skip it. 3950 continue 3951 path_handled = False 3952 for entry in root_entries: 3953 result, changed = _process_entry(entry, root_dir_info) 3954 if changed is not None: 3955 path_handled = True 3956 if changed: 3957 self._gather_result_for_consistency(result) 3958 if changed or self.include_unchanged: 3959 yield result 3960 if self.want_unversioned and not path_handled and root_dir_info: 3961 new_executable = bool( 3962 stat.S_ISREG(root_dir_info[3].st_mode) 3963 and stat.S_IEXEC & root_dir_info[3].st_mode) 3964 yield InventoryTreeChange( 3965 None, 3966 (None, current_root_unicode), 3967 True, 3968 (False, False), 3969 (None, None), 3970 (None, splitpath(current_root_unicode)[-1]), 3971 (None, root_dir_info[2]), 3972 (None, new_executable) 3973 ) 3974 initial_key = (current_root, b'', b'') 3975 block_index, _ = self.state._find_block_index_from_key(initial_key) 3976 if block_index == 0: 3977 # we have processed the total root already, but because the 3978 # initial key matched it we should skip it here. 
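# Note: the dirblock list keeps two b''-keyed blocks at the front (as the
# empty_tree_dirblocks layout in set_state_from_scratch shows): index 0
# holds the root row itself and index 1 holds the root's children, so a hit
# at block index 0 here only found the root's own block and we advance one
# block to reach its contents.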
3979 block_index += 1 3980 if root_dir_info and root_dir_info[2] == 'tree-reference': 3981 current_dir_info = None 3982 else: 3983 dir_iterator = osutils._walkdirs_utf8( 3984 root_abspath, prefix=current_root) 3985 try: 3986 current_dir_info = next(dir_iterator) 3987 except OSError as e: 3988 # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but 3989 # python 2.5 has e.errno == EINVAL, 3990 # and e.winerror == ERROR_DIRECTORY 3991 e_winerror = getattr(e, 'winerror', None) 3992 win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND) 3993 # there may be directories in the inventory even though 3994 # this path is not a file on disk: so mark it as end of 3995 # iterator 3996 if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL): 3997 current_dir_info = None 3998 elif (sys.platform == 'win32' 3999 and (e.errno in win_errors or 4000 e_winerror in win_errors)): 4001 current_dir_info = None 4002 else: 4003 raise 4004 else: 4005 if current_dir_info[0][0] == b'': 4006 # remove .bzr from iteration 4007 bzr_index = bisect.bisect_left( 4008 current_dir_info[1], (b'.bzr',)) 4009 if current_dir_info[1][bzr_index][0] != b'.bzr': 4010 raise AssertionError() 4011 del current_dir_info[1][bzr_index] 4012 # walk until both the directory listing and the versioned metadata 4013 # are exhausted. 4014 if (block_index < len(self.state._dirblocks) and 4015 osutils.is_inside(current_root, 4016 self.state._dirblocks[block_index][0])): 4017 current_block = self.state._dirblocks[block_index] 4018 else: 4019 current_block = None 4020 while (current_dir_info is not None or 4021 current_block is not None): 4022 if (current_dir_info and current_block 4023 and current_dir_info[0][0] != current_block[0]): 4024 if _lt_by_dirs(current_dir_info[0][0], current_block[0]): 4025 # filesystem data refers to paths not covered by the dirblock. 4026 # this has two possibilities: 4027 # A) it is versioned but empty, so there is no block for it 4028 # B) it is not versioned. 4029 4030 # if (A) then we need to recurse into it to check for 4031 # new unknown files or directories. 4032 # if (B) then we should ignore it, because we don't 4033 # recurse into unknown directories. 4034 path_index = 0 4035 while path_index < len(current_dir_info[1]): 4036 current_path_info = current_dir_info[1][path_index] 4037 if self.want_unversioned: 4038 if current_path_info[2] == 'directory': 4039 if self.tree._directory_is_tree_reference( 4040 current_path_info[0].decode('utf8')): 4041 current_path_info = current_path_info[:2] + \ 4042 ('tree-reference',) + \ 4043 current_path_info[3:] 4044 new_executable = bool( 4045 stat.S_ISREG(current_path_info[3].st_mode) 4046 and stat.S_IEXEC & current_path_info[3].st_mode) 4047 yield InventoryTreeChange( 4048 None, 4049 (None, utf8_decode(current_path_info[0])[0]), 4050 True, 4051 (False, False), 4052 (None, None), 4053 (None, utf8_decode(current_path_info[1])[0]), 4054 (None, current_path_info[2]), 4055 (None, new_executable)) 4056 # dont descend into this unversioned path if it is 4057 # a dir 4058 if current_path_info[2] in ('directory', 4059 'tree-reference'): 4060 del current_dir_info[1][path_index] 4061 path_index -= 1 4062 path_index += 1 4063 4064 # This dir info has been handled, go to the next 4065 try: 4066 current_dir_info = next(dir_iterator) 4067 except StopIteration: 4068 current_dir_info = None 4069 else: 4070 # We have a dirblock entry for this location, but there 4071 # is no filesystem path for this. This is most likely 4072 # because a directory was removed from the disk. 
4073 # We don't have to report the missing directory, 4074 # because that should have already been handled, but we 4075 # need to handle all of the files that are contained 4076 # within. 4077 for current_entry in current_block[1]: 4078 # entry referring to file not present on disk. 4079 # advance the entry only, after processing. 4080 result, changed = _process_entry( 4081 current_entry, None) 4082 if changed is not None: 4083 if changed: 4084 self._gather_result_for_consistency(result) 4085 if changed or self.include_unchanged: 4086 yield result 4087 block_index += 1 4088 if (block_index < len(self.state._dirblocks) and 4089 osutils.is_inside(current_root, 4090 self.state._dirblocks[block_index][0])): 4091 current_block = self.state._dirblocks[block_index] 4092 else: 4093 current_block = None 4094 continue 4095 entry_index = 0 4096 if current_block and entry_index < len(current_block[1]): 4097 current_entry = current_block[1][entry_index] 4098 else: 4099 current_entry = None 4100 advance_entry = True 4101 path_index = 0 4102 if current_dir_info and path_index < len(current_dir_info[1]): 4103 current_path_info = current_dir_info[1][path_index] 4104 if current_path_info[2] == 'directory': 4105 if self.tree._directory_is_tree_reference( 4106 current_path_info[0].decode('utf8')): 4107 current_path_info = current_path_info[:2] + \ 4108 ('tree-reference',) + current_path_info[3:] 4109 else: 4110 current_path_info = None 4111 advance_path = True 4112 path_handled = False 4113 while (current_entry is not None or 4114 current_path_info is not None): 4115 if current_entry is None: 4116 # the check for path_handled when the path is advanced 4117 # will yield this path if needed. 4118 pass 4119 elif current_path_info is None: 4120 # no path is fine: the per entry code will handle it. 4121 result, changed = _process_entry( 4122 current_entry, current_path_info) 4123 if changed is not None: 4124 if changed: 4125 self._gather_result_for_consistency(result) 4126 if changed or self.include_unchanged: 4127 yield result 4128 elif (current_entry[0][1] != current_path_info[1] 4129 or current_entry[1][self.target_index][0] in (b'a', b'r')): 4130 # The current path on disk doesn't match the dirblock 4131 # record. Either the dirblock is marked as absent, or 4132 # the file on disk is not present at all in the 4133 # dirblock. Either way, report about the dirblock 4134 # entry, and let other code handle the filesystem one. 4135 4136 # Compare the basename for these files to determine 4137 # which comes first 4138 if current_path_info[1] < current_entry[0][1]: 4139 # extra file on disk: pass for now, but only 4140 # increment the path, not the entry 4141 advance_entry = False 4142 else: 4143 # entry referring to file not present on disk. 4144 # advance the entry only, after processing. 
4145 result, changed = _process_entry( 4146 current_entry, None) 4147 if changed is not None: 4148 if changed: 4149 self._gather_result_for_consistency(result) 4150 if changed or self.include_unchanged: 4151 yield result 4152 advance_path = False 4153 else: 4154 result, changed = _process_entry( 4155 current_entry, current_path_info) 4156 if changed is not None: 4157 path_handled = True 4158 if changed: 4159 self._gather_result_for_consistency(result) 4160 if changed or self.include_unchanged: 4161 yield result 4162 if advance_entry and current_entry is not None: 4163 entry_index += 1 4164 if entry_index < len(current_block[1]): 4165 current_entry = current_block[1][entry_index] 4166 else: 4167 current_entry = None 4168 else: 4169 advance_entry = True # reset the advance flaga 4170 if advance_path and current_path_info is not None: 4171 if not path_handled: 4172 # unversioned in all regards 4173 if self.want_unversioned: 4174 new_executable = bool( 4175 stat.S_ISREG(current_path_info[3].st_mode) 4176 and stat.S_IEXEC & current_path_info[3].st_mode) 4177 try: 4178 relpath_unicode = utf8_decode( 4179 current_path_info[0])[0] 4180 except UnicodeDecodeError: 4181 raise errors.BadFilenameEncoding( 4182 current_path_info[0], osutils._fs_enc) 4183 yield InventoryTreeChange( 4184 None, 4185 (None, relpath_unicode), 4186 True, 4187 (False, False), 4188 (None, None), 4189 (None, utf8_decode(current_path_info[1])[0]), 4190 (None, current_path_info[2]), 4191 (None, new_executable)) 4192 # dont descend into this unversioned path if it is 4193 # a dir 4194 if current_path_info[2] in ('directory'): 4195 del current_dir_info[1][path_index] 4196 path_index -= 1 4197 # dont descend the disk iterator into any tree 4198 # paths. 4199 if current_path_info[2] == 'tree-reference': 4200 del current_dir_info[1][path_index] 4201 path_index -= 1 4202 path_index += 1 4203 if path_index < len(current_dir_info[1]): 4204 current_path_info = current_dir_info[1][path_index] 4205 if current_path_info[2] == 'directory': 4206 if self.tree._directory_is_tree_reference( 4207 current_path_info[0].decode('utf8')): 4208 current_path_info = current_path_info[:2] + \ 4209 ('tree-reference',) + \ 4210 current_path_info[3:] 4211 else: 4212 current_path_info = None 4213 path_handled = False 4214 else: 4215 advance_path = True # reset the advance flagg. 4216 if current_block is not None: 4217 block_index += 1 4218 if (block_index < len(self.state._dirblocks) and 4219 osutils.is_inside(current_root, 4220 self.state._dirblocks[block_index][0])): 4221 current_block = self.state._dirblocks[block_index] 4222 else: 4223 current_block = None 4224 if current_dir_info is not None: 4225 try: 4226 current_dir_info = next(dir_iterator) 4227 except StopIteration: 4228 current_dir_info = None 4229 for result in self._iter_specific_file_parents(): 4230 yield result 4231 4232 def _iter_specific_file_parents(self): 4233 """Iter over the specific file parents.""" 4234 while self.search_specific_file_parents: 4235 # Process the parent directories for the paths we were iterating. 4236 # Even in extremely large trees this should be modest, so currently 4237 # no attempt is made to optimise. 4238 path_utf8 = self.search_specific_file_parents.pop() 4239 if osutils.is_inside_any(self.searched_specific_files, path_utf8): 4240 # We've examined this path. 4241 continue 4242 if path_utf8 in self.searched_exact_paths: 4243 # We've examined this path. 4244 continue 4245 path_entries = self.state._entries_for_path(path_utf8) 4246 # We need either one or two entries. 
If the path in 4247 # self.target_index has moved (so the entry in source_index is in 4248 # 'ar') then we need to also look for the entry for this path in 4249 # self.source_index, to output the appropriate delete-or-rename. 4250 selected_entries = [] 4251 found_item = False 4252 for candidate_entry in path_entries: 4253 # Find entries present in target at this path: 4254 if candidate_entry[1][self.target_index][0] not in (b'a', b'r'): 4255 found_item = True 4256 selected_entries.append(candidate_entry) 4257 # Find entries present in source at this path: 4258 elif (self.source_index is not None and 4259 candidate_entry[1][self.source_index][0] not in (b'a', b'r')): 4260 found_item = True 4261 if candidate_entry[1][self.target_index][0] == b'a': 4262 # Deleted, emit it here. 4263 selected_entries.append(candidate_entry) 4264 else: 4265 # renamed, emit it when we process the directory it 4266 # ended up at. 4267 self.search_specific_file_parents.add( 4268 candidate_entry[1][self.target_index][1]) 4269 if not found_item: 4270 raise AssertionError( 4271 "Missing entry for specific path parent %r, %r" % ( 4272 path_utf8, path_entries)) 4273 path_info = self._path_info(path_utf8, path_utf8.decode('utf8')) 4274 for entry in selected_entries: 4275 if entry[0][2] in self.seen_ids: 4276 continue 4277 result, changed = self._process_entry(entry, path_info) 4278 if changed is None: 4279 raise AssertionError( 4280 "Got entry<->path mismatch for specific path " 4281 "%r entry %r path_info %r " % ( 4282 path_utf8, entry, path_info)) 4283 # Only include changes - we're outside the users requested 4284 # expansion. 4285 if changed: 4286 self._gather_result_for_consistency(result) 4287 if (result.kind[0] == 'directory' and 4288 result.kind[1] != 'directory'): 4289 # This stopped being a directory, the old children have 4290 # to be included. 4291 if entry[1][self.source_index][0] == b'r': 4292 # renamed, take the source path 4293 entry_path_utf8 = entry[1][self.source_index][1] 4294 else: 4295 entry_path_utf8 = path_utf8 4296 initial_key = (entry_path_utf8, b'', b'') 4297 block_index, _ = self.state._find_block_index_from_key( 4298 initial_key) 4299 if block_index == 0: 4300 # The children of the root are in block index 1. 4301 block_index += 1 4302 current_block = None 4303 if block_index < len(self.state._dirblocks): 4304 current_block = self.state._dirblocks[block_index] 4305 if not osutils.is_inside( 4306 entry_path_utf8, current_block[0]): 4307 # No entries for this directory at all. 4308 current_block = None 4309 if current_block is not None: 4310 for entry in current_block[1]: 4311 if entry[1][self.source_index][0] in (b'a', b'r'): 4312 # Not in the source tree, so doesn't have to be 4313 # included. 4314 continue 4315 # Path of the entry itself. 4316 4317 self.search_specific_file_parents.add( 4318 osutils.pathjoin(*entry[0][:2])) 4319 if changed or self.include_unchanged: 4320 yield result 4321 self.searched_exact_paths.add(path_utf8) 4322 4323 def _path_info(self, utf8_path, unicode_path): 4324 """Generate path_info for unicode_path. 4325 4326 :return: None if unicode_path does not exist, or a path_info tuple. 4327 """ 4328 abspath = self.tree.abspath(unicode_path) 4329 try: 4330 stat = os.lstat(abspath) 4331 except OSError as e: 4332 if e.errno == errno.ENOENT: 4333 # the path does not exist. 
4334 return None 4335 else: 4336 raise 4337 utf8_basename = utf8_path.rsplit(b'/', 1)[-1] 4338 dir_info = (utf8_path, utf8_basename, 4339 osutils.file_kind_from_stat_mode(stat.st_mode), stat, 4340 abspath) 4341 if dir_info[2] == 'directory': 4342 if self.tree._directory_is_tree_reference( 4343 unicode_path): 4344 dir_info = dir_info[:2] + \ 4345 ('tree-reference',) + dir_info[3:] 4346 return dir_info 4347 4348 4349# Try to load the compiled form if possible 4350try: 4351 from ._dirstate_helpers_pyx import ( 4352 _read_dirblocks, 4353 bisect_dirblock, 4354 _bisect_path_left, 4355 _bisect_path_right, 4356 lt_by_dirs, 4357 pack_stat, 4358 ProcessEntryC as _process_entry, 4359 update_entry as update_entry, 4360 ) 4361except ImportError as e: 4362 osutils.failed_to_load_extension(e) 4363 from ._dirstate_helpers_py import ( 4364 _read_dirblocks, 4365 bisect_dirblock, 4366 _bisect_path_left, 4367 _bisect_path_right, 4368 lt_by_dirs, 4369 pack_stat, 4370 ) 4371 # FIXME: It would be nice to be able to track moved lines so that the 4372 # corresponding python code can be moved to the _dirstate_helpers_py 4373 # module. I don't want to break the history for this important piece of 4374 # code so I left the code here -- vila 20090622 4375 update_entry = py_update_entry 4376 _process_entry = ProcessEntryPython 4377
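# Illustrative sketch (not part of this module; assumes it is importable as
# breezy.bzr.dirstate): code or tests that care which implementation was
# selected can compare the module-level names instead of importing the
# helper modules directly, e.g.
#
#     from breezy.bzr import dirstate
#     using_pure_python = dirstate.update_entry is dirstate.py_update_entry
#
# since `update_entry` and `_process_entry` are rebound above to either the
# compiled (_dirstate_helpers_pyx) or the pure-Python fallback objects.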