1# dirstatemap.py 2# 3# This software may be used and distributed according to the terms of the 4# GNU General Public License version 2 or any later version. 5 6from __future__ import absolute_import 7 8import errno 9 10from .i18n import _ 11 12from . import ( 13 error, 14 pathutil, 15 policy, 16 pycompat, 17 txnutil, 18 util, 19) 20 21from .dirstateutils import ( 22 docket as docketmod, 23 v2, 24) 25 26parsers = policy.importmod('parsers') 27rustmod = policy.importrust('dirstate') 28 29propertycache = util.propertycache 30 31if rustmod is None: 32 DirstateItem = parsers.DirstateItem 33else: 34 DirstateItem = rustmod.DirstateItem 35 36rangemask = 0x7FFFFFFF 37 38 39class _dirstatemapcommon(object): 40 """ 41 Methods that are identical for both implementations of the dirstatemap 42 class, with and without Rust extensions enabled. 43 """ 44 45 # please pytype 46 47 _map = None 48 copymap = None 49 50 def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2): 51 self._use_dirstate_v2 = use_dirstate_v2 52 self._nodeconstants = nodeconstants 53 self._ui = ui 54 self._opener = opener 55 self._root = root 56 self._filename = b'dirstate' 57 self._nodelen = 20 # Also update Rust code when changing this! 58 self._parents = None 59 self._dirtyparents = False 60 self._docket = None 61 62 # for consistent view between _pl() and _read() invocations 63 self._pendingmode = None 64 65 def preload(self): 66 """Loads the underlying data, if it's not already loaded""" 67 self._map 68 69 def get(self, key, default=None): 70 return self._map.get(key, default) 71 72 def __len__(self): 73 return len(self._map) 74 75 def __iter__(self): 76 return iter(self._map) 77 78 def __contains__(self, key): 79 return key in self._map 80 81 def __getitem__(self, item): 82 return self._map[item] 83 84 ### sub-class utility method 85 # 86 # Use to allow for generic implementation of some method while still coping 87 # with minor difference between implementation. 88 89 def _dirs_incr(self, filename, old_entry=None): 90 """incremente the dirstate counter if applicable 91 92 This might be a no-op for some subclass who deal with directory 93 tracking in a different way. 94 """ 95 96 def _dirs_decr(self, filename, old_entry=None, remove_variant=False): 97 """decremente the dirstate counter if applicable 98 99 This might be a no-op for some subclass who deal with directory 100 tracking in a different way. 101 """ 102 103 def _refresh_entry(self, f, entry): 104 """record updated state of an entry""" 105 106 def _insert_entry(self, f, entry): 107 """add a new dirstate entry (or replace an unrelated one) 108 109 The fact it is actually new is the responsability of the caller 110 """ 111 112 def _drop_entry(self, f): 113 """remove any entry for file f 114 115 This should also drop associated copy information 116 117 The fact we actually need to drop it is the responsability of the caller""" 118 119 ### method to manipulate the entries 120 121 def set_possibly_dirty(self, filename): 122 """record that the current state of the file on disk is unknown""" 123 entry = self[filename] 124 entry.set_possibly_dirty() 125 self._refresh_entry(filename, entry) 126 127 def set_clean(self, filename, mode, size, mtime): 128 """mark a file as back to a clean state""" 129 entry = self[filename] 130 size = size & rangemask 131 entry.set_clean(mode, size, mtime) 132 self._refresh_entry(filename, entry) 133 self.copymap.pop(filename, None) 134 135 def set_tracked(self, filename): 136 new = False 137 entry = self.get(filename) 138 if entry is None: 139 self._dirs_incr(filename) 140 entry = DirstateItem( 141 wc_tracked=True, 142 ) 143 144 self._insert_entry(filename, entry) 145 new = True 146 elif not entry.tracked: 147 self._dirs_incr(filename, entry) 148 entry.set_tracked() 149 self._refresh_entry(filename, entry) 150 new = True 151 else: 152 # XXX This is probably overkill for more case, but we need this to 153 # fully replace the `normallookup` call with `set_tracked` one. 154 # Consider smoothing this in the future. 155 entry.set_possibly_dirty() 156 self._refresh_entry(filename, entry) 157 return new 158 159 def set_untracked(self, f): 160 """Mark a file as no longer tracked in the dirstate map""" 161 entry = self.get(f) 162 if entry is None: 163 return False 164 else: 165 self._dirs_decr(f, old_entry=entry, remove_variant=not entry.added) 166 if not entry.p2_info: 167 self.copymap.pop(f, None) 168 entry.set_untracked() 169 self._refresh_entry(f, entry) 170 return True 171 172 def reset_state( 173 self, 174 filename, 175 wc_tracked=False, 176 p1_tracked=False, 177 p2_info=False, 178 has_meaningful_mtime=True, 179 has_meaningful_data=True, 180 parentfiledata=None, 181 ): 182 """Set a entry to a given state, diregarding all previous state 183 184 This is to be used by the part of the dirstate API dedicated to 185 adjusting the dirstate after a update/merge. 186 187 note: calling this might result to no entry existing at all if the 188 dirstate map does not see any point at having one for this file 189 anymore. 190 """ 191 # copy information are now outdated 192 # (maybe new information should be in directly passed to this function) 193 self.copymap.pop(filename, None) 194 195 if not (p1_tracked or p2_info or wc_tracked): 196 old_entry = self._map.get(filename) 197 self._drop_entry(filename) 198 self._dirs_decr(filename, old_entry=old_entry) 199 return 200 201 old_entry = self._map.get(filename) 202 self._dirs_incr(filename, old_entry) 203 entry = DirstateItem( 204 wc_tracked=wc_tracked, 205 p1_tracked=p1_tracked, 206 p2_info=p2_info, 207 has_meaningful_mtime=has_meaningful_mtime, 208 parentfiledata=parentfiledata, 209 ) 210 self._insert_entry(filename, entry) 211 212 ### disk interaction 213 214 def _opendirstatefile(self): 215 fp, mode = txnutil.trypending(self._root, self._opener, self._filename) 216 if self._pendingmode is not None and self._pendingmode != mode: 217 fp.close() 218 raise error.Abort( 219 _(b'working directory state may be changed parallelly') 220 ) 221 self._pendingmode = mode 222 return fp 223 224 def _readdirstatefile(self, size=-1): 225 try: 226 with self._opendirstatefile() as fp: 227 return fp.read(size) 228 except IOError as err: 229 if err.errno != errno.ENOENT: 230 raise 231 # File doesn't exist, so the current state is empty 232 return b'' 233 234 @property 235 def docket(self): 236 if not self._docket: 237 if not self._use_dirstate_v2: 238 raise error.ProgrammingError( 239 b'dirstate only has a docket in v2 format' 240 ) 241 self._docket = docketmod.DirstateDocket.parse( 242 self._readdirstatefile(), self._nodeconstants 243 ) 244 return self._docket 245 246 def write_v2_no_append(self, tr, st, meta, packed): 247 old_docket = self.docket 248 new_docket = docketmod.DirstateDocket.with_new_uuid( 249 self.parents(), len(packed), meta 250 ) 251 data_filename = new_docket.data_filename() 252 if tr: 253 tr.add(data_filename, 0) 254 self._opener.write(data_filename, packed) 255 # Write the new docket after the new data file has been 256 # written. Because `st` was opened with `atomictemp=True`, 257 # the actual `.hg/dirstate` file is only affected on close. 258 st.write(new_docket.serialize()) 259 st.close() 260 # Remove the old data file after the new docket pointing to 261 # the new data file was written. 262 if old_docket.uuid: 263 data_filename = old_docket.data_filename() 264 unlink = lambda _tr=None: self._opener.unlink(data_filename) 265 if tr: 266 category = b"dirstate-v2-clean-" + old_docket.uuid 267 tr.addpostclose(category, unlink) 268 else: 269 unlink() 270 self._docket = new_docket 271 272 ### reading/setting parents 273 274 def parents(self): 275 if not self._parents: 276 if self._use_dirstate_v2: 277 self._parents = self.docket.parents 278 else: 279 read_len = self._nodelen * 2 280 st = self._readdirstatefile(read_len) 281 l = len(st) 282 if l == read_len: 283 self._parents = ( 284 st[: self._nodelen], 285 st[self._nodelen : 2 * self._nodelen], 286 ) 287 elif l == 0: 288 self._parents = ( 289 self._nodeconstants.nullid, 290 self._nodeconstants.nullid, 291 ) 292 else: 293 raise error.Abort( 294 _(b'working directory state appears damaged!') 295 ) 296 297 return self._parents 298 299 300class dirstatemap(_dirstatemapcommon): 301 """Map encapsulating the dirstate's contents. 302 303 The dirstate contains the following state: 304 305 - `identity` is the identity of the dirstate file, which can be used to 306 detect when changes have occurred to the dirstate file. 307 308 - `parents` is a pair containing the parents of the working copy. The 309 parents are updated by calling `setparents`. 310 311 - the state map maps filenames to tuples of (state, mode, size, mtime), 312 where state is a single character representing 'normal', 'added', 313 'removed', or 'merged'. It is read by treating the dirstate as a 314 dict. File state is updated by calling various methods (see each 315 documentation for details): 316 317 - `reset_state`, 318 - `set_tracked` 319 - `set_untracked` 320 - `set_clean` 321 - `set_possibly_dirty` 322 323 - `copymap` maps destination filenames to their source filename. 324 325 The dirstate also provides the following views onto the state: 326 327 - `filefoldmap` is a dict mapping normalized filenames to the denormalized 328 form that they appear as in the dirstate. 329 330 - `dirfoldmap` is a dict mapping normalized directory names to the 331 denormalized form that they appear as in the dirstate. 332 """ 333 334 ### Core data storage and access 335 336 @propertycache 337 def _map(self): 338 self._map = {} 339 self.read() 340 return self._map 341 342 @propertycache 343 def copymap(self): 344 self.copymap = {} 345 self._map 346 return self.copymap 347 348 def clear(self): 349 self._map.clear() 350 self.copymap.clear() 351 self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid) 352 util.clearcachedproperty(self, b"_dirs") 353 util.clearcachedproperty(self, b"_alldirs") 354 util.clearcachedproperty(self, b"filefoldmap") 355 util.clearcachedproperty(self, b"dirfoldmap") 356 357 def items(self): 358 return pycompat.iteritems(self._map) 359 360 # forward for python2,3 compat 361 iteritems = items 362 363 def debug_iter(self, all): 364 """ 365 Return an iterator of (filename, state, mode, size, mtime) tuples 366 367 `all` is unused when Rust is not enabled 368 """ 369 for (filename, item) in self.items(): 370 yield (filename, item.state, item.mode, item.size, item.mtime) 371 372 def keys(self): 373 return self._map.keys() 374 375 ### reading/setting parents 376 377 def setparents(self, p1, p2, fold_p2=False): 378 self._parents = (p1, p2) 379 self._dirtyparents = True 380 copies = {} 381 if fold_p2: 382 for f, s in pycompat.iteritems(self._map): 383 # Discard "merged" markers when moving away from a merge state 384 if s.p2_info: 385 source = self.copymap.pop(f, None) 386 if source: 387 copies[f] = source 388 s.drop_merge_data() 389 return copies 390 391 ### disk interaction 392 393 def read(self): 394 # ignore HG_PENDING because identity is used only for writing 395 self.identity = util.filestat.frompath( 396 self._opener.join(self._filename) 397 ) 398 399 if self._use_dirstate_v2: 400 if not self.docket.uuid: 401 return 402 st = self._opener.read(self.docket.data_filename()) 403 else: 404 st = self._readdirstatefile() 405 406 if not st: 407 return 408 409 # TODO: adjust this estimate for dirstate-v2 410 if util.safehasattr(parsers, b'dict_new_presized'): 411 # Make an estimate of the number of files in the dirstate based on 412 # its size. This trades wasting some memory for avoiding costly 413 # resizes. Each entry have a prefix of 17 bytes followed by one or 414 # two path names. Studies on various large-scale real-world repositories 415 # found 54 bytes a reasonable upper limit for the average path names. 416 # Copy entries are ignored for the sake of this estimate. 417 self._map = parsers.dict_new_presized(len(st) // 71) 418 419 # Python's garbage collector triggers a GC each time a certain number 420 # of container objects (the number being defined by 421 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple 422 # for each file in the dirstate. The C version then immediately marks 423 # them as not to be tracked by the collector. However, this has no 424 # effect on when GCs are triggered, only on what objects the GC looks 425 # into. This means that O(number of files) GCs are unavoidable. 426 # Depending on when in the process's lifetime the dirstate is parsed, 427 # this can get very expensive. As a workaround, disable GC while 428 # parsing the dirstate. 429 # 430 # (we cannot decorate the function directly since it is in a C module) 431 if self._use_dirstate_v2: 432 p = self.docket.parents 433 meta = self.docket.tree_metadata 434 parse_dirstate = util.nogc(v2.parse_dirstate) 435 parse_dirstate(self._map, self.copymap, st, meta) 436 else: 437 parse_dirstate = util.nogc(parsers.parse_dirstate) 438 p = parse_dirstate(self._map, self.copymap, st) 439 if not self._dirtyparents: 440 self.setparents(*p) 441 442 # Avoid excess attribute lookups by fast pathing certain checks 443 self.__contains__ = self._map.__contains__ 444 self.__getitem__ = self._map.__getitem__ 445 self.get = self._map.get 446 447 def write(self, tr, st, now): 448 if self._use_dirstate_v2: 449 packed, meta = v2.pack_dirstate(self._map, self.copymap, now) 450 self.write_v2_no_append(tr, st, meta, packed) 451 else: 452 packed = parsers.pack_dirstate( 453 self._map, self.copymap, self.parents(), now 454 ) 455 st.write(packed) 456 st.close() 457 self._dirtyparents = False 458 459 @propertycache 460 def identity(self): 461 self._map 462 return self.identity 463 464 ### code related to maintaining and accessing "extra" property 465 # (e.g. "has_dir") 466 467 def _dirs_incr(self, filename, old_entry=None): 468 """incremente the dirstate counter if applicable""" 469 if ( 470 old_entry is None or old_entry.removed 471 ) and "_dirs" in self.__dict__: 472 self._dirs.addpath(filename) 473 if old_entry is None and "_alldirs" in self.__dict__: 474 self._alldirs.addpath(filename) 475 476 def _dirs_decr(self, filename, old_entry=None, remove_variant=False): 477 """decremente the dirstate counter if applicable""" 478 if old_entry is not None: 479 if "_dirs" in self.__dict__ and not old_entry.removed: 480 self._dirs.delpath(filename) 481 if "_alldirs" in self.__dict__ and not remove_variant: 482 self._alldirs.delpath(filename) 483 elif remove_variant and "_alldirs" in self.__dict__: 484 self._alldirs.addpath(filename) 485 if "filefoldmap" in self.__dict__: 486 normed = util.normcase(filename) 487 self.filefoldmap.pop(normed, None) 488 489 @propertycache 490 def filefoldmap(self): 491 """Returns a dictionary mapping normalized case paths to their 492 non-normalized versions. 493 """ 494 try: 495 makefilefoldmap = parsers.make_file_foldmap 496 except AttributeError: 497 pass 498 else: 499 return makefilefoldmap( 500 self._map, util.normcasespec, util.normcasefallback 501 ) 502 503 f = {} 504 normcase = util.normcase 505 for name, s in pycompat.iteritems(self._map): 506 if not s.removed: 507 f[normcase(name)] = name 508 f[b'.'] = b'.' # prevents useless util.fspath() invocation 509 return f 510 511 @propertycache 512 def dirfoldmap(self): 513 f = {} 514 normcase = util.normcase 515 for name in self._dirs: 516 f[normcase(name)] = name 517 return f 518 519 def hastrackeddir(self, d): 520 """ 521 Returns True if the dirstate contains a tracked (not removed) file 522 in this directory. 523 """ 524 return d in self._dirs 525 526 def hasdir(self, d): 527 """ 528 Returns True if the dirstate contains a file (tracked or removed) 529 in this directory. 530 """ 531 return d in self._alldirs 532 533 @propertycache 534 def _dirs(self): 535 return pathutil.dirs(self._map, only_tracked=True) 536 537 @propertycache 538 def _alldirs(self): 539 return pathutil.dirs(self._map) 540 541 ### code related to manipulation of entries and copy-sources 542 543 def _refresh_entry(self, f, entry): 544 if not entry.any_tracked: 545 self._map.pop(f, None) 546 547 def _insert_entry(self, f, entry): 548 self._map[f] = entry 549 550 def _drop_entry(self, f): 551 self._map.pop(f, None) 552 self.copymap.pop(f, None) 553 554 555if rustmod is not None: 556 557 class dirstatemap(_dirstatemapcommon): 558 559 ### Core data storage and access 560 561 @propertycache 562 def _map(self): 563 """ 564 Fills the Dirstatemap when called. 565 """ 566 # ignore HG_PENDING because identity is used only for writing 567 self.identity = util.filestat.frompath( 568 self._opener.join(self._filename) 569 ) 570 571 if self._use_dirstate_v2: 572 if self.docket.uuid: 573 # TODO: use mmap when possible 574 data = self._opener.read(self.docket.data_filename()) 575 else: 576 data = b'' 577 self._map = rustmod.DirstateMap.new_v2( 578 data, self.docket.data_size, self.docket.tree_metadata 579 ) 580 parents = self.docket.parents 581 else: 582 self._map, parents = rustmod.DirstateMap.new_v1( 583 self._readdirstatefile() 584 ) 585 586 if parents and not self._dirtyparents: 587 self.setparents(*parents) 588 589 self.__contains__ = self._map.__contains__ 590 self.__getitem__ = self._map.__getitem__ 591 self.get = self._map.get 592 return self._map 593 594 @property 595 def copymap(self): 596 return self._map.copymap() 597 598 def debug_iter(self, all): 599 """ 600 Return an iterator of (filename, state, mode, size, mtime) tuples 601 602 `all`: also include with `state == b' '` dirstate tree nodes that 603 don't have an associated `DirstateItem`. 604 605 """ 606 return self._map.debug_iter(all) 607 608 def clear(self): 609 self._map.clear() 610 self.setparents( 611 self._nodeconstants.nullid, self._nodeconstants.nullid 612 ) 613 util.clearcachedproperty(self, b"_dirs") 614 util.clearcachedproperty(self, b"_alldirs") 615 util.clearcachedproperty(self, b"dirfoldmap") 616 617 def items(self): 618 return self._map.items() 619 620 # forward for python2,3 compat 621 iteritems = items 622 623 def keys(self): 624 return iter(self._map) 625 626 ### reading/setting parents 627 628 def setparents(self, p1, p2, fold_p2=False): 629 self._parents = (p1, p2) 630 self._dirtyparents = True 631 copies = {} 632 if fold_p2: 633 # Collect into an intermediate list to avoid a `RuntimeError` 634 # exception due to mutation during iteration. 635 # TODO: move this the whole loop to Rust where `iter_mut` 636 # enables in-place mutation of elements of a collection while 637 # iterating it, without mutating the collection itself. 638 files_with_p2_info = [ 639 f for f, s in self._map.items() if s.p2_info 640 ] 641 rust_map = self._map 642 for f in files_with_p2_info: 643 e = rust_map.get(f) 644 source = self.copymap.pop(f, None) 645 if source: 646 copies[f] = source 647 e.drop_merge_data() 648 rust_map.set_dirstate_item(f, e) 649 return copies 650 651 ### disk interaction 652 653 @propertycache 654 def identity(self): 655 self._map 656 return self.identity 657 658 def write(self, tr, st, now): 659 if not self._use_dirstate_v2: 660 p1, p2 = self.parents() 661 packed = self._map.write_v1(p1, p2, now) 662 st.write(packed) 663 st.close() 664 self._dirtyparents = False 665 return 666 667 # We can only append to an existing data file if there is one 668 can_append = self.docket.uuid is not None 669 packed, meta, append = self._map.write_v2(now, can_append) 670 if append: 671 docket = self.docket 672 data_filename = docket.data_filename() 673 if tr: 674 tr.add(data_filename, docket.data_size) 675 with self._opener(data_filename, b'r+b') as fp: 676 fp.seek(docket.data_size) 677 assert fp.tell() == docket.data_size 678 written = fp.write(packed) 679 if written is not None: # py2 may return None 680 assert written == len(packed), (written, len(packed)) 681 docket.data_size += len(packed) 682 docket.parents = self.parents() 683 docket.tree_metadata = meta 684 st.write(docket.serialize()) 685 st.close() 686 else: 687 self.write_v2_no_append(tr, st, meta, packed) 688 # Reload from the newly-written file 689 util.clearcachedproperty(self, b"_map") 690 self._dirtyparents = False 691 692 ### code related to maintaining and accessing "extra" property 693 # (e.g. "has_dir") 694 695 @propertycache 696 def filefoldmap(self): 697 """Returns a dictionary mapping normalized case paths to their 698 non-normalized versions. 699 """ 700 return self._map.filefoldmapasdict() 701 702 def hastrackeddir(self, d): 703 return self._map.hastrackeddir(d) 704 705 def hasdir(self, d): 706 return self._map.hasdir(d) 707 708 @propertycache 709 def dirfoldmap(self): 710 f = {} 711 normcase = util.normcase 712 for name in self._map.tracked_dirs(): 713 f[normcase(name)] = name 714 return f 715 716 ### code related to manipulation of entries and copy-sources 717 718 def _refresh_entry(self, f, entry): 719 if not entry.any_tracked: 720 self._map.drop_item_and_copy_source(f) 721 else: 722 self._map.addfile(f, entry) 723 724 def _insert_entry(self, f, entry): 725 self._map.addfile(f, entry) 726 727 def _drop_entry(self, f): 728 self._map.drop_item_and_copy_source(f) 729 730 def __setitem__(self, key, value): 731 assert isinstance(value, DirstateItem) 732 self._map.set_dirstate_item(key, value) 733