# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

# C or Rust implementations when available, pure-python fallback otherwise.
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
#
# Each processor returns (text, validatehash); the False second value tells
# the flag machinery not to validate the hash of ellipsis revisions (see
# revlogutils.flagutil for the processor contract).
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    # a delta emitted by a revlog: nodes, flags, and either a full revision
    # or a delta against ``basenode``
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    # a problem found by `hg verify`: at most one of warning/error is set
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    # wrap the C-parsed index in the Rust MixedIndex object
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/contants.py for details about the content of an
    index entry.
    """
    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exist for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identify the content stored in
        this revlog. It help the rest of the code to understand what the revlog
        is about without having to resort to heuristic and index filename
        analysis. Note: that this must be reliably be set by normal code, but
        that test, debug, or performance measurement code might not set this to
        accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        # file names are derived from `radix` (and `postfix`/docket) below
        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        # reads options from self.opener.options and parses the on-disk index
        self._loadindex()

        self._concurrencychecker = concurrencychecker
    def _init_opts(self):
        """process options (from above/config) to setup associated default revlog mode

        These values might be affected when actually reading on disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size for start to use mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        # pick the header for any *newly created* revlog, by decreasing
        # format preference
        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        # the chunk cache size must be a positive power of two (the cache
        # uses bit masking on offsets)
        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
    def _loadindex(self, docket=None):
        # Parse the on-disk index (or docket) and initialize all derived
        # state: format flags/version, file names, the parsed index object
        # and the segment files used to read revision data.

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        # the "entry point" is either the index itself (v0/v1/inline) or the
        # docket file (v2 and changelog-v2)
        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            # docket-based revlog: the index lives in a separate file whose
            # valid length is recorded in the docket
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        # select the index parser matching the detected format version
        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        # the KIND part of the (KIND, ID) target tuple
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in message"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        # return (and cache) the decompressor for revlog header byte ``t``
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        # compressor matching the configured compression engine
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress
    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                # only data up to ``index_end`` is valid; append after it
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            # index file does not exist yet: create it
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            # reuse the handle already opened for writing
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        """the revision number of the revlog's tip"""
        return len(self.index) - 1

    def tip(self):
        """the node of the revlog's tip"""
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        # deprecated accessor kept for backward compatibility
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        # deprecated accessor kept for backward compatibility
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        """return True if ``node`` is known to this revlog"""
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        # refresh the persistent nodemap, within ``transaction`` when given
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)
    def clearcaches(self):
        """drop all cached data derived from the index and data files"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number for ``node``

        Raises WdirUnsupported for the working-directory pseudo nodes and
        LookupError for unknown nodes."""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                # entry with sidedata: cut off just past it
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        """return the flag field of the index entry for ``rev``"""
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        """return the stored (compressed) length of ``rev``'s chunk"""
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        # unknown size recorded in the index: compute it from the rawtext
        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        """return the revision the delta chain of ``rev`` is based on"""
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        """return the linkrev recorded for ``rev``"""
        return self.index[rev][4]

    def parentrevs(self, rev):
        """return the two parent revisions of ``rev``"""
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        """return the node id of ``rev``"""
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.
915 916 def end(self, rev): 917 return self.start(rev) + self.length(rev) 918 919 def parents(self, node): 920 i = self.index 921 d = i[self.rev(node)] 922 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline 923 924 def chainlen(self, rev): 925 return self._chaininfo(rev)[0] 926 927 def _chaininfo(self, rev): 928 chaininfocache = self._chaininfocache 929 if rev in chaininfocache: 930 return chaininfocache[rev] 931 index = self.index 932 generaldelta = self._generaldelta 933 iterrev = rev 934 e = index[iterrev] 935 clen = 0 936 compresseddeltalen = 0 937 while iterrev != e[3]: 938 clen += 1 939 compresseddeltalen += e[1] 940 if generaldelta: 941 iterrev = e[3] 942 else: 943 iterrev -= 1 944 if iterrev in chaininfocache: 945 t = chaininfocache[iterrev] 946 clen += t[0] 947 compresseddeltalen += t[1] 948 break 949 e = index[iterrev] 950 else: 951 # Add text length of base since decompressing that also takes 952 # work. For cache hits the length is already included. 953 compresseddeltalen += e[1] 954 r = (clen, compresseddeltalen) 955 chaininfocache[rev] = r 956 return r 957 958 def _deltachain(self, rev, stoprev=None): 959 """Obtain the delta chain for a revision. 960 961 ``stoprev`` specifies a revision to stop at. If not specified, we 962 stop at the base of the chain. 963 964 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of 965 revs in ascending order and ``stopped`` is a bool indicating whether 966 ``stoprev`` was hit. 967 """ 968 # Try C implementation. 969 try: 970 return self.index.deltachain(rev, stoprev, self._generaldelta) 971 except AttributeError: 972 pass 973 974 chain = [] 975 976 # Alias to prevent attribute lookup in tight loop. 
977 index = self.index 978 generaldelta = self._generaldelta 979 980 iterrev = rev 981 e = index[iterrev] 982 while iterrev != e[3] and iterrev != stoprev: 983 chain.append(iterrev) 984 if generaldelta: 985 iterrev = e[3] 986 else: 987 iterrev -= 1 988 e = index[iterrev] 989 990 if iterrev == stoprev: 991 stopped = True 992 else: 993 chain.append(iterrev) 994 stopped = False 995 996 chain.reverse() 997 return chain, stopped 998 999 def ancestors(self, revs, stoprev=0, inclusive=False): 1000 """Generate the ancestors of 'revs' in reverse revision order. 1001 Does not generate revs lower than stoprev. 1002 1003 See the documentation for ancestor.lazyancestors for more details.""" 1004 1005 # first, make sure start revisions aren't filtered 1006 revs = list(revs) 1007 checkrev = self.node 1008 for r in revs: 1009 checkrev(r) 1010 # and we're sure ancestors aren't filtered as well 1011 1012 if rustancestor is not None and self.index.rust_ext_compat: 1013 lazyancestors = rustancestor.LazyAncestors 1014 arg = self.index 1015 else: 1016 lazyancestors = ancestor.lazyancestors 1017 arg = self._uncheckedparentrevs 1018 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive) 1019 1020 def descendants(self, revs): 1021 return dagop.descendantrevs(revs, self.revs, self.parentrevs) 1022 1023 def findcommonmissing(self, common=None, heads=None): 1024 """Return a tuple of the ancestors of common and the ancestors of heads 1025 that are not ancestors of common. In revset terminology, we return the 1026 tuple: 1027 1028 ::common, (::heads) - (::common) 1029 1030 The list is sorted by revision number, meaning it is 1031 topologically sorted. 1032 1033 'heads' and 'common' are both lists of node IDs. If heads is 1034 not supplied, uses all of the revlog's heads. 
If common is not 1035 supplied, uses nullid.""" 1036 if common is None: 1037 common = [self.nullid] 1038 if heads is None: 1039 heads = self.heads() 1040 1041 common = [self.rev(n) for n in common] 1042 heads = [self.rev(n) for n in heads] 1043 1044 # we want the ancestors, but inclusive 1045 class lazyset(object): 1046 def __init__(self, lazyvalues): 1047 self.addedvalues = set() 1048 self.lazyvalues = lazyvalues 1049 1050 def __contains__(self, value): 1051 return value in self.addedvalues or value in self.lazyvalues 1052 1053 def __iter__(self): 1054 added = self.addedvalues 1055 for r in added: 1056 yield r 1057 for r in self.lazyvalues: 1058 if not r in added: 1059 yield r 1060 1061 def add(self, value): 1062 self.addedvalues.add(value) 1063 1064 def update(self, values): 1065 self.addedvalues.update(values) 1066 1067 has = lazyset(self.ancestors(common)) 1068 has.add(nullrev) 1069 has.update(common) 1070 1071 # take all ancestors from heads that aren't in has 1072 missing = set() 1073 visit = collections.deque(r for r in heads if r not in has) 1074 while visit: 1075 r = visit.popleft() 1076 if r in missing: 1077 continue 1078 else: 1079 missing.add(r) 1080 for p in self.parentrevs(r): 1081 if p not in has: 1082 visit.append(p) 1083 missing = list(missing) 1084 missing.sort() 1085 return has, [self.node(miss) for miss in missing] 1086 1087 def incrementalmissingrevs(self, common=None): 1088 """Return an object that can be used to incrementally compute the 1089 revision numbers of the ancestors of arbitrary sets that are not 1090 ancestors of common. This is an ancestor.incrementalmissingancestors 1091 object. 1092 1093 'common' is a list of revision numbers. If common is not supplied, uses 1094 nullrev. 
        """
        if common is None:
            common = [nullrev]

        # Rust fast path works from the index; pure path from parentrevs
        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        # node-ID variant of findmissingrevs: convert in, compute, convert out
        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            # Walk upward from the heads (set.pop order is arbitrary),
            # collecting every node at or above lowestrev into `ancestors`.
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # keep only the heads that were actually reached from the roots
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        # prefer the C index implementation, then Rust, then pure Python
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        """Pure-Python fallback for headrevs(): a rev is a head unless some
        later rev names it as a parent."""
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be an head
            e = index[r]
            # entry fields 5 and 6 are the parent revisions
            ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        # children can only appear after their parent in revision order
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                # parentless revisions are children of nullrev
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.
        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            # an ancestor always has a smaller revision number
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        # C index fast path, with a pure-Python fallback
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        """Try to resolve `id` as an exact identifier: an integer rev, a
        binary node, a decimal revision string, or a full hex node.

        Returns the matching node, or falls off the end (returning None)
        so the caller can attempt a partial-prefix match.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                # slow path: linear scan of the whole index for nodes whose
                # hex form starts with the requested prefix
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            # a prefix is valid if it resolves unambiguously; raises
            # LookupError when the node itself is unknown
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions.
        Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        # entry field 0 packs the file offset in its high bits
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            # inline revlogs interleave index entries with data
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # hot path: bind methods/attributes to locals outside the loops
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                # slice each revision's bytes out of the shared segment
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return
rev - 1 1739 1740 def issnapshot(self, rev): 1741 """tells whether rev is a snapshot""" 1742 if not self._sparserevlog: 1743 return self.deltaparent(rev) == nullrev 1744 elif util.safehasattr(self.index, b'issnapshot'): 1745 # directly assign the method to cache the testing and access 1746 self.issnapshot = self.index.issnapshot 1747 return self.issnapshot(rev) 1748 if rev == nullrev: 1749 return True 1750 entry = self.index[rev] 1751 base = entry[3] 1752 if base == rev: 1753 return True 1754 if base == nullrev: 1755 return True 1756 p1 = entry[5] 1757 p2 = entry[6] 1758 if base == p1 or base == p2: 1759 return False 1760 return self.issnapshot(base) 1761 1762 def snapshotdepth(self, rev): 1763 """number of snapshot in the chain before this one""" 1764 if not self.issnapshot(rev): 1765 raise error.ProgrammingError(b'revision %d not a snapshot') 1766 return len(self._deltachain(rev)[0]) - 1 1767 1768 def revdiff(self, rev1, rev2): 1769 """return or calculate a delta between two revisions 1770 1771 The delta calculated is in binary form and is intended to be written to 1772 revlog data directly. So this function needs raw revision data. 1773 """ 1774 if rev1 != nullrev and self.deltaparent(rev2) == rev1: 1775 return bytes(self._chunk(rev2)) 1776 1777 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2)) 1778 1779 def _processflags(self, text, flags, operation, raw=False): 1780 """deprecated entry point to access flag processors""" 1781 msg = b'_processflag(...) use the specialized variant' 1782 util.nouideprecwarn(msg, b'5.2', stacklevel=2) 1783 if raw: 1784 return text, flagutil.processflagsraw(self, text, flags) 1785 elif operation == b'read': 1786 return flagutil.processflagsread(self, text, flags) 1787 else: # write operation 1788 return flagutil.processflagswrite(self, text, flags) 1789 1790 def revision(self, nodeorrev, _df=None, raw=False): 1791 """return an uncompressed revision of a given node or revision 1792 number. 
1793 1794 _df - an existing file handle to read from. (internal-only) 1795 raw - an optional argument specifying if the revision data is to be 1796 treated as raw data when applying flag transforms. 'raw' should be set 1797 to True when generating changegroups or in debug commands. 1798 """ 1799 if raw: 1800 msg = ( 1801 b'revlog.revision(..., raw=True) is deprecated, ' 1802 b'use revlog.rawdata(...)' 1803 ) 1804 util.nouideprecwarn(msg, b'5.2', stacklevel=2) 1805 return self._revisiondata(nodeorrev, _df, raw=raw) 1806 1807 def sidedata(self, nodeorrev, _df=None): 1808 """a map of extra data related to the changeset but not part of the hash 1809 1810 This function currently return a dictionary. However, more advanced 1811 mapping object will likely be used in the future for a more 1812 efficient/lazy code. 1813 """ 1814 # deal with <nodeorrev> argument type 1815 if isinstance(nodeorrev, int): 1816 rev = nodeorrev 1817 else: 1818 rev = self.rev(nodeorrev) 1819 return self._sidedata(rev) 1820 1821 def _revisiondata(self, nodeorrev, _df=None, raw=False): 1822 # deal with <nodeorrev> argument type 1823 if isinstance(nodeorrev, int): 1824 rev = nodeorrev 1825 node = self.node(rev) 1826 else: 1827 node = nodeorrev 1828 rev = None 1829 1830 # fast path the special `nullid` rev 1831 if node == self.nullid: 1832 return b"" 1833 1834 # ``rawtext`` is the text as stored inside the revlog. Might be the 1835 # revision or might need to be processed to retrieve the revision. 1836 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) 1837 1838 if raw and validated: 1839 # if we don't want to process the raw text and that raw 1840 # text is cached, we can exit early. 
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)

        ``validated`` is True only when the rawtext came straight from the
        revision cache (and was therefore validated when it was cached).
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        # stop the delta-chain walk early if it reaches the cached revision
        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        # refuse to read past the recorded end of the sidedata file
        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return an uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
1966 """ 1967 try: 1968 if p1 is None and p2 is None: 1969 p1, p2 = self.parents(node) 1970 if node != self.hash(text, p1, p2): 1971 # Clear the revision cache on hash failure. The revision cache 1972 # only stores the raw revision and clearing the cache does have 1973 # the side-effect that we won't have a cache hit when the raw 1974 # revision data is accessed. But this case should be rare and 1975 # it is extra work to teach the cache about the hash 1976 # verification state. 1977 if self._revisioncache and self._revisioncache[0] == node: 1978 self._revisioncache = None 1979 1980 revornode = rev 1981 if revornode is None: 1982 revornode = templatefilters.short(hex(node)) 1983 raise error.RevlogError( 1984 _(b"integrity check failed on %s:%s") 1985 % (self.display_id, pycompat.bytestr(revornode)) 1986 ) 1987 except error.RevlogError: 1988 if self._censorable and storageutil.iscensoredtext(text): 1989 raise error.CensoredNodeError(self.display_id, node, text) 1990 raise 1991 1992 def _enforceinlinesize(self, tr): 1993 """Check if the revlog is too big for inline and convert if so. 1994 1995 This should be called after revisions are added to the revlog. If the 1996 revlog has grown too large to be an inline revlog, it will convert it 1997 to use multiple index and data files. 1998 """ 1999 tiprev = len(self) - 1 2000 total_size = self.start(tiprev) + self.length(tiprev) 2001 if not self._inline or total_size < _maxinline: 2002 return 2003 2004 troffset = tr.findoffset(self._indexfile) 2005 if troffset is None: 2006 raise error.RevlogError( 2007 _(b"%s not found in the transaction") % self._indexfile 2008 ) 2009 trindex = 0 2010 tr.add(self._datafile, 0) 2011 2012 existing_handles = False 2013 if self._writinghandles is not None: 2014 existing_handles = True 2015 fp = self._writinghandles[0] 2016 fp.flush() 2017 fp.close() 2018 # We can't use the cached file handle after close(). So prevent 2019 # its usage. 
2020 self._writinghandles = None 2021 self._segmentfile.writing_handle = None 2022 # No need to deal with sidedata writing handle as it is only 2023 # relevant with revlog-v2 which is never inline, not reaching 2024 # this code 2025 2026 new_dfh = self._datafp(b'w+') 2027 new_dfh.truncate(0) # drop any potentially existing data 2028 try: 2029 with self._indexfp() as read_ifh: 2030 for r in self: 2031 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1]) 2032 if troffset <= self.start(r) + r * self.index.entry_size: 2033 trindex = r 2034 new_dfh.flush() 2035 2036 with self.__index_new_fp() as fp: 2037 self._format_flags &= ~FLAG_INLINE_DATA 2038 self._inline = False 2039 for i in self: 2040 e = self.index.entry_binary(i) 2041 if i == 0 and self._docket is None: 2042 header = self._format_flags | self._format_version 2043 header = self.index.pack_header(header) 2044 e = header + e 2045 fp.write(e) 2046 if self._docket is not None: 2047 self._docket.index_end = fp.tell() 2048 2049 # There is a small transactional race here. If the rename of 2050 # the index fails, we should remove the datafile. It is more 2051 # important to ensure that the data file is not truncated 2052 # when the index is replaced as otherwise data is lost. 
2053 tr.replace(self._datafile, self.start(trindex)) 2054 2055 # the temp file replace the real index when we exit the context 2056 # manager 2057 2058 tr.replace(self._indexfile, trindex * self.index.entry_size) 2059 nodemaputil.setup_persistent_nodemap(tr, self) 2060 self._segmentfile = randomaccessfile.randomaccessfile( 2061 self.opener, 2062 self._datafile, 2063 self._chunkcachesize, 2064 ) 2065 2066 if existing_handles: 2067 # switched from inline to conventional reopen the index 2068 ifh = self.__index_write_fp() 2069 self._writinghandles = (ifh, new_dfh, None) 2070 self._segmentfile.writing_handle = new_dfh 2071 new_dfh = None 2072 # No need to deal with sidedata writing handle as it is only 2073 # relevant with revlog-v2 which is never inline, not reaching 2074 # this code 2075 finally: 2076 if new_dfh is not None: 2077 new_dfh.close() 2078 2079 def _nodeduplicatecallback(self, transaction, node): 2080 """called when trying to add a node already stored.""" 2081 2082 @contextlib.contextmanager 2083 def reading(self): 2084 """Context manager that keeps data and sidedata files open for reading""" 2085 with self._segmentfile.reading(): 2086 with self._segmentfile_sidedata.reading(): 2087 yield 2088 2089 @contextlib.contextmanager 2090 def _writing(self, transaction): 2091 if self._trypending: 2092 msg = b'try to write in a `trypending` revlog: %s' 2093 msg %= self.display_id 2094 raise error.ProgrammingError(msg) 2095 if self._writinghandles is not None: 2096 yield 2097 else: 2098 ifh = dfh = sdfh = None 2099 try: 2100 r = len(self) 2101 # opening the data file. 
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        # "r+" keeps existing content; position at the end of
                        # known data (docket-tracked end for revlog-v2).
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        # file does not exist yet: create it
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                        transaction.add(
                            self._sidedatafile, self._docket.sidedata_end
                        )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    # inline revlogs carry data inside the index file
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handle for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exist as a method to help changelog to implement transaction logic

        We could also imagine using the same transaction logic for all revlog
        since docket are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog who don't support them")
            )

        if flags:
            # flag processors may need the node; compute it eagerly
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            # node already stored; adding it again would be a no-op
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data.

        Returns a (header, data) pair; an empty header means the data
        already carries its own compression header.
        """
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            # a leading NUL byte is self-identifying; store as-is
            return b'', data
        # b'u' marks uncompressed data (stripped again in decompress())
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely by at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            # uncompressed marker added by compress(); strip the header byte
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2361 2362 if "deltacomputer" is not provided or None, a defaultdeltacomputer will 2363 be used. 2364 2365 invariants: 2366 - rawtext is optional (can be None); if not set, cachedelta must be set. 2367 if both are set, they must correspond to each other. 2368 """ 2369 if node == self.nullid: 2370 raise error.RevlogError( 2371 _(b"%s: attempt to add null revision") % self.display_id 2372 ) 2373 if ( 2374 node == self.nodeconstants.wdirid 2375 or node in self.nodeconstants.wdirfilenodeids 2376 ): 2377 raise error.RevlogError( 2378 _(b"%s: attempt to add wdir revision") % self.display_id 2379 ) 2380 if self._writinghandles is None: 2381 msg = b'adding revision outside `revlog._writing` context' 2382 raise error.ProgrammingError(msg) 2383 2384 if self._inline: 2385 fh = self._writinghandles[0] 2386 else: 2387 fh = self._writinghandles[1] 2388 2389 btext = [rawtext] 2390 2391 curr = len(self) 2392 prev = curr - 1 2393 2394 offset = self._get_data_offset(prev) 2395 2396 if self._concurrencychecker: 2397 ifh, dfh, sdfh = self._writinghandles 2398 # XXX no checking for the sidedata file 2399 if self._inline: 2400 # offset is "as if" it were in the .d file, so we need to add on 2401 # the size of the entry metadata. 2402 self._concurrencychecker( 2403 ifh, self._indexfile, offset + curr * self.index.entry_size 2404 ) 2405 else: 2406 # Entries in the .i are a consistent size. 2407 self._concurrencychecker( 2408 ifh, self._indexfile, curr * self.index.entry_size 2409 ) 2410 self._concurrencychecker(dfh, self._datafile, offset) 2411 2412 p1r, p2r = self.rev(p1), self.rev(p2) 2413 2414 # full versions are inserted when the needed deltas 2415 # become comparable to the uncompressed text 2416 if rawtext is None: 2417 # need rawtext size, before changed by flag processors, which is 2418 # the non-raw size. use revlog explicitly to avoid filelog's extra 2419 # logic that might remove metadata size. 
2420 textlen = mdiff.patchedsize( 2421 revlog.size(self, cachedelta[0]), cachedelta[1] 2422 ) 2423 else: 2424 textlen = len(rawtext) 2425 2426 if deltacomputer is None: 2427 deltacomputer = deltautil.deltacomputer(self) 2428 2429 revinfo = revlogutils.revisioninfo( 2430 node, 2431 p1, 2432 p2, 2433 btext, 2434 textlen, 2435 cachedelta, 2436 flags, 2437 ) 2438 2439 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) 2440 2441 compression_mode = COMP_MODE_INLINE 2442 if self._docket is not None: 2443 default_comp = self._docket.default_compression_header 2444 r = deltautil.delta_compression(default_comp, deltainfo) 2445 compression_mode, deltainfo = r 2446 2447 sidedata_compression_mode = COMP_MODE_INLINE 2448 if sidedata and self.hassidedata: 2449 sidedata_compression_mode = COMP_MODE_PLAIN 2450 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) 2451 sidedata_offset = self._docket.sidedata_end 2452 h, comp_sidedata = self.compress(serialized_sidedata) 2453 if ( 2454 h != b'u' 2455 and comp_sidedata[0:1] != b'\0' 2456 and len(comp_sidedata) < len(serialized_sidedata) 2457 ): 2458 assert not h 2459 if ( 2460 comp_sidedata[0:1] 2461 == self._docket.default_compression_header 2462 ): 2463 sidedata_compression_mode = COMP_MODE_DEFAULT 2464 serialized_sidedata = comp_sidedata 2465 else: 2466 sidedata_compression_mode = COMP_MODE_INLINE 2467 serialized_sidedata = comp_sidedata 2468 else: 2469 serialized_sidedata = b"" 2470 # Don't store the offset if the sidedata is empty, that way 2471 # we can easily detect empty sidedata and they will be no different 2472 # than ones we manually add. 
2473 sidedata_offset = 0 2474 2475 e = revlogutils.entry( 2476 flags=flags, 2477 data_offset=offset, 2478 data_compressed_length=deltainfo.deltalen, 2479 data_uncompressed_length=textlen, 2480 data_compression_mode=compression_mode, 2481 data_delta_base=deltainfo.base, 2482 link_rev=link, 2483 parent_rev_1=p1r, 2484 parent_rev_2=p2r, 2485 node_id=node, 2486 sidedata_offset=sidedata_offset, 2487 sidedata_compressed_length=len(serialized_sidedata), 2488 sidedata_compression_mode=sidedata_compression_mode, 2489 ) 2490 2491 self.index.append(e) 2492 entry = self.index.entry_binary(curr) 2493 if curr == 0 and self._docket is None: 2494 header = self._format_flags | self._format_version 2495 header = self.index.pack_header(header) 2496 entry = header + entry 2497 self._writeentry( 2498 transaction, 2499 entry, 2500 deltainfo.data, 2501 link, 2502 offset, 2503 serialized_sidedata, 2504 sidedata_offset, 2505 ) 2506 2507 rawtext = btext[0] 2508 2509 if alwayscache and rawtext is None: 2510 rawtext = deltacomputer.buildtext(revinfo, fh) 2511 2512 if type(rawtext) == bytes: # only accept immutable objects 2513 self._revisioncache = (node, curr, rawtext) 2514 self._chainbasecache[curr] = deltainfo.chainbase 2515 return curr 2516 2517 def _get_data_offset(self, prev): 2518 """Returns the current offset in the (in-transaction) data file. 2519 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket 2520 file to store that information: since sidedata can be rewritten to the 2521 end of the data file within a transaction, you can have cases where, for 2522 example, rev `n` does not have sidedata while rev `n - 1` does, leading 2523 to `n - 1`'s sidedata being written after `n`'s data. 

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Write one binary index entry plus its (possibly two-part) revision
        # data and optional sidedata, updating docket offsets when relevant.
        #
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            # data is a (header, payload) pair; the header may be empty
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            # may flip the revlog out of inline mode if it grew too large
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        # tracks whether at least one revision was added or recognized as
        # a duplicate; the return value is "did anything land"
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.
2742 2743 So we truncate the revlog on the first of these revisions, and 2744 trust that the caller has saved the revisions that shouldn't be 2745 removed and that it'll re-add them after this truncation. 2746 """ 2747 if len(self) == 0: 2748 return 2749 2750 rev, _ = self.getstrippoint(minlink) 2751 if rev == len(self): 2752 return 2753 2754 # first truncate the files on disk 2755 data_end = self.start(rev) 2756 if not self._inline: 2757 transaction.add(self._datafile, data_end) 2758 end = rev * self.index.entry_size 2759 else: 2760 end = data_end + (rev * self.index.entry_size) 2761 2762 if self._sidedatafile: 2763 sidedata_end = self.sidedata_cut_off(rev) 2764 transaction.add(self._sidedatafile, sidedata_end) 2765 2766 transaction.add(self._indexfile, end) 2767 if self._docket is not None: 2768 # XXX we could, leverage the docket while stripping. However it is 2769 # not powerfull enough at the time of this comment 2770 self._docket.index_end = end 2771 self._docket.data_end = data_end 2772 self._docket.sidedata_end = sidedata_end 2773 self._docket.write(transaction, stripping=True) 2774 2775 # then reset internal state in memory to forget those revisions 2776 self._revisioncache = None 2777 self._chaininfocache = util.lrucachedict(500) 2778 self._segmentfile.clear_cache() 2779 self._segmentfile_sidedata.clear_cache() 2780 2781 del self.index[rev:-1] 2782 2783 def checksize(self): 2784 """Check size of index and data files 2785 2786 return a (dd, di) tuple. 2787 - dd: extra bytes for the "data" file 2788 - di: extra bytes for the "index" file 2789 2790 A healthy revlog will return (0, 0). 
2791 """ 2792 expected = 0 2793 if len(self): 2794 expected = max(0, self.end(len(self) - 1)) 2795 2796 try: 2797 with self._datafp() as f: 2798 f.seek(0, io.SEEK_END) 2799 actual = f.tell() 2800 dd = actual - expected 2801 except IOError as inst: 2802 if inst.errno != errno.ENOENT: 2803 raise 2804 dd = 0 2805 2806 try: 2807 f = self.opener(self._indexfile) 2808 f.seek(0, io.SEEK_END) 2809 actual = f.tell() 2810 f.close() 2811 s = self.index.entry_size 2812 i = max(0, actual // s) 2813 di = actual - (i * s) 2814 if self._inline: 2815 databytes = 0 2816 for r in self: 2817 databytes += max(0, self.length(r)) 2818 dd = 0 2819 di = actual - len(self) * s - databytes 2820 except IOError as inst: 2821 if inst.errno != errno.ENOENT: 2822 raise 2823 di = 0 2824 2825 return (dd, di) 2826 2827 def files(self): 2828 res = [self._indexfile] 2829 if self._docket_file is None: 2830 if not self._inline: 2831 res.append(self._datafile) 2832 else: 2833 res.append(self._docket_file) 2834 res.extend(self._docket.old_index_filepaths(include_empty=False)) 2835 if self._docket.data_end: 2836 res.append(self._datafile) 2837 res.extend(self._docket.old_data_filepaths(include_empty=False)) 2838 if self._docket.sidedata_end: 2839 res.append(self._sidedatafile) 2840 res.extend(self._docket.old_sidedata_filepaths(include_empty=False)) 2841 return res 2842 2843 def emitrevisions( 2844 self, 2845 nodes, 2846 nodesorder=None, 2847 revisiondata=False, 2848 assumehaveparentrevisions=False, 2849 deltamode=repository.CG_DELTAMODE_STD, 2850 sidedata_helpers=None, 2851 ): 2852 if nodesorder not in (b'nodes', b'storage', b'linear', None): 2853 raise error.ProgrammingError( 2854 b'unhandled value for nodesorder: %s' % nodesorder 2855 ) 2856 2857 if nodesorder is None and not self._generaldelta: 2858 nodesorder = b'storage' 2859 2860 if ( 2861 not self._storedeltachains 2862 and deltamode != repository.CG_DELTAMODE_PREV 2863 ): 2864 deltamode = repository.CG_DELTAMODE_FULL 2865 2866 return 
storageutil.emitrevisions( 2867 self, 2868 nodes, 2869 nodesorder, 2870 revlogrevisiondelta, 2871 deltaparentfn=self.deltaparent, 2872 candeltafn=self.candelta, 2873 rawsizefn=self.rawsize, 2874 revdifffn=self.revdiff, 2875 flagsfn=self.flags, 2876 deltamode=deltamode, 2877 revisiondata=revisiondata, 2878 assumehaveparentrevisions=assumehaveparentrevisions, 2879 sidedata_helpers=sidedata_helpers, 2880 ) 2881 2882 DELTAREUSEALWAYS = b'always' 2883 DELTAREUSESAMEREVS = b'samerevs' 2884 DELTAREUSENEVER = b'never' 2885 2886 DELTAREUSEFULLADD = b'fulladd' 2887 2888 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'} 2889 2890 def clone( 2891 self, 2892 tr, 2893 destrevlog, 2894 addrevisioncb=None, 2895 deltareuse=DELTAREUSESAMEREVS, 2896 forcedeltabothparents=None, 2897 sidedata_helpers=None, 2898 ): 2899 """Copy this revlog to another, possibly with format changes. 2900 2901 The destination revlog will contain the same revisions and nodes. 2902 However, it may not be bit-for-bit identical due to e.g. delta encoding 2903 differences. 2904 2905 The ``deltareuse`` argument control how deltas from the existing revlog 2906 are preserved in the destination revlog. The argument can have the 2907 following values: 2908 2909 DELTAREUSEALWAYS 2910 Deltas will always be reused (if possible), even if the destination 2911 revlog would not select the same revisions for the delta. This is the 2912 fastest mode of operation. 2913 DELTAREUSESAMEREVS 2914 Deltas will be reused if the destination revlog would pick the same 2915 revisions for the delta. This mode strikes a balance between speed 2916 and optimization. 2917 DELTAREUSENEVER 2918 Deltas will never be reused. This is the slowest mode of execution. 2919 This mode can be used to recompute deltas (e.g. if the diff/delta 2920 algorithm changes). 2921 DELTAREUSEFULLADD 2922 Revision will be re-added as if their were new content. This is 2923 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in. 
          eg: large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            # translate the requested policy into the destination revlog's
            # delta-laziness knobs; restored in the finally clause
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            # always restore the destination revlog's original settings
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                # re-add the revision from its full (non-raw) text, letting
                # the destination run the full addrevision pipeline
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    # no reusable delta: fall back to the raw text
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        """Replace ``censornode``'s content, dispatching per revlog version."""
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
3141 # 3142 # | common | rename | meta | ext 3143 # ------------------------------------------------- 3144 # rawsize() | L1 | L1 | L1 | L1 3145 # size() | L1 | L2-LM | L1(*) | L1 (?) 3146 # len(rawtext) | L2 | L2 | L2 | L2 3147 # len(text) | L2 | L2 | L2 | L3 3148 # len(read()) | L2 | L2-LM | L2-LM | L3 (?) 3149 # 3150 # LM: length of metadata, depending on rawtext 3151 # (*): not ideal, see comment in filelog.size 3152 # (?): could be "- len(meta)" if the resolved content has 3153 # rename metadata 3154 # 3155 # Checks needed to be done: 3156 # 1. length check: L1 == L2, in all cases. 3157 # 2. hash check: depending on flag processor, we may need to 3158 # use either "text" (external), or "rawtext" (in revlog). 3159 3160 try: 3161 skipflags = state.get(b'skipflags', 0) 3162 if skipflags: 3163 skipflags &= self.flags(rev) 3164 3165 _verify_revision(self, skipflags, state, node) 3166 3167 l1 = self.rawsize(rev) 3168 l2 = len(self.rawdata(node)) 3169 3170 if l1 != l2: 3171 yield revlogproblem( 3172 error=_(b'unpacked size is %d, %d expected') % (l2, l1), 3173 node=node, 3174 ) 3175 3176 except error.CensoredNodeError: 3177 if state[b'erroroncensored']: 3178 yield revlogproblem( 3179 error=_(b'censored file data'), node=node 3180 ) 3181 state[b'skipread'].add(node) 3182 except Exception as e: 3183 yield revlogproblem( 3184 error=_(b'unpacking %s: %s') 3185 % (short(node), stringutil.forcebytestr(e)), 3186 node=node, 3187 ) 3188 state[b'skipread'].add(node) 3189 3190 def storageinfo( 3191 self, 3192 exclusivefiles=False, 3193 sharedfiles=False, 3194 revisionscount=False, 3195 trackedsize=False, 3196 storedsize=False, 3197 ): 3198 d = {} 3199 3200 if exclusivefiles: 3201 d[b'exclusivefiles'] = [(self.opener, self._indexfile)] 3202 if not self._inline: 3203 d[b'exclusivefiles'].append((self.opener, self._datafile)) 3204 3205 if sharedfiles: 3206 d[b'sharedfiles'] = [] 3207 3208 if revisionscount: 3209 d[b'revisionscount'] = len(self) 3210 3211 if trackedsize: 3212 
d[b'trackedsize'] = sum(map(self.rawsize, iter(self))) 3213 3214 if storedsize: 3215 d[b'storedsize'] = sum( 3216 self.opener.stat(path).st_size for path in self.files() 3217 ) 3218 3219 return d 3220 3221 def rewrite_sidedata(self, transaction, helpers, startrev, endrev): 3222 if not self.hassidedata: 3223 return 3224 # revlog formats with sidedata support does not support inline 3225 assert not self._inline 3226 if not helpers[1] and not helpers[2]: 3227 # Nothing to generate or remove 3228 return 3229 3230 new_entries = [] 3231 # append the new sidedata 3232 with self._writing(transaction): 3233 ifh, dfh, sdfh = self._writinghandles 3234 dfh.seek(self._docket.sidedata_end, os.SEEK_SET) 3235 3236 current_offset = sdfh.tell() 3237 for rev in range(startrev, endrev + 1): 3238 entry = self.index[rev] 3239 new_sidedata, flags = sidedatautil.run_sidedata_helpers( 3240 store=self, 3241 sidedata_helpers=helpers, 3242 sidedata={}, 3243 rev=rev, 3244 ) 3245 3246 serialized_sidedata = sidedatautil.serialize_sidedata( 3247 new_sidedata 3248 ) 3249 3250 sidedata_compression_mode = COMP_MODE_INLINE 3251 if serialized_sidedata and self.hassidedata: 3252 sidedata_compression_mode = COMP_MODE_PLAIN 3253 h, comp_sidedata = self.compress(serialized_sidedata) 3254 if ( 3255 h != b'u' 3256 and comp_sidedata[0] != b'\0' 3257 and len(comp_sidedata) < len(serialized_sidedata) 3258 ): 3259 assert not h 3260 if ( 3261 comp_sidedata[0] 3262 == self._docket.default_compression_header 3263 ): 3264 sidedata_compression_mode = COMP_MODE_DEFAULT 3265 serialized_sidedata = comp_sidedata 3266 else: 3267 sidedata_compression_mode = COMP_MODE_INLINE 3268 serialized_sidedata = comp_sidedata 3269 if entry[8] != 0 or entry[9] != 0: 3270 # rewriting entries that already have sidedata is not 3271 # supported yet, because it introduces garbage data in the 3272 # revlog. 
3273 msg = b"rewriting existing sidedata is not supported yet" 3274 raise error.Abort(msg) 3275 3276 # Apply (potential) flags to add and to remove after running 3277 # the sidedata helpers 3278 new_offset_flags = entry[0] | flags[0] & ~flags[1] 3279 entry_update = ( 3280 current_offset, 3281 len(serialized_sidedata), 3282 new_offset_flags, 3283 sidedata_compression_mode, 3284 ) 3285 3286 # the sidedata computation might have move the file cursors around 3287 sdfh.seek(current_offset, os.SEEK_SET) 3288 sdfh.write(serialized_sidedata) 3289 new_entries.append(entry_update) 3290 current_offset += len(serialized_sidedata) 3291 self._docket.sidedata_end = sdfh.tell() 3292 3293 # rewrite the new index entries 3294 ifh.seek(startrev * self.index.entry_size) 3295 for i, e in enumerate(new_entries): 3296 rev = startrev + i 3297 self.index.replace_sidedata_info(rev, *e) 3298 packed = self.index.entry_binary(rev) 3299 if rev == 0 and self._docket is None: 3300 header = self._format_flags | self._format_version 3301 header = self.index.pack_header(header) 3302 packed = header + packed 3303 ifh.write(packed) 3304