1# revlog.py - storage back-end for mercurial
2# coding: utf8
3#
4# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5#
6# This software may be used and distributed according to the terms of the
7# GNU General Public License version 2 or any later version.
8
9"""Storage back-end for Mercurial.
10
11This provides efficient delta storage with O(1) retrieve and append
12and O(changes) merge between branches.
13"""
14
15from __future__ import absolute_import
16
17import binascii
18import collections
19import contextlib
20import errno
21import io
22import os
23import struct
24import zlib
25
26# import stuff from node for others to import from revlog
27from .node import (
28    bin,
29    hex,
30    nullrev,
31    sha1nodeconstants,
32    short,
33    wdirrev,
34)
35from .i18n import _
36from .pycompat import getattr
37from .revlogutils.constants import (
38    ALL_KINDS,
39    CHANGELOGV2,
40    COMP_MODE_DEFAULT,
41    COMP_MODE_INLINE,
42    COMP_MODE_PLAIN,
43    FEATURES_BY_VERSION,
44    FLAG_GENERALDELTA,
45    FLAG_INLINE_DATA,
46    INDEX_HEADER,
47    KIND_CHANGELOG,
48    REVLOGV0,
49    REVLOGV1,
50    REVLOGV1_FLAGS,
51    REVLOGV2,
52    REVLOGV2_FLAGS,
53    REVLOG_DEFAULT_FLAGS,
54    REVLOG_DEFAULT_FORMAT,
55    REVLOG_DEFAULT_VERSION,
56    SUPPORTED_FLAGS,
57)
58from .revlogutils.flagutil import (
59    REVIDX_DEFAULT_FLAGS,
60    REVIDX_ELLIPSIS,
61    REVIDX_EXTSTORED,
62    REVIDX_FLAGS_ORDER,
63    REVIDX_HASCOPIESINFO,
64    REVIDX_ISCENSORED,
65    REVIDX_RAWTEXT_CHANGING_FLAGS,
66)
67from .thirdparty import attr
68from . import (
69    ancestor,
70    dagop,
71    error,
72    mdiff,
73    policy,
74    pycompat,
75    revlogutils,
76    templatefilters,
77    util,
78)
79from .interfaces import (
80    repository,
81    util as interfaceutil,
82)
83from .revlogutils import (
84    deltas as deltautil,
85    docket as docketutil,
86    flagutil,
87    nodemap as nodemaputil,
88    randomaccessfile,
89    revlogv0,
90    rewrite,
91    sidedata as sidedatautil,
92)
93from .utils import (
94    storageutil,
95    stringutil,
96)
97
98# blanked usage of all the name to prevent pyflakes constraints
99# We need these name available in the module for extensions.
100
101REVLOGV0
102REVLOGV1
103REVLOGV2
104FLAG_INLINE_DATA
105FLAG_GENERALDELTA
106REVLOG_DEFAULT_FLAGS
107REVLOG_DEFAULT_FORMAT
108REVLOG_DEFAULT_VERSION
109REVLOGV1_FLAGS
110REVLOGV2_FLAGS
111REVIDX_ISCENSORED
112REVIDX_ELLIPSIS
113REVIDX_HASCOPIESINFO
114REVIDX_EXTSTORED
115REVIDX_DEFAULT_FLAGS
116REVIDX_FLAGS_ORDER
117REVIDX_RAWTEXT_CHANGING_FLAGS
118
119parsers = policy.importmod('parsers')
120rustancestor = policy.importrust('ancestor')
121rustdagop = policy.importrust('dagop')
122rustrevlog = policy.importrust('revlog')
123
124# Aliased for performance.
125_zlibdecompress = zlib.decompress
126
127# max size of revlog with inline data
128_maxinline = 131072
129
130# Flag processors for REVIDX_ELLIPSIS.
131def ellipsisreadprocessor(rl, text):
132    return text, False
133
134
135def ellipsiswriteprocessor(rl, text):
136    return text, False
137
138
139def ellipsisrawprocessor(rl, text):
140    return False
141
142
143ellipsisprocessor = (
144    ellipsisreadprocessor,
145    ellipsiswriteprocessor,
146    ellipsisrawprocessor,
147)
148
149
150def _verify_revision(rl, skipflags, state, node):
151    """Verify the integrity of the given revlog ``node`` while providing a hook
152    point for extensions to influence the operation."""
153    if skipflags:
154        state[b'skipread'].add(node)
155    else:
156        # Side-effect: read content and verify hash.
157        rl.revision(node)
158
159
160# True if a fast implementation for persistent-nodemap is available
161#
162# We also consider we have a "fast" implementation in "pure" python because
163# people using pure don't really have performance consideration (and a
164# wheelbarrow of other slowness source)
165HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166    parsers, 'BaseIndexObject'
167)
168
169
170@interfaceutil.implementer(repository.irevisiondelta)
171@attr.s(slots=True)
172class revlogrevisiondelta(object):
173    node = attr.ib()
174    p1node = attr.ib()
175    p2node = attr.ib()
176    basenode = attr.ib()
177    flags = attr.ib()
178    baserevisionsize = attr.ib()
179    revision = attr.ib()
180    delta = attr.ib()
181    sidedata = attr.ib()
182    protocol_flags = attr.ib()
183    linknode = attr.ib(default=None)
184
185
186@interfaceutil.implementer(repository.iverifyproblem)
187@attr.s(frozen=True)
188class revlogproblem(object):
189    warning = attr.ib(default=None)
190    error = attr.ib(default=None)
191    node = attr.ib(default=None)
192
193
194def parse_index_v1(data, inline):
195    # call the C implementation to parse the index data
196    index, cache = parsers.parse_index2(data, inline)
197    return index, cache
198
199
200def parse_index_v2(data, inline):
201    # call the C implementation to parse the index data
202    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203    return index, cache
204
205
206def parse_index_cl_v2(data, inline):
207    # call the C implementation to parse the index data
208    assert not inline
209    from .pure.parsers import parse_index_cl_v2
210
211    index, cache = parse_index_cl_v2(data)
212    return index, cache
213
214
215if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216
217    def parse_index_v1_nodemap(data, inline):
218        index, cache = parsers.parse_index_devel_nodemap(data, inline)
219        return index, cache
220
221
222else:
223    parse_index_v1_nodemap = None
224
225
226def parse_index_v1_mixed(data, inline):
227    index, cache = parse_index_v1(data, inline)
228    return rustrevlog.MixedIndex(index), cache
229
230
231# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232# signed integer)
233_maxentrysize = 0x7FFFFFFF
234
235FILE_TOO_SHORT_MSG = _(
236    b'cannot read from revlog %s;'
237    b'  expected %d bytes from offset %d, data size is %d'
238)
239
240
241class revlog(object):
242    """
243    the underlying revision storage object
244
245    A revlog consists of two parts, an index and the revision data.
246
247    The index is a file with a fixed record size containing
248    information on each revision, including its nodeid (hash), the
249    nodeids of its parents, the position and offset of its data within
250    the data file, and the revision it's based on. Finally, each entry
251    contains a linkrev entry that can serve as a pointer to external
252    data.
253
254    The revision data itself is a linear collection of data chunks.
255    Each chunk represents a revision and is usually represented as a
256    delta against the previous chunk. To bound lookup time, runs of
257    deltas are limited to about 2 times the length of the original
258    version data. This makes retrieval of a version proportional to
259    its size, or O(1) relative to the number of revisions.
260
261    Both pieces of the revlog are written to in an append-only
262    fashion, which means we never need to rewrite a file to insert or
263    remove data, and can use some simple techniques to avoid the need
264    for locking while reading.
265
266    If checkambig, indexfile is opened with checkambig=True at
267    writing, to avoid file stat ambiguity.
268
269    If mmaplargeindex is True, and an mmapindexthreshold is set, the
270    index will be mmapped rather than read if it is larger than the
271    configured threshold.
272
273    If censorable is True, the revlog can have censored revisions.
274
275    If `upperboundcomp` is not None, this is the expected maximal gain from
276    compression for the data content.
277
278    `concurrencychecker` is an optional function that receives 3 arguments: a
279    file handle, a filename, and an expected position. It should check whether
280    the current position in the file handle is valid, and log/warn/fail (by
281    raising).
282
283    See mercurial/revlogutils/contants.py for details about the content of an
284    index entry.
285    """
286
287    _flagserrorclass = error.RevlogError
288
289    def __init__(
290        self,
291        opener,
292        target,
293        radix,
294        postfix=None,  # only exist for `tmpcensored` now
295        checkambig=False,
296        mmaplargeindex=False,
297        censorable=False,
298        upperboundcomp=None,
299        persistentnodemap=False,
300        concurrencychecker=None,
301        trypending=False,
302    ):
303        """
304        create a revlog object
305
306        opener is a function that abstracts the file opening operation
307        and can be used to implement COW semantics or the like.
308
309        `target`: a (KIND, ID) tuple that identify the content stored in
310        this revlog. It help the rest of the code to understand what the revlog
311        is about without having to resort to heuristic and index filename
312        analysis. Note: that this must be reliably be set by normal code, but
313        that test, debug, or performance measurement code might not set this to
314        accurate value.
315        """
316        self.upperboundcomp = upperboundcomp
317
318        self.radix = radix
319
320        self._docket_file = None
321        self._indexfile = None
322        self._datafile = None
323        self._sidedatafile = None
324        self._nodemap_file = None
325        self.postfix = postfix
326        self._trypending = trypending
327        self.opener = opener
328        if persistentnodemap:
329            self._nodemap_file = nodemaputil.get_nodemap_file(self)
330
331        assert target[0] in ALL_KINDS
332        assert len(target) == 2
333        self.target = target
334        #  When True, indexfile is opened with checkambig=True at writing, to
335        #  avoid file stat ambiguity.
336        self._checkambig = checkambig
337        self._mmaplargeindex = mmaplargeindex
338        self._censorable = censorable
339        # 3-tuple of (node, rev, text) for a raw revision.
340        self._revisioncache = None
341        # Maps rev to chain base rev.
342        self._chainbasecache = util.lrucachedict(100)
343        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
344        self._chunkcache = (0, b'')
345        # How much data to read and cache into the raw revlog data cache.
346        self._chunkcachesize = 65536
347        self._maxchainlen = None
348        self._deltabothparents = True
349        self.index = None
350        self._docket = None
351        self._nodemap_docket = None
352        # Mapping of partial identifiers to full nodes.
353        self._pcache = {}
354        # Mapping of revision integer to full node.
355        self._compengine = b'zlib'
356        self._compengineopts = {}
357        self._maxdeltachainspan = -1
358        self._withsparseread = False
359        self._sparserevlog = False
360        self.hassidedata = False
361        self._srdensitythreshold = 0.50
362        self._srmingapsize = 262144
363
364        # Make copy of flag processors so each revlog instance can support
365        # custom flags.
366        self._flagprocessors = dict(flagutil.flagprocessors)
367
368        # 3-tuple of file handles being used for active writing.
369        self._writinghandles = None
370        # prevent nesting of addgroup
371        self._adding_group = None
372
373        self._loadindex()
374
375        self._concurrencychecker = concurrencychecker
376
377    def _init_opts(self):
378        """process options (from above/config) to setup associated default revlog mode
379
380        These values might be affected when actually reading on disk information.
381
382        The relevant values are returned for use in _loadindex().
383
384        * newversionflags:
385            version header to use if we need to create a new revlog
386
387        * mmapindexthreshold:
388            minimal index size for start to use mmap
389
390        * force_nodemap:
391            force the usage of a "development" version of the nodemap code
392        """
393        mmapindexthreshold = None
394        opts = self.opener.options
395
396        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
397            new_header = CHANGELOGV2
398        elif b'revlogv2' in opts:
399            new_header = REVLOGV2
400        elif b'revlogv1' in opts:
401            new_header = REVLOGV1 | FLAG_INLINE_DATA
402            if b'generaldelta' in opts:
403                new_header |= FLAG_GENERALDELTA
404        elif b'revlogv0' in self.opener.options:
405            new_header = REVLOGV0
406        else:
407            new_header = REVLOG_DEFAULT_VERSION
408
409        if b'chunkcachesize' in opts:
410            self._chunkcachesize = opts[b'chunkcachesize']
411        if b'maxchainlen' in opts:
412            self._maxchainlen = opts[b'maxchainlen']
413        if b'deltabothparents' in opts:
414            self._deltabothparents = opts[b'deltabothparents']
415        self._lazydelta = bool(opts.get(b'lazydelta', True))
416        self._lazydeltabase = False
417        if self._lazydelta:
418            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
419        if b'compengine' in opts:
420            self._compengine = opts[b'compengine']
421        if b'zlib.level' in opts:
422            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
423        if b'zstd.level' in opts:
424            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
425        if b'maxdeltachainspan' in opts:
426            self._maxdeltachainspan = opts[b'maxdeltachainspan']
427        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
428            mmapindexthreshold = opts[b'mmapindexthreshold']
429        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430        withsparseread = bool(opts.get(b'with-sparse-read', False))
431        # sparse-revlog forces sparse-read
432        self._withsparseread = self._sparserevlog or withsparseread
433        if b'sparse-read-density-threshold' in opts:
434            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435        if b'sparse-read-min-gap-size' in opts:
436            self._srmingapsize = opts[b'sparse-read-min-gap-size']
437        if opts.get(b'enableellipsis'):
438            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439
440        # revlog v0 doesn't have flag processors
441        for flag, processor in pycompat.iteritems(
442            opts.get(b'flagprocessors', {})
443        ):
444            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445
446        if self._chunkcachesize <= 0:
447            raise error.RevlogError(
448                _(b'revlog chunk cache size %r is not greater than 0')
449                % self._chunkcachesize
450            )
451        elif self._chunkcachesize & (self._chunkcachesize - 1):
452            raise error.RevlogError(
453                _(b'revlog chunk cache size %r is not a power of 2')
454                % self._chunkcachesize
455            )
456        force_nodemap = opts.get(b'devel-force-nodemap', False)
457        return new_header, mmapindexthreshold, force_nodemap
458
459    def _get_data(self, filepath, mmap_threshold, size=None):
460        """return a file content with or without mmap
461
462        If the file is missing return the empty string"""
463        try:
464            with self.opener(filepath) as fp:
465                if mmap_threshold is not None:
466                    file_size = self.opener.fstat(fp).st_size
467                    if file_size >= mmap_threshold:
468                        if size is not None:
469                            # avoid potentiel mmap crash
470                            size = min(file_size, size)
471                        # TODO: should .close() to release resources without
472                        # relying on Python GC
473                        if size is None:
474                            return util.buffer(util.mmapread(fp))
475                        else:
476                            return util.buffer(util.mmapread(fp, size))
477                if size is None:
478                    return fp.read()
479                else:
480                    return fp.read(size)
481        except IOError as inst:
482            if inst.errno != errno.ENOENT:
483                raise
484            return b''
485
486    def _loadindex(self, docket=None):
487
488        new_header, mmapindexthreshold, force_nodemap = self._init_opts()
489
490        if self.postfix is not None:
491            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
492        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
493            entry_point = b'%s.i.a' % self.radix
494        else:
495            entry_point = b'%s.i' % self.radix
496
497        if docket is not None:
498            self._docket = docket
499            self._docket_file = entry_point
500        else:
501            entry_data = b''
502            self._initempty = True
503            entry_data = self._get_data(entry_point, mmapindexthreshold)
504            if len(entry_data) > 0:
505                header = INDEX_HEADER.unpack(entry_data[:4])[0]
506                self._initempty = False
507            else:
508                header = new_header
509
510            self._format_flags = header & ~0xFFFF
511            self._format_version = header & 0xFFFF
512
513            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
514            if supported_flags is None:
515                msg = _(b'unknown version (%d) in revlog %s')
516                msg %= (self._format_version, self.display_id)
517                raise error.RevlogError(msg)
518            elif self._format_flags & ~supported_flags:
519                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
520                display_flag = self._format_flags >> 16
521                msg %= (display_flag, self._format_version, self.display_id)
522                raise error.RevlogError(msg)
523
524            features = FEATURES_BY_VERSION[self._format_version]
525            self._inline = features[b'inline'](self._format_flags)
526            self._generaldelta = features[b'generaldelta'](self._format_flags)
527            self.hassidedata = features[b'sidedata']
528
529            if not features[b'docket']:
530                self._indexfile = entry_point
531                index_data = entry_data
532            else:
533                self._docket_file = entry_point
534                if self._initempty:
535                    self._docket = docketutil.default_docket(self, header)
536                else:
537                    self._docket = docketutil.parse_docket(
538                        self, entry_data, use_pending=self._trypending
539                    )
540
541        if self._docket is not None:
542            self._indexfile = self._docket.index_filepath()
543            index_data = b''
544            index_size = self._docket.index_end
545            if index_size > 0:
546                index_data = self._get_data(
547                    self._indexfile, mmapindexthreshold, size=index_size
548                )
549                if len(index_data) < index_size:
550                    msg = _(b'too few index data for %s: got %d, expected %d')
551                    msg %= (self.display_id, len(index_data), index_size)
552                    raise error.RevlogError(msg)
553
554            self._inline = False
555            # generaldelta implied by version 2 revlogs.
556            self._generaldelta = True
557            # the logic for persistent nodemap will be dealt with within the
558            # main docket, so disable it for now.
559            self._nodemap_file = None
560
561        if self._docket is not None:
562            self._datafile = self._docket.data_filepath()
563            self._sidedatafile = self._docket.sidedata_filepath()
564        elif self.postfix is None:
565            self._datafile = b'%s.d' % self.radix
566        else:
567            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
568
569        self.nodeconstants = sha1nodeconstants
570        self.nullid = self.nodeconstants.nullid
571
572        # sparse-revlog can't be on without general-delta (issue6056)
573        if not self._generaldelta:
574            self._sparserevlog = False
575
576        self._storedeltachains = True
577
578        devel_nodemap = (
579            self._nodemap_file
580            and force_nodemap
581            and parse_index_v1_nodemap is not None
582        )
583
584        use_rust_index = False
585        if rustrevlog is not None:
586            if self._nodemap_file is not None:
587                use_rust_index = True
588            else:
589                use_rust_index = self.opener.options.get(b'rust.index')
590
591        self._parse_index = parse_index_v1
592        if self._format_version == REVLOGV0:
593            self._parse_index = revlogv0.parse_index_v0
594        elif self._format_version == REVLOGV2:
595            self._parse_index = parse_index_v2
596        elif self._format_version == CHANGELOGV2:
597            self._parse_index = parse_index_cl_v2
598        elif devel_nodemap:
599            self._parse_index = parse_index_v1_nodemap
600        elif use_rust_index:
601            self._parse_index = parse_index_v1_mixed
602        try:
603            d = self._parse_index(index_data, self._inline)
604            index, chunkcache = d
605            use_nodemap = (
606                not self._inline
607                and self._nodemap_file is not None
608                and util.safehasattr(index, 'update_nodemap_data')
609            )
610            if use_nodemap:
611                nodemap_data = nodemaputil.persisted_data(self)
612                if nodemap_data is not None:
613                    docket = nodemap_data[0]
614                    if (
615                        len(d[0]) > docket.tip_rev
616                        and d[0][docket.tip_rev][7] == docket.tip_node
617                    ):
618                        # no changelog tampering
619                        self._nodemap_docket = docket
620                        index.update_nodemap_data(*nodemap_data)
621        except (ValueError, IndexError):
622            raise error.RevlogError(
623                _(b"index %s is corrupted") % self.display_id
624            )
625        self.index = index
626        self._segmentfile = randomaccessfile.randomaccessfile(
627            self.opener,
628            (self._indexfile if self._inline else self._datafile),
629            self._chunkcachesize,
630            chunkcache,
631        )
632        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
633            self.opener,
634            self._sidedatafile,
635            self._chunkcachesize,
636        )
637        # revnum -> (chain-length, sum-delta-length)
638        self._chaininfocache = util.lrucachedict(500)
639        # revlog header -> revlog compressor
640        self._decompressors = {}
641
642    @util.propertycache
643    def revlog_kind(self):
644        return self.target[0]
645
646    @util.propertycache
647    def display_id(self):
648        """The public facing "ID" of the revlog that we use in message"""
649        # Maybe we should build a user facing representation of
650        # revlog.target instead of using `self.radix`
651        return self.radix
652
653    def _get_decompressor(self, t):
654        try:
655            compressor = self._decompressors[t]
656        except KeyError:
657            try:
658                engine = util.compengines.forrevlogheader(t)
659                compressor = engine.revlogcompressor(self._compengineopts)
660                self._decompressors[t] = compressor
661            except KeyError:
662                raise error.RevlogError(
663                    _(b'unknown compression type %s') % binascii.hexlify(t)
664                )
665        return compressor
666
667    @util.propertycache
668    def _compressor(self):
669        engine = util.compengines[self._compengine]
670        return engine.revlogcompressor(self._compengineopts)
671
672    @util.propertycache
673    def _decompressor(self):
674        """the default decompressor"""
675        if self._docket is None:
676            return None
677        t = self._docket.default_compression_header
678        c = self._get_decompressor(t)
679        return c.decompress
680
681    def _indexfp(self):
682        """file object for the revlog's index file"""
683        return self.opener(self._indexfile, mode=b"r")
684
685    def __index_write_fp(self):
686        # You should not use this directly and use `_writing` instead
687        try:
688            f = self.opener(
689                self._indexfile, mode=b"r+", checkambig=self._checkambig
690            )
691            if self._docket is None:
692                f.seek(0, os.SEEK_END)
693            else:
694                f.seek(self._docket.index_end, os.SEEK_SET)
695            return f
696        except IOError as inst:
697            if inst.errno != errno.ENOENT:
698                raise
699            return self.opener(
700                self._indexfile, mode=b"w+", checkambig=self._checkambig
701            )
702
703    def __index_new_fp(self):
704        # You should not use this unless you are upgrading from inline revlog
705        return self.opener(
706            self._indexfile,
707            mode=b"w",
708            checkambig=self._checkambig,
709            atomictemp=True,
710        )
711
712    def _datafp(self, mode=b'r'):
713        """file object for the revlog's data file"""
714        return self.opener(self._datafile, mode=mode)
715
716    @contextlib.contextmanager
717    def _sidedatareadfp(self):
718        """file object suitable to read sidedata"""
719        if self._writinghandles:
720            yield self._writinghandles[2]
721        else:
722            with self.opener(self._sidedatafile) as fp:
723                yield fp
724
725    def tiprev(self):
726        return len(self.index) - 1
727
728    def tip(self):
729        return self.node(self.tiprev())
730
731    def __contains__(self, rev):
732        return 0 <= rev < len(self)
733
734    def __len__(self):
735        return len(self.index)
736
737    def __iter__(self):
738        return iter(pycompat.xrange(len(self)))
739
740    def revs(self, start=0, stop=None):
741        """iterate over all rev in this revlog (from start to stop)"""
742        return storageutil.iterrevs(len(self), start=start, stop=stop)
743
744    @property
745    def nodemap(self):
746        msg = (
747            b"revlog.nodemap is deprecated, "
748            b"use revlog.index.[has_node|rev|get_rev]"
749        )
750        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
751        return self.index.nodemap
752
753    @property
754    def _nodecache(self):
755        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
756        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
757        return self.index.nodemap
758
759    def hasnode(self, node):
760        try:
761            self.rev(node)
762            return True
763        except KeyError:
764            return False
765
766    def candelta(self, baserev, rev):
767        """whether two revisions (baserev, rev) can be delta-ed or not"""
768        # Disable delta if either rev requires a content-changing flag
769        # processor (ex. LFS). This is because such flag processor can alter
770        # the rawtext content that the delta will be based on, and two clients
771        # could have a same revlog node with different flags (i.e. different
772        # rawtext contents) and the delta could be incompatible.
773        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
774            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
775        ):
776            return False
777        return True
778
779    def update_caches(self, transaction):
780        if self._nodemap_file is not None:
781            if transaction is None:
782                nodemaputil.update_persistent_nodemap(self)
783            else:
784                nodemaputil.setup_persistent_nodemap(transaction, self)
785
786    def clearcaches(self):
787        self._revisioncache = None
788        self._chainbasecache.clear()
789        self._segmentfile.clear_cache()
790        self._segmentfile_sidedata.clear_cache()
791        self._pcache = {}
792        self._nodemap_docket = None
793        self.index.clearcaches()
794        # The python code is the one responsible for validating the docket, we
795        # end up having to refresh it here.
796        use_nodemap = (
797            not self._inline
798            and self._nodemap_file is not None
799            and util.safehasattr(self.index, 'update_nodemap_data')
800        )
801        if use_nodemap:
802            nodemap_data = nodemaputil.persisted_data(self)
803            if nodemap_data is not None:
804                self._nodemap_docket = nodemap_data[0]
805                self.index.update_nodemap_data(*nodemap_data)
806
807    def rev(self, node):
808        try:
809            return self.index.rev(node)
810        except TypeError:
811            raise
812        except error.RevlogError:
813            # parsers.c radix tree lookup failed
814            if (
815                node == self.nodeconstants.wdirid
816                or node in self.nodeconstants.wdirfilenodeids
817            ):
818                raise error.WdirUnsupported
819            raise error.LookupError(node, self.display_id, _(b'no node'))
820
821    # Accessors for index entries.
822
823    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
824    # are flags.
825    def start(self, rev):
826        return int(self.index[rev][0] >> 16)
827
828    def sidedata_cut_off(self, rev):
829        sd_cut_off = self.index[rev][8]
830        if sd_cut_off != 0:
831            return sd_cut_off
832        # This is some annoying dance, because entries without sidedata
833        # currently use 0 as their ofsset. (instead of previous-offset +
834        # previous-size)
835        #
836        # We should reconsider this sidedata → 0 sidata_offset policy.
837        # In the meantime, we need this.
838        while 0 <= rev:
839            e = self.index[rev]
840            if e[9] != 0:
841                return e[8] + e[9]
842            rev -= 1
843        return 0
844
845    def flags(self, rev):
846        return self.index[rev][0] & 0xFFFF
847
848    def length(self, rev):
849        return self.index[rev][1]
850
851    def sidedata_length(self, rev):
852        if not self.hassidedata:
853            return 0
854        return self.index[rev][9]
855
856    def rawsize(self, rev):
857        """return the length of the uncompressed text for a given revision"""
858        l = self.index[rev][2]
859        if l >= 0:
860            return l
861
862        t = self.rawdata(rev)
863        return len(t)
864
865    def size(self, rev):
866        """length of non-raw text (processed by a "read" flag processor)"""
867        # fast path: if no "read" flag processor could change the content,
868        # size is rawsize. note: ELLIPSIS is known to not change the content.
869        flags = self.flags(rev)
870        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
871            return self.rawsize(rev)
872
873        return len(self.revision(rev, raw=False))
874
875    def chainbase(self, rev):
876        base = self._chainbasecache.get(rev)
877        if base is not None:
878            return base
879
880        index = self.index
881        iterrev = rev
882        base = index[iterrev][3]
883        while base != iterrev:
884            iterrev = base
885            base = index[iterrev][3]
886
887        self._chainbasecache[rev] = base
888        return base
889
890    def linkrev(self, rev):
891        return self.index[rev][4]
892
893    def parentrevs(self, rev):
894        try:
895            entry = self.index[rev]
896        except IndexError:
897            if rev == wdirrev:
898                raise error.WdirUnsupported
899            raise
900
901        return entry[5], entry[6]
902
903    # fast parentrevs(rev) where rev isn't filtered
904    _uncheckedparentrevs = parentrevs
905
906    def node(self, rev):
907        try:
908            return self.index[rev][7]
909        except IndexError:
910            if rev == wdirrev:
911                raise error.WdirUnsupported
912            raise
913
914    # Derived from index values.
915
916    def end(self, rev):
917        return self.start(rev) + self.length(rev)
918
919    def parents(self, node):
920        i = self.index
921        d = i[self.rev(node)]
922        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline
923
924    def chainlen(self, rev):
925        return self._chaininfo(rev)[0]
926
927    def _chaininfo(self, rev):
928        chaininfocache = self._chaininfocache
929        if rev in chaininfocache:
930            return chaininfocache[rev]
931        index = self.index
932        generaldelta = self._generaldelta
933        iterrev = rev
934        e = index[iterrev]
935        clen = 0
936        compresseddeltalen = 0
937        while iterrev != e[3]:
938            clen += 1
939            compresseddeltalen += e[1]
940            if generaldelta:
941                iterrev = e[3]
942            else:
943                iterrev -= 1
944            if iterrev in chaininfocache:
945                t = chaininfocache[iterrev]
946                clen += t[0]
947                compresseddeltalen += t[1]
948                break
949            e = index[iterrev]
950        else:
951            # Add text length of base since decompressing that also takes
952            # work. For cache hits the length is already included.
953            compresseddeltalen += e[1]
954        r = (clen, compresseddeltalen)
955        chaininfocache[rev] = r
956        return r
957
958    def _deltachain(self, rev, stoprev=None):
959        """Obtain the delta chain for a revision.
960
961        ``stoprev`` specifies a revision to stop at. If not specified, we
962        stop at the base of the chain.
963
964        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
965        revs in ascending order and ``stopped`` is a bool indicating whether
966        ``stoprev`` was hit.
967        """
968        # Try C implementation.
969        try:
970            return self.index.deltachain(rev, stoprev, self._generaldelta)
971        except AttributeError:
972            pass
973
974        chain = []
975
976        # Alias to prevent attribute lookup in tight loop.
977        index = self.index
978        generaldelta = self._generaldelta
979
980        iterrev = rev
981        e = index[iterrev]
982        while iterrev != e[3] and iterrev != stoprev:
983            chain.append(iterrev)
984            if generaldelta:
985                iterrev = e[3]
986            else:
987                iterrev -= 1
988            e = index[iterrev]
989
990        if iterrev == stoprev:
991            stopped = True
992        else:
993            chain.append(iterrev)
994            stopped = False
995
996        chain.reverse()
997        return chain, stopped
998
999    def ancestors(self, revs, stoprev=0, inclusive=False):
1000        """Generate the ancestors of 'revs' in reverse revision order.
1001        Does not generate revs lower than stoprev.
1002
1003        See the documentation for ancestor.lazyancestors for more details."""
1004
1005        # first, make sure start revisions aren't filtered
1006        revs = list(revs)
1007        checkrev = self.node
1008        for r in revs:
1009            checkrev(r)
1010        # and we're sure ancestors aren't filtered as well
1011
1012        if rustancestor is not None and self.index.rust_ext_compat:
1013            lazyancestors = rustancestor.LazyAncestors
1014            arg = self.index
1015        else:
1016            lazyancestors = ancestor.lazyancestors
1017            arg = self._uncheckedparentrevs
1018        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1019
1020    def descendants(self, revs):
1021        return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1022
1023    def findcommonmissing(self, common=None, heads=None):
1024        """Return a tuple of the ancestors of common and the ancestors of heads
1025        that are not ancestors of common. In revset terminology, we return the
1026        tuple:
1027
1028          ::common, (::heads) - (::common)
1029
1030        The list is sorted by revision number, meaning it is
1031        topologically sorted.
1032
1033        'heads' and 'common' are both lists of node IDs.  If heads is
1034        not supplied, uses all of the revlog's heads.  If common is not
1035        supplied, uses nullid."""
1036        if common is None:
1037            common = [self.nullid]
1038        if heads is None:
1039            heads = self.heads()
1040
1041        common = [self.rev(n) for n in common]
1042        heads = [self.rev(n) for n in heads]
1043
1044        # we want the ancestors, but inclusive
1045        class lazyset(object):
1046            def __init__(self, lazyvalues):
1047                self.addedvalues = set()
1048                self.lazyvalues = lazyvalues
1049
1050            def __contains__(self, value):
1051                return value in self.addedvalues or value in self.lazyvalues
1052
1053            def __iter__(self):
1054                added = self.addedvalues
1055                for r in added:
1056                    yield r
1057                for r in self.lazyvalues:
1058                    if not r in added:
1059                        yield r
1060
1061            def add(self, value):
1062                self.addedvalues.add(value)
1063
1064            def update(self, values):
1065                self.addedvalues.update(values)
1066
1067        has = lazyset(self.ancestors(common))
1068        has.add(nullrev)
1069        has.update(common)
1070
1071        # take all ancestors from heads that aren't in has
1072        missing = set()
1073        visit = collections.deque(r for r in heads if r not in has)
1074        while visit:
1075            r = visit.popleft()
1076            if r in missing:
1077                continue
1078            else:
1079                missing.add(r)
1080                for p in self.parentrevs(r):
1081                    if p not in has:
1082                        visit.append(p)
1083        missing = list(missing)
1084        missing.sort()
1085        return has, [self.node(miss) for miss in missing]
1086
1087    def incrementalmissingrevs(self, common=None):
1088        """Return an object that can be used to incrementally compute the
1089        revision numbers of the ancestors of arbitrary sets that are not
1090        ancestors of common. This is an ancestor.incrementalmissingancestors
1091        object.
1092
1093        'common' is a list of revision numbers. If common is not supplied, uses
1094        nullrev.
1095        """
1096        if common is None:
1097            common = [nullrev]
1098
1099        if rustancestor is not None and self.index.rust_ext_compat:
1100            return rustancestor.MissingAncestors(self.index, common)
1101        return ancestor.incrementalmissingancestors(self.parentrevs, common)
1102
1103    def findmissingrevs(self, common=None, heads=None):
1104        """Return the revision numbers of the ancestors of heads that
1105        are not ancestors of common.
1106
1107        More specifically, return a list of revision numbers corresponding to
1108        nodes N such that every N satisfies the following constraints:
1109
1110          1. N is an ancestor of some node in 'heads'
1111          2. N is not an ancestor of any node in 'common'
1112
1113        The list is sorted by revision number, meaning it is
1114        topologically sorted.
1115
1116        'heads' and 'common' are both lists of revision numbers.  If heads is
1117        not supplied, uses all of the revlog's heads.  If common is not
1118        supplied, uses nullid."""
1119        if common is None:
1120            common = [nullrev]
1121        if heads is None:
1122            heads = self.headrevs()
1123
1124        inc = self.incrementalmissingrevs(common=common)
1125        return inc.missingancestors(heads)
1126
1127    def findmissing(self, common=None, heads=None):
1128        """Return the ancestors of heads that are not ancestors of common.
1129
1130        More specifically, return a list of nodes N such that every N
1131        satisfies the following constraints:
1132
1133          1. N is an ancestor of some node in 'heads'
1134          2. N is not an ancestor of any node in 'common'
1135
1136        The list is sorted by revision number, meaning it is
1137        topologically sorted.
1138
1139        'heads' and 'common' are both lists of node IDs.  If heads is
1140        not supplied, uses all of the revlog's heads.  If common is not
1141        supplied, uses nullid."""
1142        if common is None:
1143            common = [self.nullid]
1144        if heads is None:
1145            heads = self.heads()
1146
1147        common = [self.rev(n) for n in common]
1148        heads = [self.rev(n) for n in heads]
1149
1150        inc = self.incrementalmissingrevs(common=common)
1151        return [self.node(r) for r in inc.missingancestors(heads)]
1152
1153    def nodesbetween(self, roots=None, heads=None):
1154        """Return a topological path from 'roots' to 'heads'.
1155
1156        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1157        topologically sorted list of all nodes N that satisfy both of
1158        these constraints:
1159
1160          1. N is a descendant of some node in 'roots'
1161          2. N is an ancestor of some node in 'heads'
1162
1163        Every node is considered to be both a descendant and an ancestor
1164        of itself, so every reachable node in 'roots' and 'heads' will be
1165        included in 'nodes'.
1166
1167        'outroots' is the list of reachable nodes in 'roots', i.e., the
1168        subset of 'roots' that is returned in 'nodes'.  Likewise,
1169        'outheads' is the subset of 'heads' that is also in 'nodes'.
1170
1171        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
1172        unspecified, uses nullid as the only root.  If 'heads' is
1173        unspecified, uses list of all of the revlog's heads."""
1174        nonodes = ([], [], [])
1175        if roots is not None:
1176            roots = list(roots)
1177            if not roots:
1178                return nonodes
1179            lowestrev = min([self.rev(n) for n in roots])
1180        else:
1181            roots = [self.nullid]  # Everybody's a descendant of nullid
1182            lowestrev = nullrev
1183        if (lowestrev == nullrev) and (heads is None):
1184            # We want _all_ the nodes!
1185            return (
1186                [self.node(r) for r in self],
1187                [self.nullid],
1188                list(self.heads()),
1189            )
1190        if heads is None:
1191            # All nodes are ancestors, so the latest ancestor is the last
1192            # node.
1193            highestrev = len(self) - 1
1194            # Set ancestors to None to signal that every node is an ancestor.
1195            ancestors = None
1196            # Set heads to an empty dictionary for later discovery of heads
1197            heads = {}
1198        else:
1199            heads = list(heads)
1200            if not heads:
1201                return nonodes
1202            ancestors = set()
1203            # Turn heads into a dictionary so we can remove 'fake' heads.
1204            # Also, later we will be using it to filter out the heads we can't
1205            # find from roots.
1206            heads = dict.fromkeys(heads, False)
1207            # Start at the top and keep marking parents until we're done.
1208            nodestotag = set(heads)
1209            # Remember where the top was so we can use it as a limit later.
1210            highestrev = max([self.rev(n) for n in nodestotag])
1211            while nodestotag:
1212                # grab a node to tag
1213                n = nodestotag.pop()
1214                # Never tag nullid
1215                if n == self.nullid:
1216                    continue
1217                # A node's revision number represents its place in a
1218                # topologically sorted list of nodes.
1219                r = self.rev(n)
1220                if r >= lowestrev:
1221                    if n not in ancestors:
1222                        # If we are possibly a descendant of one of the roots
1223                        # and we haven't already been marked as an ancestor
1224                        ancestors.add(n)  # Mark as ancestor
1225                        # Add non-nullid parents to list of nodes to tag.
1226                        nodestotag.update(
1227                            [p for p in self.parents(n) if p != self.nullid]
1228                        )
1229                    elif n in heads:  # We've seen it before, is it a fake head?
1230                        # So it is, real heads should not be the ancestors of
1231                        # any other heads.
1232                        heads.pop(n)
1233            if not ancestors:
1234                return nonodes
1235            # Now that we have our set of ancestors, we want to remove any
1236            # roots that are not ancestors.
1237
1238            # If one of the roots was nullid, everything is included anyway.
1239            if lowestrev > nullrev:
1240                # But, since we weren't, let's recompute the lowest rev to not
1241                # include roots that aren't ancestors.
1242
1243                # Filter out roots that aren't ancestors of heads
1244                roots = [root for root in roots if root in ancestors]
1245                # Recompute the lowest revision
1246                if roots:
1247                    lowestrev = min([self.rev(root) for root in roots])
1248                else:
1249                    # No more roots?  Return empty list
1250                    return nonodes
1251            else:
1252                # We are descending from nullid, and don't need to care about
1253                # any other roots.
1254                lowestrev = nullrev
1255                roots = [self.nullid]
1256        # Transform our roots list into a set.
1257        descendants = set(roots)
1258        # Also, keep the original roots so we can filter out roots that aren't
1259        # 'real' roots (i.e. are descended from other roots).
1260        roots = descendants.copy()
1261        # Our topologically sorted list of output nodes.
1262        orderedout = []
1263        # Don't start at nullid since we don't want nullid in our output list,
1264        # and if nullid shows up in descendants, empty parents will look like
1265        # they're descendants.
1266        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1267            n = self.node(r)
1268            isdescendant = False
1269            if lowestrev == nullrev:  # Everybody is a descendant of nullid
1270                isdescendant = True
1271            elif n in descendants:
1272                # n is already a descendant
1273                isdescendant = True
1274                # This check only needs to be done here because all the roots
1275                # will start being marked is descendants before the loop.
1276                if n in roots:
1277                    # If n was a root, check if it's a 'real' root.
1278                    p = tuple(self.parents(n))
1279                    # If any of its parents are descendants, it's not a root.
1280                    if (p[0] in descendants) or (p[1] in descendants):
1281                        roots.remove(n)
1282            else:
1283                p = tuple(self.parents(n))
1284                # A node is a descendant if either of its parents are
1285                # descendants.  (We seeded the dependents list with the roots
1286                # up there, remember?)
1287                if (p[0] in descendants) or (p[1] in descendants):
1288                    descendants.add(n)
1289                    isdescendant = True
1290            if isdescendant and ((ancestors is None) or (n in ancestors)):
1291                # Only include nodes that are both descendants and ancestors.
1292                orderedout.append(n)
1293                if (ancestors is not None) and (n in heads):
1294                    # We're trying to figure out which heads are reachable
1295                    # from roots.
1296                    # Mark this head as having been reached
1297                    heads[n] = True
1298                elif ancestors is None:
1299                    # Otherwise, we're trying to discover the heads.
1300                    # Assume this is a head because if it isn't, the next step
1301                    # will eventually remove it.
1302                    heads[n] = True
1303                    # But, obviously its parents aren't.
1304                    for p in self.parents(n):
1305                        heads.pop(p, None)
1306        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1307        roots = list(roots)
1308        assert orderedout
1309        assert roots
1310        assert heads
1311        return (orderedout, roots, heads)
1312
1313    def headrevs(self, revs=None):
1314        if revs is None:
1315            try:
1316                return self.index.headrevs()
1317            except AttributeError:
1318                return self._headrevs()
1319        if rustdagop is not None and self.index.rust_ext_compat:
1320            return rustdagop.headrevs(self.index, revs)
1321        return dagop.headrevs(revs, self._uncheckedparentrevs)
1322
1323    def computephases(self, roots):
1324        return self.index.computephasesmapsets(roots)
1325
1326    def _headrevs(self):
1327        count = len(self)
1328        if not count:
1329            return [nullrev]
1330        # we won't iter over filtered rev so nobody is a head at start
1331        ishead = [0] * (count + 1)
1332        index = self.index
1333        for r in self:
1334            ishead[r] = 1  # I may be an head
1335            e = index[r]
1336            ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
1337        return [r for r, val in enumerate(ishead) if val]
1338
1339    def heads(self, start=None, stop=None):
1340        """return the list of all nodes that have no children
1341
1342        if start is specified, only heads that are descendants of
1343        start will be returned
1344        if stop is specified, it will consider all the revs from stop
1345        as if they had no children
1346        """
1347        if start is None and stop is None:
1348            if not len(self):
1349                return [self.nullid]
1350            return [self.node(r) for r in self.headrevs()]
1351
1352        if start is None:
1353            start = nullrev
1354        else:
1355            start = self.rev(start)
1356
1357        stoprevs = {self.rev(n) for n in stop or []}
1358
1359        revs = dagop.headrevssubset(
1360            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1361        )
1362
1363        return [self.node(rev) for rev in revs]
1364
1365    def children(self, node):
1366        """find the children of a given node"""
1367        c = []
1368        p = self.rev(node)
1369        for r in self.revs(start=p + 1):
1370            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1371            if prevs:
1372                for pr in prevs:
1373                    if pr == p:
1374                        c.append(self.node(r))
1375            elif p == nullrev:
1376                c.append(self.node(r))
1377        return c
1378
1379    def commonancestorsheads(self, a, b):
1380        """calculate all the heads of the common ancestors of nodes a and b"""
1381        a, b = self.rev(a), self.rev(b)
1382        ancs = self._commonancestorsheads(a, b)
1383        return pycompat.maplist(self.node, ancs)
1384
1385    def _commonancestorsheads(self, *revs):
1386        """calculate all the heads of the common ancestors of revs"""
1387        try:
1388            ancs = self.index.commonancestorsheads(*revs)
1389        except (AttributeError, OverflowError):  # C implementation failed
1390            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1391        return ancs
1392
1393    def isancestor(self, a, b):
1394        """return True if node a is an ancestor of node b
1395
1396        A revision is considered an ancestor of itself."""
1397        a, b = self.rev(a), self.rev(b)
1398        return self.isancestorrev(a, b)
1399
1400    def isancestorrev(self, a, b):
1401        """return True if revision a is an ancestor of revision b
1402
1403        A revision is considered an ancestor of itself.
1404
1405        The implementation of this is trivial but the use of
1406        reachableroots is not."""
1407        if a == nullrev:
1408            return True
1409        elif a == b:
1410            return True
1411        elif a > b:
1412            return False
1413        return bool(self.reachableroots(a, [b], [a], includepath=False))
1414
1415    def reachableroots(self, minroot, heads, roots, includepath=False):
1416        """return (heads(::(<roots> and <roots>::<heads>)))
1417
1418        If includepath is True, return (<roots>::<heads>)."""
1419        try:
1420            return self.index.reachableroots2(
1421                minroot, heads, roots, includepath
1422            )
1423        except AttributeError:
1424            return dagop._reachablerootspure(
1425                self.parentrevs, minroot, roots, heads, includepath
1426            )
1427
1428    def ancestor(self, a, b):
1429        """calculate the "best" common ancestor of nodes a and b"""
1430
1431        a, b = self.rev(a), self.rev(b)
1432        try:
1433            ancs = self.index.ancestors(a, b)
1434        except (AttributeError, OverflowError):
1435            ancs = ancestor.ancestors(self.parentrevs, a, b)
1436        if ancs:
1437            # choose a consistent winner when there's a tie
1438            return min(map(self.node, ancs))
1439        return self.nullid
1440
1441    def _match(self, id):
1442        if isinstance(id, int):
1443            # rev
1444            return self.node(id)
1445        if len(id) == self.nodeconstants.nodelen:
1446            # possibly a binary node
1447            # odds of a binary node being all hex in ASCII are 1 in 10**25
1448            try:
1449                node = id
1450                self.rev(node)  # quick search the index
1451                return node
1452            except error.LookupError:
1453                pass  # may be partial hex id
1454        try:
1455            # str(rev)
1456            rev = int(id)
1457            if b"%d" % rev != id:
1458                raise ValueError
1459            if rev < 0:
1460                rev = len(self) + rev
1461            if rev < 0 or rev >= len(self):
1462                raise ValueError
1463            return self.node(rev)
1464        except (ValueError, OverflowError):
1465            pass
1466        if len(id) == 2 * self.nodeconstants.nodelen:
1467            try:
1468                # a full hex nodeid?
1469                node = bin(id)
1470                self.rev(node)
1471                return node
1472            except (TypeError, error.LookupError):
1473                pass
1474
1475    def _partialmatch(self, id):
1476        # we don't care wdirfilenodeids as they should be always full hash
1477        maybewdir = self.nodeconstants.wdirhex.startswith(id)
1478        ambiguous = False
1479        try:
1480            partial = self.index.partialmatch(id)
1481            if partial and self.hasnode(partial):
1482                if maybewdir:
1483                    # single 'ff...' match in radix tree, ambiguous with wdir
1484                    ambiguous = True
1485                else:
1486                    return partial
1487            elif maybewdir:
1488                # no 'ff...' match in radix tree, wdir identified
1489                raise error.WdirUnsupported
1490            else:
1491                return None
1492        except error.RevlogError:
1493            # parsers.c radix tree lookup gave multiple matches
1494            # fast path: for unfiltered changelog, radix tree is accurate
1495            if not getattr(self, 'filteredrevs', None):
1496                ambiguous = True
1497            # fall through to slow path that filters hidden revisions
1498        except (AttributeError, ValueError):
1499            # we are pure python, or key was too short to search radix tree
1500            pass
1501        if ambiguous:
1502            raise error.AmbiguousPrefixLookupError(
1503                id, self.display_id, _(b'ambiguous identifier')
1504            )
1505
1506        if id in self._pcache:
1507            return self._pcache[id]
1508
1509        if len(id) <= 40:
1510            try:
1511                # hex(node)[:...]
1512                l = len(id) // 2  # grab an even number of digits
1513                prefix = bin(id[: l * 2])
1514                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1515                nl = [
1516                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1517                ]
1518                if self.nodeconstants.nullhex.startswith(id):
1519                    nl.append(self.nullid)
1520                if len(nl) > 0:
1521                    if len(nl) == 1 and not maybewdir:
1522                        self._pcache[id] = nl[0]
1523                        return nl[0]
1524                    raise error.AmbiguousPrefixLookupError(
1525                        id, self.display_id, _(b'ambiguous identifier')
1526                    )
1527                if maybewdir:
1528                    raise error.WdirUnsupported
1529                return None
1530            except TypeError:
1531                pass
1532
1533    def lookup(self, id):
1534        """locate a node based on:
1535        - revision number or str(revision number)
1536        - nodeid or subset of hex nodeid
1537        """
1538        n = self._match(id)
1539        if n is not None:
1540            return n
1541        n = self._partialmatch(id)
1542        if n:
1543            return n
1544
1545        raise error.LookupError(id, self.display_id, _(b'no match found'))
1546
1547    def shortest(self, node, minlength=1):
1548        """Find the shortest unambiguous prefix that matches node."""
1549
1550        def isvalid(prefix):
1551            try:
1552                matchednode = self._partialmatch(prefix)
1553            except error.AmbiguousPrefixLookupError:
1554                return False
1555            except error.WdirUnsupported:
1556                # single 'ff...' match
1557                return True
1558            if matchednode is None:
1559                raise error.LookupError(node, self.display_id, _(b'no node'))
1560            return True
1561
1562        def maybewdir(prefix):
1563            return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1564
1565        hexnode = hex(node)
1566
1567        def disambiguate(hexnode, minlength):
1568            """Disambiguate against wdirid."""
1569            for length in range(minlength, len(hexnode) + 1):
1570                prefix = hexnode[:length]
1571                if not maybewdir(prefix):
1572                    return prefix
1573
1574        if not getattr(self, 'filteredrevs', None):
1575            try:
1576                length = max(self.index.shortest(node), minlength)
1577                return disambiguate(hexnode, length)
1578            except error.RevlogError:
1579                if node != self.nodeconstants.wdirid:
1580                    raise error.LookupError(
1581                        node, self.display_id, _(b'no node')
1582                    )
1583            except AttributeError:
1584                # Fall through to pure code
1585                pass
1586
1587        if node == self.nodeconstants.wdirid:
1588            for length in range(minlength, len(hexnode) + 1):
1589                prefix = hexnode[:length]
1590                if isvalid(prefix):
1591                    return prefix
1592
1593        for length in range(minlength, len(hexnode) + 1):
1594            prefix = hexnode[:length]
1595            if isvalid(prefix):
1596                return disambiguate(hexnode, length)
1597
1598    def cmp(self, node, text):
1599        """compare text with a given file revision
1600
1601        returns True if text is different than what is stored.
1602        """
1603        p1, p2 = self.parents(node)
1604        return storageutil.hashrevisionsha1(text, p1, p2) != node
1605
1606    def _getsegmentforrevs(self, startrev, endrev, df=None):
1607        """Obtain a segment of raw data corresponding to a range of revisions.
1608
1609        Accepts the start and end revisions and an optional already-open
1610        file handle to be used for reading. If the file handle is read, its
1611        seek position will not be preserved.
1612
1613        Requests for data may be satisfied by a cache.
1614
1615        Returns a 2-tuple of (offset, data) for the requested range of
1616        revisions. Offset is the integer offset from the beginning of the
1617        revlog and data is a str or buffer of the raw byte data.
1618
1619        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1620        to determine where each revision's data begins and ends.
1621        """
1622        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1623        # (functions are expensive).
1624        index = self.index
1625        istart = index[startrev]
1626        start = int(istart[0] >> 16)
1627        if startrev == endrev:
1628            end = start + istart[1]
1629        else:
1630            iend = index[endrev]
1631            end = int(iend[0] >> 16) + iend[1]
1632
1633        if self._inline:
1634            start += (startrev + 1) * self.index.entry_size
1635            end += (endrev + 1) * self.index.entry_size
1636        length = end - start
1637
1638        return start, self._segmentfile.read_chunk(start, length, df)
1639
1640    def _chunk(self, rev, df=None):
1641        """Obtain a single decompressed chunk for a revision.
1642
1643        Accepts an integer revision and an optional already-open file handle
1644        to be used for reading. If used, the seek position of the file will not
1645        be preserved.
1646
1647        Returns a str holding uncompressed data for the requested revision.
1648        """
1649        compression_mode = self.index[rev][10]
1650        data = self._getsegmentforrevs(rev, rev, df=df)[1]
1651        if compression_mode == COMP_MODE_PLAIN:
1652            return data
1653        elif compression_mode == COMP_MODE_DEFAULT:
1654            return self._decompressor(data)
1655        elif compression_mode == COMP_MODE_INLINE:
1656            return self.decompress(data)
1657        else:
1658            msg = b'unknown compression mode %d'
1659            msg %= compression_mode
1660            raise error.RevlogError(msg)
1661
1662    def _chunks(self, revs, df=None, targetsize=None):
1663        """Obtain decompressed chunks for the specified revisions.
1664
1665        Accepts an iterable of numeric revisions that are assumed to be in
1666        ascending order. Also accepts an optional already-open file handle
1667        to be used for reading. If used, the seek position of the file will
1668        not be preserved.
1669
1670        This function is similar to calling ``self._chunk()`` multiple times,
1671        but is faster.
1672
1673        Returns a list with decompressed data for each requested revision.
1674        """
1675        if not revs:
1676            return []
1677        start = self.start
1678        length = self.length
1679        inline = self._inline
1680        iosize = self.index.entry_size
1681        buffer = util.buffer
1682
1683        l = []
1684        ladd = l.append
1685
1686        if not self._withsparseread:
1687            slicedchunks = (revs,)
1688        else:
1689            slicedchunks = deltautil.slicechunk(
1690                self, revs, targetsize=targetsize
1691            )
1692
1693        for revschunk in slicedchunks:
1694            firstrev = revschunk[0]
1695            # Skip trailing revisions with empty diff
1696            for lastrev in revschunk[::-1]:
1697                if length(lastrev) != 0:
1698                    break
1699
1700            try:
1701                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1702            except OverflowError:
1703                # issue4215 - we can't cache a run of chunks greater than
1704                # 2G on Windows
1705                return [self._chunk(rev, df=df) for rev in revschunk]
1706
1707            decomp = self.decompress
1708            # self._decompressor might be None, but will not be used in that case
1709            def_decomp = self._decompressor
1710            for rev in revschunk:
1711                chunkstart = start(rev)
1712                if inline:
1713                    chunkstart += (rev + 1) * iosize
1714                chunklength = length(rev)
1715                comp_mode = self.index[rev][10]
1716                c = buffer(data, chunkstart - offset, chunklength)
1717                if comp_mode == COMP_MODE_PLAIN:
1718                    ladd(c)
1719                elif comp_mode == COMP_MODE_INLINE:
1720                    ladd(decomp(c))
1721                elif comp_mode == COMP_MODE_DEFAULT:
1722                    ladd(def_decomp(c))
1723                else:
1724                    msg = b'unknown compression mode %d'
1725                    msg %= comp_mode
1726                    raise error.RevlogError(msg)
1727
1728        return l
1729
1730    def deltaparent(self, rev):
1731        """return deltaparent of the given revision"""
1732        base = self.index[rev][3]
1733        if base == rev:
1734            return nullrev
1735        elif self._generaldelta:
1736            return base
1737        else:
1738            return rev - 1
1739
1740    def issnapshot(self, rev):
1741        """tells whether rev is a snapshot"""
1742        if not self._sparserevlog:
1743            return self.deltaparent(rev) == nullrev
1744        elif util.safehasattr(self.index, b'issnapshot'):
1745            # directly assign the method to cache the testing and access
1746            self.issnapshot = self.index.issnapshot
1747            return self.issnapshot(rev)
1748        if rev == nullrev:
1749            return True
1750        entry = self.index[rev]
1751        base = entry[3]
1752        if base == rev:
1753            return True
1754        if base == nullrev:
1755            return True
1756        p1 = entry[5]
1757        p2 = entry[6]
1758        if base == p1 or base == p2:
1759            return False
1760        return self.issnapshot(base)
1761
1762    def snapshotdepth(self, rev):
1763        """number of snapshot in the chain before this one"""
1764        if not self.issnapshot(rev):
1765            raise error.ProgrammingError(b'revision %d not a snapshot')
1766        return len(self._deltachain(rev)[0]) - 1
1767
1768    def revdiff(self, rev1, rev2):
1769        """return or calculate a delta between two revisions
1770
1771        The delta calculated is in binary form and is intended to be written to
1772        revlog data directly. So this function needs raw revision data.
1773        """
1774        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1775            return bytes(self._chunk(rev2))
1776
1777        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1778
1779    def _processflags(self, text, flags, operation, raw=False):
1780        """deprecated entry point to access flag processors"""
1781        msg = b'_processflag(...) use the specialized variant'
1782        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1783        if raw:
1784            return text, flagutil.processflagsraw(self, text, flags)
1785        elif operation == b'read':
1786            return flagutil.processflagsread(self, text, flags)
1787        else:  # write operation
1788            return flagutil.processflagswrite(self, text, flags)
1789
1790    def revision(self, nodeorrev, _df=None, raw=False):
1791        """return an uncompressed revision of a given node or revision
1792        number.
1793
1794        _df - an existing file handle to read from. (internal-only)
1795        raw - an optional argument specifying if the revision data is to be
1796        treated as raw data when applying flag transforms. 'raw' should be set
1797        to True when generating changegroups or in debug commands.
1798        """
1799        if raw:
1800            msg = (
1801                b'revlog.revision(..., raw=True) is deprecated, '
1802                b'use revlog.rawdata(...)'
1803            )
1804            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1805        return self._revisiondata(nodeorrev, _df, raw=raw)
1806
1807    def sidedata(self, nodeorrev, _df=None):
1808        """a map of extra data related to the changeset but not part of the hash
1809
1810        This function currently return a dictionary. However, more advanced
1811        mapping object will likely be used in the future for a more
1812        efficient/lazy code.
1813        """
1814        # deal with <nodeorrev> argument type
1815        if isinstance(nodeorrev, int):
1816            rev = nodeorrev
1817        else:
1818            rev = self.rev(nodeorrev)
1819        return self._sidedata(rev)
1820
1821    def _revisiondata(self, nodeorrev, _df=None, raw=False):
1822        # deal with <nodeorrev> argument type
1823        if isinstance(nodeorrev, int):
1824            rev = nodeorrev
1825            node = self.node(rev)
1826        else:
1827            node = nodeorrev
1828            rev = None
1829
1830        # fast path the special `nullid` rev
1831        if node == self.nullid:
1832            return b""
1833
1834        # ``rawtext`` is the text as stored inside the revlog. Might be the
1835        # revision or might need to be processed to retrieve the revision.
1836        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1837
1838        if raw and validated:
1839            # if we don't want to process the raw text and that raw
1840            # text is cached, we can exit early.
1841            return rawtext
1842        if rev is None:
1843            rev = self.rev(node)
1844        # the revlog's flag for this revision
1845        # (usually alter its state or content)
1846        flags = self.flags(rev)
1847
1848        if validated and flags == REVIDX_DEFAULT_FLAGS:
1849            # no extra flags set, no flag processor runs, text = rawtext
1850            return rawtext
1851
1852        if raw:
1853            validatehash = flagutil.processflagsraw(self, rawtext, flags)
1854            text = rawtext
1855        else:
1856            r = flagutil.processflagsread(self, rawtext, flags)
1857            text, validatehash = r
1858        if validatehash:
1859            self.checkhash(text, node, rev=rev)
1860        if not validated:
1861            self._revisioncache = (node, rev, rawtext)
1862
1863        return text
1864
1865    def _rawtext(self, node, rev, _df=None):
1866        """return the possibly unvalidated rawtext for a revision
1867
1868        returns (rev, rawtext, validated)
1869        """
1870
1871        # revision in the cache (could be useful to apply delta)
1872        cachedrev = None
1873        # An intermediate text to apply deltas to
1874        basetext = None
1875
1876        # Check if we have the entry in cache
1877        # The cache entry looks like (node, rev, rawtext)
1878        if self._revisioncache:
1879            if self._revisioncache[0] == node:
1880                return (rev, self._revisioncache[2], True)
1881            cachedrev = self._revisioncache[1]
1882
1883        if rev is None:
1884            rev = self.rev(node)
1885
1886        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1887        if stopped:
1888            basetext = self._revisioncache[2]
1889
1890        # drop cache to save memory, the caller is expected to
1891        # update self._revisioncache after validating the text
1892        self._revisioncache = None
1893
1894        targetsize = None
1895        rawsize = self.index[rev][2]
1896        if 0 <= rawsize:
1897            targetsize = 4 * rawsize
1898
1899        bins = self._chunks(chain, df=_df, targetsize=targetsize)
1900        if basetext is None:
1901            basetext = bytes(bins[0])
1902            bins = bins[1:]
1903
1904        rawtext = mdiff.patches(basetext, bins)
1905        del basetext  # let us have a chance to free memory early
1906        return (rev, rawtext, False)
1907
1908    def _sidedata(self, rev):
1909        """Return the sidedata for a given revision number."""
1910        index_entry = self.index[rev]
1911        sidedata_offset = index_entry[8]
1912        sidedata_size = index_entry[9]
1913
1914        if self._inline:
1915            sidedata_offset += self.index.entry_size * (1 + rev)
1916        if sidedata_size == 0:
1917            return {}
1918
1919        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1920            filename = self._sidedatafile
1921            end = self._docket.sidedata_end
1922            offset = sidedata_offset
1923            length = sidedata_size
1924            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1925            raise error.RevlogError(m)
1926
1927        comp_segment = self._segmentfile_sidedata.read_chunk(
1928            sidedata_offset, sidedata_size
1929        )
1930
1931        comp = self.index[rev][11]
1932        if comp == COMP_MODE_PLAIN:
1933            segment = comp_segment
1934        elif comp == COMP_MODE_DEFAULT:
1935            segment = self._decompressor(comp_segment)
1936        elif comp == COMP_MODE_INLINE:
1937            segment = self.decompress(comp_segment)
1938        else:
1939            msg = b'unknown compression mode %d'
1940            msg %= comp
1941            raise error.RevlogError(msg)
1942
1943        sidedata = sidedatautil.deserialize_sidedata(segment)
1944        return sidedata
1945
1946    def rawdata(self, nodeorrev, _df=None):
1947        """return an uncompressed raw data of a given node or revision number.
1948
1949        _df - an existing file handle to read from. (internal-only)
1950        """
1951        return self._revisiondata(nodeorrev, _df, raw=True)
1952
1953    def hash(self, text, p1, p2):
1954        """Compute a node hash.
1955
1956        Available as a function so that subclasses can replace the hash
1957        as needed.
1958        """
1959        return storageutil.hashrevisionsha1(text, p1, p2)
1960
1961    def checkhash(self, text, node, p1=None, p2=None, rev=None):
1962        """Check node hash integrity.
1963
1964        Available as a function so that subclasses can extend hash mismatch
1965        behaviors as needed.
1966        """
1967        try:
1968            if p1 is None and p2 is None:
1969                p1, p2 = self.parents(node)
1970            if node != self.hash(text, p1, p2):
1971                # Clear the revision cache on hash failure. The revision cache
1972                # only stores the raw revision and clearing the cache does have
1973                # the side-effect that we won't have a cache hit when the raw
1974                # revision data is accessed. But this case should be rare and
1975                # it is extra work to teach the cache about the hash
1976                # verification state.
1977                if self._revisioncache and self._revisioncache[0] == node:
1978                    self._revisioncache = None
1979
1980                revornode = rev
1981                if revornode is None:
1982                    revornode = templatefilters.short(hex(node))
1983                raise error.RevlogError(
1984                    _(b"integrity check failed on %s:%s")
1985                    % (self.display_id, pycompat.bytestr(revornode))
1986                )
1987        except error.RevlogError:
1988            if self._censorable and storageutil.iscensoredtext(text):
1989                raise error.CensoredNodeError(self.display_id, node, text)
1990            raise
1991
1992    def _enforceinlinesize(self, tr):
1993        """Check if the revlog is too big for inline and convert if so.
1994
1995        This should be called after revisions are added to the revlog. If the
1996        revlog has grown too large to be an inline revlog, it will convert it
1997        to use multiple index and data files.
1998        """
1999        tiprev = len(self) - 1
2000        total_size = self.start(tiprev) + self.length(tiprev)
2001        if not self._inline or total_size < _maxinline:
2002            return
2003
2004        troffset = tr.findoffset(self._indexfile)
2005        if troffset is None:
2006            raise error.RevlogError(
2007                _(b"%s not found in the transaction") % self._indexfile
2008            )
2009        trindex = 0
2010        tr.add(self._datafile, 0)
2011
2012        existing_handles = False
2013        if self._writinghandles is not None:
2014            existing_handles = True
2015            fp = self._writinghandles[0]
2016            fp.flush()
2017            fp.close()
2018            # We can't use the cached file handle after close(). So prevent
2019            # its usage.
2020            self._writinghandles = None
2021            self._segmentfile.writing_handle = None
2022            # No need to deal with sidedata writing handle as it is only
2023            # relevant with revlog-v2 which is never inline, not reaching
2024            # this code
2025
2026        new_dfh = self._datafp(b'w+')
2027        new_dfh.truncate(0)  # drop any potentially existing data
2028        try:
2029            with self._indexfp() as read_ifh:
2030                for r in self:
2031                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2032                    if troffset <= self.start(r) + r * self.index.entry_size:
2033                        trindex = r
2034                new_dfh.flush()
2035
2036            with self.__index_new_fp() as fp:
2037                self._format_flags &= ~FLAG_INLINE_DATA
2038                self._inline = False
2039                for i in self:
2040                    e = self.index.entry_binary(i)
2041                    if i == 0 and self._docket is None:
2042                        header = self._format_flags | self._format_version
2043                        header = self.index.pack_header(header)
2044                        e = header + e
2045                    fp.write(e)
2046                if self._docket is not None:
2047                    self._docket.index_end = fp.tell()
2048
2049                # There is a small transactional race here. If the rename of
2050                # the index fails, we should remove the datafile. It is more
2051                # important to ensure that the data file is not truncated
2052                # when the index is replaced as otherwise data is lost.
2053                tr.replace(self._datafile, self.start(trindex))
2054
2055                # the temp file replace the real index when we exit the context
2056                # manager
2057
2058            tr.replace(self._indexfile, trindex * self.index.entry_size)
2059            nodemaputil.setup_persistent_nodemap(tr, self)
2060            self._segmentfile = randomaccessfile.randomaccessfile(
2061                self.opener,
2062                self._datafile,
2063                self._chunkcachesize,
2064            )
2065
2066            if existing_handles:
2067                # switched from inline to conventional reopen the index
2068                ifh = self.__index_write_fp()
2069                self._writinghandles = (ifh, new_dfh, None)
2070                self._segmentfile.writing_handle = new_dfh
2071                new_dfh = None
2072                # No need to deal with sidedata writing handle as it is only
2073                # relevant with revlog-v2 which is never inline, not reaching
2074                # this code
2075        finally:
2076            if new_dfh is not None:
2077                new_dfh.close()
2078
2079    def _nodeduplicatecallback(self, transaction, node):
2080        """called when trying to add a node already stored."""
2081
2082    @contextlib.contextmanager
2083    def reading(self):
2084        """Context manager that keeps data and sidedata files open for reading"""
2085        with self._segmentfile.reading():
2086            with self._segmentfile_sidedata.reading():
2087                yield
2088
2089    @contextlib.contextmanager
2090    def _writing(self, transaction):
2091        if self._trypending:
2092            msg = b'try to write in a `trypending` revlog: %s'
2093            msg %= self.display_id
2094            raise error.ProgrammingError(msg)
2095        if self._writinghandles is not None:
2096            yield
2097        else:
2098            ifh = dfh = sdfh = None
2099            try:
2100                r = len(self)
2101                # opening the data file.
2102                dsize = 0
2103                if r:
2104                    dsize = self.end(r - 1)
2105                dfh = None
2106                if not self._inline:
2107                    try:
2108                        dfh = self._datafp(b"r+")
2109                        if self._docket is None:
2110                            dfh.seek(0, os.SEEK_END)
2111                        else:
2112                            dfh.seek(self._docket.data_end, os.SEEK_SET)
2113                    except IOError as inst:
2114                        if inst.errno != errno.ENOENT:
2115                            raise
2116                        dfh = self._datafp(b"w+")
2117                    transaction.add(self._datafile, dsize)
2118                if self._sidedatafile is not None:
2119                    # revlog-v2 does not inline, help Pytype
2120                    assert dfh is not None
2121                    try:
2122                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
2123                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2124                    except IOError as inst:
2125                        if inst.errno != errno.ENOENT:
2126                            raise
2127                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
2128                    transaction.add(
2129                        self._sidedatafile, self._docket.sidedata_end
2130                    )
2131
2132                # opening the index file.
2133                isize = r * self.index.entry_size
2134                ifh = self.__index_write_fp()
2135                if self._inline:
2136                    transaction.add(self._indexfile, dsize + isize)
2137                else:
2138                    transaction.add(self._indexfile, isize)
2139                # exposing all file handle for writing.
2140                self._writinghandles = (ifh, dfh, sdfh)
2141                self._segmentfile.writing_handle = ifh if self._inline else dfh
2142                self._segmentfile_sidedata.writing_handle = sdfh
2143                yield
2144                if self._docket is not None:
2145                    self._write_docket(transaction)
2146            finally:
2147                self._writinghandles = None
2148                self._segmentfile.writing_handle = None
2149                self._segmentfile_sidedata.writing_handle = None
2150                if dfh is not None:
2151                    dfh.close()
2152                if sdfh is not None:
2153                    sdfh.close()
2154                # closing the index file last to avoid exposing referent to
2155                # potential unflushed data content.
2156                if ifh is not None:
2157                    ifh.close()
2158
2159    def _write_docket(self, transaction):
2160        """write the current docket on disk
2161
2162        Exist as a method to help changelog to implement transaction logic
2163
2164        We could also imagine using the same transaction logic for all revlog
2165        since docket are cheap."""
2166        self._docket.write(transaction)
2167
2168    def addrevision(
2169        self,
2170        text,
2171        transaction,
2172        link,
2173        p1,
2174        p2,
2175        cachedelta=None,
2176        node=None,
2177        flags=REVIDX_DEFAULT_FLAGS,
2178        deltacomputer=None,
2179        sidedata=None,
2180    ):
2181        """add a revision to the log
2182
2183        text - the revision data to add
2184        transaction - the transaction object used for rollback
2185        link - the linkrev data to add
2186        p1, p2 - the parent nodeids of the revision
2187        cachedelta - an optional precomputed delta
2188        node - nodeid of revision; typically node is not specified, and it is
2189            computed by default as hash(text, p1, p2), however subclasses might
2190            use different hashing method (and override checkhash() in such case)
2191        flags - the known flags to set on the revision
2192        deltacomputer - an optional deltacomputer instance shared between
2193            multiple calls
2194        """
2195        if link == nullrev:
2196            raise error.RevlogError(
2197                _(b"attempted to add linkrev -1 to %s") % self.display_id
2198            )
2199
2200        if sidedata is None:
2201            sidedata = {}
2202        elif sidedata and not self.hassidedata:
2203            raise error.ProgrammingError(
2204                _(b"trying to add sidedata to a revlog who don't support them")
2205            )
2206
2207        if flags:
2208            node = node or self.hash(text, p1, p2)
2209
2210        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2211
2212        # If the flag processor modifies the revision data, ignore any provided
2213        # cachedelta.
2214        if rawtext != text:
2215            cachedelta = None
2216
2217        if len(rawtext) > _maxentrysize:
2218            raise error.RevlogError(
2219                _(
2220                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2221                )
2222                % (self.display_id, len(rawtext))
2223            )
2224
2225        node = node or self.hash(rawtext, p1, p2)
2226        rev = self.index.get_rev(node)
2227        if rev is not None:
2228            return rev
2229
2230        if validatehash:
2231            self.checkhash(rawtext, node, p1=p1, p2=p2)
2232
2233        return self.addrawrevision(
2234            rawtext,
2235            transaction,
2236            link,
2237            p1,
2238            p2,
2239            node,
2240            flags,
2241            cachedelta=cachedelta,
2242            deltacomputer=deltacomputer,
2243            sidedata=sidedata,
2244        )
2245
2246    def addrawrevision(
2247        self,
2248        rawtext,
2249        transaction,
2250        link,
2251        p1,
2252        p2,
2253        node,
2254        flags,
2255        cachedelta=None,
2256        deltacomputer=None,
2257        sidedata=None,
2258    ):
2259        """add a raw revision with known flags, node and parents
2260        useful when reusing a revision not stored in this revlog (ex: received
2261        over wire, or read from an external bundle).
2262        """
2263        with self._writing(transaction):
2264            return self._addrevision(
2265                node,
2266                rawtext,
2267                transaction,
2268                link,
2269                p1,
2270                p2,
2271                flags,
2272                cachedelta,
2273                deltacomputer=deltacomputer,
2274                sidedata=sidedata,
2275            )
2276
2277    def compress(self, data):
2278        """Generate a possibly-compressed representation of data."""
2279        if not data:
2280            return b'', data
2281
2282        compressed = self._compressor.compress(data)
2283
2284        if compressed:
2285            # The revlog compressor added the header in the returned data.
2286            return b'', compressed
2287
2288        if data[0:1] == b'\0':
2289            return b'', data
2290        return b'u', data
2291
2292    def decompress(self, data):
2293        """Decompress a revlog chunk.
2294
2295        The chunk is expected to begin with a header identifying the
2296        format type so it can be routed to an appropriate decompressor.
2297        """
2298        if not data:
2299            return data
2300
2301        # Revlogs are read much more frequently than they are written and many
2302        # chunks only take microseconds to decompress, so performance is
2303        # important here.
2304        #
2305        # We can make a few assumptions about revlogs:
2306        #
2307        # 1) the majority of chunks will be compressed (as opposed to inline
2308        #    raw data).
2309        # 2) decompressing *any* data will likely by at least 10x slower than
2310        #    returning raw inline data.
2311        # 3) we want to prioritize common and officially supported compression
2312        #    engines
2313        #
2314        # It follows that we want to optimize for "decompress compressed data
2315        # when encoded with common and officially supported compression engines"
2316        # case over "raw data" and "data encoded by less common or non-official
2317        # compression engines." That is why we have the inline lookup first
2318        # followed by the compengines lookup.
2319        #
2320        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2321        # compressed chunks. And this matters for changelog and manifest reads.
2322        t = data[0:1]
2323
2324        if t == b'x':
2325            try:
2326                return _zlibdecompress(data)
2327            except zlib.error as e:
2328                raise error.RevlogError(
2329                    _(b'revlog decompress error: %s')
2330                    % stringutil.forcebytestr(e)
2331                )
2332        # '\0' is more common than 'u' so it goes first.
2333        elif t == b'\0':
2334            return data
2335        elif t == b'u':
2336            return util.buffer(data, 1)
2337
2338        compressor = self._get_decompressor(t)
2339
2340        return compressor.decompress(data)
2341
2342    def _addrevision(
2343        self,
2344        node,
2345        rawtext,
2346        transaction,
2347        link,
2348        p1,
2349        p2,
2350        flags,
2351        cachedelta,
2352        alwayscache=False,
2353        deltacomputer=None,
2354        sidedata=None,
2355    ):
2356        """internal function to add revisions to the log
2357
2358        see addrevision for argument descriptions.
2359
2360        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2361
2362        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2363        be used.
2364
2365        invariants:
2366        - rawtext is optional (can be None); if not set, cachedelta must be set.
2367          if both are set, they must correspond to each other.
2368        """
2369        if node == self.nullid:
2370            raise error.RevlogError(
2371                _(b"%s: attempt to add null revision") % self.display_id
2372            )
2373        if (
2374            node == self.nodeconstants.wdirid
2375            or node in self.nodeconstants.wdirfilenodeids
2376        ):
2377            raise error.RevlogError(
2378                _(b"%s: attempt to add wdir revision") % self.display_id
2379            )
2380        if self._writinghandles is None:
2381            msg = b'adding revision outside `revlog._writing` context'
2382            raise error.ProgrammingError(msg)
2383
2384        if self._inline:
2385            fh = self._writinghandles[0]
2386        else:
2387            fh = self._writinghandles[1]
2388
2389        btext = [rawtext]
2390
2391        curr = len(self)
2392        prev = curr - 1
2393
2394        offset = self._get_data_offset(prev)
2395
2396        if self._concurrencychecker:
2397            ifh, dfh, sdfh = self._writinghandles
2398            # XXX no checking for the sidedata file
2399            if self._inline:
2400                # offset is "as if" it were in the .d file, so we need to add on
2401                # the size of the entry metadata.
2402                self._concurrencychecker(
2403                    ifh, self._indexfile, offset + curr * self.index.entry_size
2404                )
2405            else:
2406                # Entries in the .i are a consistent size.
2407                self._concurrencychecker(
2408                    ifh, self._indexfile, curr * self.index.entry_size
2409                )
2410                self._concurrencychecker(dfh, self._datafile, offset)
2411
2412        p1r, p2r = self.rev(p1), self.rev(p2)
2413
2414        # full versions are inserted when the needed deltas
2415        # become comparable to the uncompressed text
2416        if rawtext is None:
2417            # need rawtext size, before changed by flag processors, which is
2418            # the non-raw size. use revlog explicitly to avoid filelog's extra
2419            # logic that might remove metadata size.
2420            textlen = mdiff.patchedsize(
2421                revlog.size(self, cachedelta[0]), cachedelta[1]
2422            )
2423        else:
2424            textlen = len(rawtext)
2425
2426        if deltacomputer is None:
2427            deltacomputer = deltautil.deltacomputer(self)
2428
2429        revinfo = revlogutils.revisioninfo(
2430            node,
2431            p1,
2432            p2,
2433            btext,
2434            textlen,
2435            cachedelta,
2436            flags,
2437        )
2438
2439        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2440
2441        compression_mode = COMP_MODE_INLINE
2442        if self._docket is not None:
2443            default_comp = self._docket.default_compression_header
2444            r = deltautil.delta_compression(default_comp, deltainfo)
2445            compression_mode, deltainfo = r
2446
2447        sidedata_compression_mode = COMP_MODE_INLINE
2448        if sidedata and self.hassidedata:
2449            sidedata_compression_mode = COMP_MODE_PLAIN
2450            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2451            sidedata_offset = self._docket.sidedata_end
2452            h, comp_sidedata = self.compress(serialized_sidedata)
2453            if (
2454                h != b'u'
2455                and comp_sidedata[0:1] != b'\0'
2456                and len(comp_sidedata) < len(serialized_sidedata)
2457            ):
2458                assert not h
2459                if (
2460                    comp_sidedata[0:1]
2461                    == self._docket.default_compression_header
2462                ):
2463                    sidedata_compression_mode = COMP_MODE_DEFAULT
2464                    serialized_sidedata = comp_sidedata
2465                else:
2466                    sidedata_compression_mode = COMP_MODE_INLINE
2467                    serialized_sidedata = comp_sidedata
2468        else:
2469            serialized_sidedata = b""
2470            # Don't store the offset if the sidedata is empty, that way
2471            # we can easily detect empty sidedata and they will be no different
2472            # than ones we manually add.
2473            sidedata_offset = 0
2474
2475        e = revlogutils.entry(
2476            flags=flags,
2477            data_offset=offset,
2478            data_compressed_length=deltainfo.deltalen,
2479            data_uncompressed_length=textlen,
2480            data_compression_mode=compression_mode,
2481            data_delta_base=deltainfo.base,
2482            link_rev=link,
2483            parent_rev_1=p1r,
2484            parent_rev_2=p2r,
2485            node_id=node,
2486            sidedata_offset=sidedata_offset,
2487            sidedata_compressed_length=len(serialized_sidedata),
2488            sidedata_compression_mode=sidedata_compression_mode,
2489        )
2490
2491        self.index.append(e)
2492        entry = self.index.entry_binary(curr)
2493        if curr == 0 and self._docket is None:
2494            header = self._format_flags | self._format_version
2495            header = self.index.pack_header(header)
2496            entry = header + entry
2497        self._writeentry(
2498            transaction,
2499            entry,
2500            deltainfo.data,
2501            link,
2502            offset,
2503            serialized_sidedata,
2504            sidedata_offset,
2505        )
2506
2507        rawtext = btext[0]
2508
2509        if alwayscache and rawtext is None:
2510            rawtext = deltacomputer.buildtext(revinfo, fh)
2511
2512        if type(rawtext) == bytes:  # only accept immutable objects
2513            self._revisioncache = (node, curr, rawtext)
2514        self._chainbasecache[curr] = deltainfo.chainbase
2515        return curr
2516
2517    def _get_data_offset(self, prev):
2518        """Returns the current offset in the (in-transaction) data file.
2519        Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2520        file to store that information: since sidedata can be rewritten to the
2521        end of the data file within a transaction, you can have cases where, for
2522        example, rev `n` does not have sidedata while rev `n - 1` does, leading
2523        to `n - 1`'s sidedata being written after `n`'s data.
2524
2525        TODO cache this in a docket file before getting out of experimental."""
2526        if self._docket is None:
2527            return self.end(prev)
2528        else:
2529            return self._docket.data_end
2530
2531    def _writeentry(
2532        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2533    ):
2534        # Files opened in a+ mode have inconsistent behavior on various
2535        # platforms. Windows requires that a file positioning call be made
2536        # when the file handle transitions between reads and writes. See
2537        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2538        # platforms, Python or the platform itself can be buggy. Some versions
2539        # of Solaris have been observed to not append at the end of the file
2540        # if the file was seeked to before the end. See issue4943 for more.
2541        #
2542        # We work around this issue by inserting a seek() before writing.
2543        # Note: This is likely not necessary on Python 3. However, because
2544        # the file handle is reused for reads and may be seeked there, we need
2545        # to be careful before changing this.
2546        if self._writinghandles is None:
2547            msg = b'adding revision outside `revlog._writing` context'
2548            raise error.ProgrammingError(msg)
2549        ifh, dfh, sdfh = self._writinghandles
2550        if self._docket is None:
2551            ifh.seek(0, os.SEEK_END)
2552        else:
2553            ifh.seek(self._docket.index_end, os.SEEK_SET)
2554        if dfh:
2555            if self._docket is None:
2556                dfh.seek(0, os.SEEK_END)
2557            else:
2558                dfh.seek(self._docket.data_end, os.SEEK_SET)
2559        if sdfh:
2560            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2561
2562        curr = len(self) - 1
2563        if not self._inline:
2564            transaction.add(self._datafile, offset)
2565            if self._sidedatafile:
2566                transaction.add(self._sidedatafile, sidedata_offset)
2567            transaction.add(self._indexfile, curr * len(entry))
2568            if data[0]:
2569                dfh.write(data[0])
2570            dfh.write(data[1])
2571            if sidedata:
2572                sdfh.write(sidedata)
2573            ifh.write(entry)
2574        else:
2575            offset += curr * self.index.entry_size
2576            transaction.add(self._indexfile, offset)
2577            ifh.write(entry)
2578            ifh.write(data[0])
2579            ifh.write(data[1])
2580            assert not sidedata
2581            self._enforceinlinesize(transaction)
2582        if self._docket is not None:
2583            # revlog-v2 always has 3 writing handles, help Pytype
2584            wh1 = self._writinghandles[0]
2585            wh2 = self._writinghandles[1]
2586            wh3 = self._writinghandles[2]
2587            assert wh1 is not None
2588            assert wh2 is not None
2589            assert wh3 is not None
2590            self._docket.index_end = wh1.tell()
2591            self._docket.data_end = wh2.tell()
2592            self._docket.sidedata_end = wh3.tell()
2593
2594        nodemaputil.setup_persistent_nodemap(transaction, self)
2595
2596    def addgroup(
2597        self,
2598        deltas,
2599        linkmapper,
2600        transaction,
2601        alwayscache=False,
2602        addrevisioncb=None,
2603        duplicaterevisioncb=None,
2604    ):
2605        """
2606        add a delta group
2607
2608        given a set of deltas, add them to the revision log. the
2609        first delta is against its parent, which should be in our
2610        log, the rest are against the previous delta.
2611
2612        If ``addrevisioncb`` is defined, it will be called with arguments of
2613        this revlog and the node that was added.
2614        """
2615
2616        if self._adding_group:
2617            raise error.ProgrammingError(b'cannot nest addgroup() calls')
2618
2619        self._adding_group = True
2620        empty = True
2621        try:
2622            with self._writing(transaction):
2623                deltacomputer = deltautil.deltacomputer(self)
2624                # loop through our set of deltas
2625                for data in deltas:
2626                    (
2627                        node,
2628                        p1,
2629                        p2,
2630                        linknode,
2631                        deltabase,
2632                        delta,
2633                        flags,
2634                        sidedata,
2635                    ) = data
2636                    link = linkmapper(linknode)
2637                    flags = flags or REVIDX_DEFAULT_FLAGS
2638
2639                    rev = self.index.get_rev(node)
2640                    if rev is not None:
2641                        # this can happen if two branches make the same change
2642                        self._nodeduplicatecallback(transaction, rev)
2643                        if duplicaterevisioncb:
2644                            duplicaterevisioncb(self, rev)
2645                        empty = False
2646                        continue
2647
2648                    for p in (p1, p2):
2649                        if not self.index.has_node(p):
2650                            raise error.LookupError(
2651                                p, self.radix, _(b'unknown parent')
2652                            )
2653
2654                    if not self.index.has_node(deltabase):
2655                        raise error.LookupError(
2656                            deltabase, self.display_id, _(b'unknown delta base')
2657                        )
2658
2659                    baserev = self.rev(deltabase)
2660
2661                    if baserev != nullrev and self.iscensored(baserev):
2662                        # if base is censored, delta must be full replacement in a
2663                        # single patch operation
2664                        hlen = struct.calcsize(b">lll")
2665                        oldlen = self.rawsize(baserev)
2666                        newlen = len(delta) - hlen
2667                        if delta[:hlen] != mdiff.replacediffheader(
2668                            oldlen, newlen
2669                        ):
2670                            raise error.CensoredBaseError(
2671                                self.display_id, self.node(baserev)
2672                            )
2673
2674                    if not flags and self._peek_iscensored(baserev, delta):
2675                        flags |= REVIDX_ISCENSORED
2676
2677                    # We assume consumers of addrevisioncb will want to retrieve
2678                    # the added revision, which will require a call to
2679                    # revision(). revision() will fast path if there is a cache
2680                    # hit. So, we tell _addrevision() to always cache in this case.
2681                    # We're only using addgroup() in the context of changegroup
2682                    # generation so the revision data can always be handled as raw
2683                    # by the flagprocessor.
2684                    rev = self._addrevision(
2685                        node,
2686                        None,
2687                        transaction,
2688                        link,
2689                        p1,
2690                        p2,
2691                        flags,
2692                        (baserev, delta),
2693                        alwayscache=alwayscache,
2694                        deltacomputer=deltacomputer,
2695                        sidedata=sidedata,
2696                    )
2697
2698                    if addrevisioncb:
2699                        addrevisioncb(self, rev)
2700                    empty = False
2701        finally:
2702            self._adding_group = False
2703        return not empty
2704
2705    def iscensored(self, rev):
2706        """Check if a file revision is censored."""
2707        if not self._censorable:
2708            return False
2709
2710        return self.flags(rev) & REVIDX_ISCENSORED
2711
2712    def _peek_iscensored(self, baserev, delta):
2713        """Quickly check if a delta produces a censored revision."""
2714        if not self._censorable:
2715            return False
2716
2717        return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2718
2719    def getstrippoint(self, minlink):
2720        """find the minimum rev that must be stripped to strip the linkrev
2721
2722        Returns a tuple containing the minimum rev and a set of all revs that
2723        have linkrevs that will be broken by this strip.
2724        """
2725        return storageutil.resolvestripinfo(
2726            minlink,
2727            len(self) - 1,
2728            self.headrevs(),
2729            self.linkrev,
2730            self.parentrevs,
2731        )
2732
2733    def strip(self, minlink, transaction):
2734        """truncate the revlog on the first revision with a linkrev >= minlink
2735
2736        This function is called when we're stripping revision minlink and
2737        its descendants from the repository.
2738
2739        We have to remove all revisions with linkrev >= minlink, because
2740        the equivalent changelog revisions will be renumbered after the
2741        strip.
2742
2743        So we truncate the revlog on the first of these revisions, and
2744        trust that the caller has saved the revisions that shouldn't be
2745        removed and that it'll re-add them after this truncation.
2746        """
2747        if len(self) == 0:
2748            return
2749
2750        rev, _ = self.getstrippoint(minlink)
2751        if rev == len(self):
2752            return
2753
2754        # first truncate the files on disk
2755        data_end = self.start(rev)
2756        if not self._inline:
2757            transaction.add(self._datafile, data_end)
2758            end = rev * self.index.entry_size
2759        else:
2760            end = data_end + (rev * self.index.entry_size)
2761
2762        if self._sidedatafile:
2763            sidedata_end = self.sidedata_cut_off(rev)
2764            transaction.add(self._sidedatafile, sidedata_end)
2765
2766        transaction.add(self._indexfile, end)
2767        if self._docket is not None:
2768            # XXX we could, leverage the docket while stripping. However it is
2769            # not powerfull enough at the time of this comment
2770            self._docket.index_end = end
2771            self._docket.data_end = data_end
2772            self._docket.sidedata_end = sidedata_end
2773            self._docket.write(transaction, stripping=True)
2774
2775        # then reset internal state in memory to forget those revisions
2776        self._revisioncache = None
2777        self._chaininfocache = util.lrucachedict(500)
2778        self._segmentfile.clear_cache()
2779        self._segmentfile_sidedata.clear_cache()
2780
2781        del self.index[rev:-1]
2782
2783    def checksize(self):
2784        """Check size of index and data files
2785
2786        return a (dd, di) tuple.
2787        - dd: extra bytes for the "data" file
2788        - di: extra bytes for the "index" file
2789
2790        A healthy revlog will return (0, 0).
2791        """
2792        expected = 0
2793        if len(self):
2794            expected = max(0, self.end(len(self) - 1))
2795
2796        try:
2797            with self._datafp() as f:
2798                f.seek(0, io.SEEK_END)
2799                actual = f.tell()
2800            dd = actual - expected
2801        except IOError as inst:
2802            if inst.errno != errno.ENOENT:
2803                raise
2804            dd = 0
2805
2806        try:
2807            f = self.opener(self._indexfile)
2808            f.seek(0, io.SEEK_END)
2809            actual = f.tell()
2810            f.close()
2811            s = self.index.entry_size
2812            i = max(0, actual // s)
2813            di = actual - (i * s)
2814            if self._inline:
2815                databytes = 0
2816                for r in self:
2817                    databytes += max(0, self.length(r))
2818                dd = 0
2819                di = actual - len(self) * s - databytes
2820        except IOError as inst:
2821            if inst.errno != errno.ENOENT:
2822                raise
2823            di = 0
2824
2825        return (dd, di)
2826
2827    def files(self):
2828        res = [self._indexfile]
2829        if self._docket_file is None:
2830            if not self._inline:
2831                res.append(self._datafile)
2832        else:
2833            res.append(self._docket_file)
2834            res.extend(self._docket.old_index_filepaths(include_empty=False))
2835            if self._docket.data_end:
2836                res.append(self._datafile)
2837            res.extend(self._docket.old_data_filepaths(include_empty=False))
2838            if self._docket.sidedata_end:
2839                res.append(self._sidedatafile)
2840            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2841        return res
2842
2843    def emitrevisions(
2844        self,
2845        nodes,
2846        nodesorder=None,
2847        revisiondata=False,
2848        assumehaveparentrevisions=False,
2849        deltamode=repository.CG_DELTAMODE_STD,
2850        sidedata_helpers=None,
2851    ):
2852        if nodesorder not in (b'nodes', b'storage', b'linear', None):
2853            raise error.ProgrammingError(
2854                b'unhandled value for nodesorder: %s' % nodesorder
2855            )
2856
2857        if nodesorder is None and not self._generaldelta:
2858            nodesorder = b'storage'
2859
2860        if (
2861            not self._storedeltachains
2862            and deltamode != repository.CG_DELTAMODE_PREV
2863        ):
2864            deltamode = repository.CG_DELTAMODE_FULL
2865
2866        return storageutil.emitrevisions(
2867            self,
2868            nodes,
2869            nodesorder,
2870            revlogrevisiondelta,
2871            deltaparentfn=self.deltaparent,
2872            candeltafn=self.candelta,
2873            rawsizefn=self.rawsize,
2874            revdifffn=self.revdiff,
2875            flagsfn=self.flags,
2876            deltamode=deltamode,
2877            revisiondata=revisiondata,
2878            assumehaveparentrevisions=assumehaveparentrevisions,
2879            sidedata_helpers=sidedata_helpers,
2880        )
2881
2882    DELTAREUSEALWAYS = b'always'
2883    DELTAREUSESAMEREVS = b'samerevs'
2884    DELTAREUSENEVER = b'never'
2885
2886    DELTAREUSEFULLADD = b'fulladd'
2887
2888    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2889
2890    def clone(
2891        self,
2892        tr,
2893        destrevlog,
2894        addrevisioncb=None,
2895        deltareuse=DELTAREUSESAMEREVS,
2896        forcedeltabothparents=None,
2897        sidedata_helpers=None,
2898    ):
2899        """Copy this revlog to another, possibly with format changes.
2900
2901        The destination revlog will contain the same revisions and nodes.
2902        However, it may not be bit-for-bit identical due to e.g. delta encoding
2903        differences.
2904
2905        The ``deltareuse`` argument control how deltas from the existing revlog
2906        are preserved in the destination revlog. The argument can have the
2907        following values:
2908
2909        DELTAREUSEALWAYS
2910           Deltas will always be reused (if possible), even if the destination
2911           revlog would not select the same revisions for the delta. This is the
2912           fastest mode of operation.
2913        DELTAREUSESAMEREVS
2914           Deltas will be reused if the destination revlog would pick the same
2915           revisions for the delta. This mode strikes a balance between speed
2916           and optimization.
2917        DELTAREUSENEVER
2918           Deltas will never be reused. This is the slowest mode of execution.
2919           This mode can be used to recompute deltas (e.g. if the diff/delta
2920           algorithm changes).
2921        DELTAREUSEFULLADD
2922           Revision will be re-added as if their were new content. This is
2923           slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2924           eg: large file detection and handling.
2925
2926        Delta computation can be slow, so the choice of delta reuse policy can
2927        significantly affect run time.
2928
2929        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2930        two extremes. Deltas will be reused if they are appropriate. But if the
2931        delta could choose a better revision, it will do so. This means if you
2932        are converting a non-generaldelta revlog to a generaldelta revlog,
2933        deltas will be recomputed if the delta's parent isn't a parent of the
2934        revision.
2935
2936        In addition to the delta policy, the ``forcedeltabothparents``
2937        argument controls whether to force compute deltas against both parents
2938        for merges. By default, the current default is used.
2939
2940        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2941        `sidedata_helpers`.
2942        """
2943        if deltareuse not in self.DELTAREUSEALL:
2944            raise ValueError(
2945                _(b'value for deltareuse invalid: %s') % deltareuse
2946            )
2947
2948        if len(destrevlog):
2949            raise ValueError(_(b'destination revlog is not empty'))
2950
2951        if getattr(self, 'filteredrevs', None):
2952            raise ValueError(_(b'source revlog has filtered revisions'))
2953        if getattr(destrevlog, 'filteredrevs', None):
2954            raise ValueError(_(b'destination revlog has filtered revisions'))
2955
2956        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2957        # if possible.
2958        oldlazydelta = destrevlog._lazydelta
2959        oldlazydeltabase = destrevlog._lazydeltabase
2960        oldamd = destrevlog._deltabothparents
2961
2962        try:
2963            if deltareuse == self.DELTAREUSEALWAYS:
2964                destrevlog._lazydeltabase = True
2965                destrevlog._lazydelta = True
2966            elif deltareuse == self.DELTAREUSESAMEREVS:
2967                destrevlog._lazydeltabase = False
2968                destrevlog._lazydelta = True
2969            elif deltareuse == self.DELTAREUSENEVER:
2970                destrevlog._lazydeltabase = False
2971                destrevlog._lazydelta = False
2972
2973            destrevlog._deltabothparents = forcedeltabothparents or oldamd
2974
2975            self._clone(
2976                tr,
2977                destrevlog,
2978                addrevisioncb,
2979                deltareuse,
2980                forcedeltabothparents,
2981                sidedata_helpers,
2982            )
2983
2984        finally:
2985            destrevlog._lazydelta = oldlazydelta
2986            destrevlog._lazydeltabase = oldlazydeltabase
2987            destrevlog._deltabothparents = oldamd
2988
2989    def _clone(
2990        self,
2991        tr,
2992        destrevlog,
2993        addrevisioncb,
2994        deltareuse,
2995        forcedeltabothparents,
2996        sidedata_helpers,
2997    ):
2998        """perform the core duty of `revlog.clone` after parameter processing"""
2999        deltacomputer = deltautil.deltacomputer(destrevlog)
3000        index = self.index
3001        for rev in self:
3002            entry = index[rev]
3003
3004            # Some classes override linkrev to take filtered revs into
3005            # account. Use raw entry from index.
3006            flags = entry[0] & 0xFFFF
3007            linkrev = entry[4]
3008            p1 = index[entry[5]][7]
3009            p2 = index[entry[6]][7]
3010            node = entry[7]
3011
3012            # (Possibly) reuse the delta from the revlog if allowed and
3013            # the revlog chunk is a delta.
3014            cachedelta = None
3015            rawtext = None
3016            if deltareuse == self.DELTAREUSEFULLADD:
3017                text = self._revisiondata(rev)
3018                sidedata = self.sidedata(rev)
3019
3020                if sidedata_helpers is not None:
3021                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3022                        self, sidedata_helpers, sidedata, rev
3023                    )
3024                    flags = flags | new_flags[0] & ~new_flags[1]
3025
3026                destrevlog.addrevision(
3027                    text,
3028                    tr,
3029                    linkrev,
3030                    p1,
3031                    p2,
3032                    cachedelta=cachedelta,
3033                    node=node,
3034                    flags=flags,
3035                    deltacomputer=deltacomputer,
3036                    sidedata=sidedata,
3037                )
3038            else:
3039                if destrevlog._lazydelta:
3040                    dp = self.deltaparent(rev)
3041                    if dp != nullrev:
3042                        cachedelta = (dp, bytes(self._chunk(rev)))
3043
3044                sidedata = None
3045                if not cachedelta:
3046                    rawtext = self._revisiondata(rev)
3047                    sidedata = self.sidedata(rev)
3048                if sidedata is None:
3049                    sidedata = self.sidedata(rev)
3050
3051                if sidedata_helpers is not None:
3052                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3053                        self, sidedata_helpers, sidedata, rev
3054                    )
3055                    flags = flags | new_flags[0] & ~new_flags[1]
3056
3057                with destrevlog._writing(tr):
3058                    destrevlog._addrevision(
3059                        node,
3060                        rawtext,
3061                        tr,
3062                        linkrev,
3063                        p1,
3064                        p2,
3065                        flags,
3066                        cachedelta,
3067                        deltacomputer=deltacomputer,
3068                        sidedata=sidedata,
3069                    )
3070
3071            if addrevisioncb:
3072                addrevisioncb(self, rev, node)
3073
3074    def censorrevision(self, tr, censornode, tombstone=b''):
3075        if self._format_version == REVLOGV0:
3076            raise error.RevlogError(
3077                _(b'cannot censor with version %d revlogs')
3078                % self._format_version
3079            )
3080        elif self._format_version == REVLOGV1:
3081            rewrite.v1_censor(self, tr, censornode, tombstone)
3082        else:
3083            rewrite.v2_censor(self, tr, censornode, tombstone)
3084
3085    def verifyintegrity(self, state):
3086        """Verifies the integrity of the revlog.
3087
3088        Yields ``revlogproblem`` instances describing problems that are
3089        found.
3090        """
3091        dd, di = self.checksize()
3092        if dd:
3093            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3094        if di:
3095            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3096
3097        version = self._format_version
3098
3099        # The verifier tells us what version revlog we should be.
3100        if version != state[b'expectedversion']:
3101            yield revlogproblem(
3102                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3103                % (self.display_id, version, state[b'expectedversion'])
3104            )
3105
3106        state[b'skipread'] = set()
3107        state[b'safe_renamed'] = set()
3108
3109        for rev in self:
3110            node = self.node(rev)
3111
3112            # Verify contents. 4 cases to care about:
3113            #
3114            #   common: the most common case
3115            #   rename: with a rename
3116            #   meta: file content starts with b'\1\n', the metadata
3117            #         header defined in filelog.py, but without a rename
3118            #   ext: content stored externally
3119            #
3120            # More formally, their differences are shown below:
3121            #
3122            #                       | common | rename | meta  | ext
3123            #  -------------------------------------------------------
3124            #   flags()             | 0      | 0      | 0     | not 0
3125            #   renamed()           | False  | True   | False | ?
3126            #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
3127            #
3128            # "rawtext" means the raw text stored in revlog data, which
3129            # could be retrieved by "rawdata(rev)". "text"
3130            # mentioned below is "revision(rev)".
3131            #
3132            # There are 3 different lengths stored physically:
3133            #  1. L1: rawsize, stored in revlog index
3134            #  2. L2: len(rawtext), stored in revlog data
3135            #  3. L3: len(text), stored in revlog data if flags==0, or
3136            #     possibly somewhere else if flags!=0
3137            #
3138            # L1 should be equal to L2. L3 could be different from them.
3139            # "text" may or may not affect commit hash depending on flag
3140            # processors (see flagutil.addflagprocessor).
3141            #
3142            #              | common  | rename | meta  | ext
3143            # -------------------------------------------------
3144            #    rawsize() | L1      | L1     | L1    | L1
3145            #       size() | L1      | L2-LM  | L1(*) | L1 (?)
3146            # len(rawtext) | L2      | L2     | L2    | L2
3147            #    len(text) | L2      | L2     | L2    | L3
3148            #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
3149            #
3150            # LM:  length of metadata, depending on rawtext
3151            # (*): not ideal, see comment in filelog.size
3152            # (?): could be "- len(meta)" if the resolved content has
3153            #      rename metadata
3154            #
3155            # Checks needed to be done:
3156            #  1. length check: L1 == L2, in all cases.
3157            #  2. hash check: depending on flag processor, we may need to
3158            #     use either "text" (external), or "rawtext" (in revlog).
3159
3160            try:
3161                skipflags = state.get(b'skipflags', 0)
3162                if skipflags:
3163                    skipflags &= self.flags(rev)
3164
3165                _verify_revision(self, skipflags, state, node)
3166
3167                l1 = self.rawsize(rev)
3168                l2 = len(self.rawdata(node))
3169
3170                if l1 != l2:
3171                    yield revlogproblem(
3172                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3173                        node=node,
3174                    )
3175
3176            except error.CensoredNodeError:
3177                if state[b'erroroncensored']:
3178                    yield revlogproblem(
3179                        error=_(b'censored file data'), node=node
3180                    )
3181                    state[b'skipread'].add(node)
3182            except Exception as e:
3183                yield revlogproblem(
3184                    error=_(b'unpacking %s: %s')
3185                    % (short(node), stringutil.forcebytestr(e)),
3186                    node=node,
3187                )
3188                state[b'skipread'].add(node)
3189
3190    def storageinfo(
3191        self,
3192        exclusivefiles=False,
3193        sharedfiles=False,
3194        revisionscount=False,
3195        trackedsize=False,
3196        storedsize=False,
3197    ):
3198        d = {}
3199
3200        if exclusivefiles:
3201            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3202            if not self._inline:
3203                d[b'exclusivefiles'].append((self.opener, self._datafile))
3204
3205        if sharedfiles:
3206            d[b'sharedfiles'] = []
3207
3208        if revisionscount:
3209            d[b'revisionscount'] = len(self)
3210
3211        if trackedsize:
3212            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3213
3214        if storedsize:
3215            d[b'storedsize'] = sum(
3216                self.opener.stat(path).st_size for path in self.files()
3217            )
3218
3219        return d
3220
3221    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3222        if not self.hassidedata:
3223            return
3224        # revlog formats with sidedata support does not support inline
3225        assert not self._inline
3226        if not helpers[1] and not helpers[2]:
3227            # Nothing to generate or remove
3228            return
3229
3230        new_entries = []
3231        # append the new sidedata
3232        with self._writing(transaction):
3233            ifh, dfh, sdfh = self._writinghandles
3234            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3235
3236            current_offset = sdfh.tell()
3237            for rev in range(startrev, endrev + 1):
3238                entry = self.index[rev]
3239                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3240                    store=self,
3241                    sidedata_helpers=helpers,
3242                    sidedata={},
3243                    rev=rev,
3244                )
3245
3246                serialized_sidedata = sidedatautil.serialize_sidedata(
3247                    new_sidedata
3248                )
3249
3250                sidedata_compression_mode = COMP_MODE_INLINE
3251                if serialized_sidedata and self.hassidedata:
3252                    sidedata_compression_mode = COMP_MODE_PLAIN
3253                    h, comp_sidedata = self.compress(serialized_sidedata)
3254                    if (
3255                        h != b'u'
3256                        and comp_sidedata[0] != b'\0'
3257                        and len(comp_sidedata) < len(serialized_sidedata)
3258                    ):
3259                        assert not h
3260                        if (
3261                            comp_sidedata[0]
3262                            == self._docket.default_compression_header
3263                        ):
3264                            sidedata_compression_mode = COMP_MODE_DEFAULT
3265                            serialized_sidedata = comp_sidedata
3266                        else:
3267                            sidedata_compression_mode = COMP_MODE_INLINE
3268                            serialized_sidedata = comp_sidedata
3269                if entry[8] != 0 or entry[9] != 0:
3270                    # rewriting entries that already have sidedata is not
3271                    # supported yet, because it introduces garbage data in the
3272                    # revlog.
3273                    msg = b"rewriting existing sidedata is not supported yet"
3274                    raise error.Abort(msg)
3275
3276                # Apply (potential) flags to add and to remove after running
3277                # the sidedata helpers
3278                new_offset_flags = entry[0] | flags[0] & ~flags[1]
3279                entry_update = (
3280                    current_offset,
3281                    len(serialized_sidedata),
3282                    new_offset_flags,
3283                    sidedata_compression_mode,
3284                )
3285
3286                # the sidedata computation might have move the file cursors around
3287                sdfh.seek(current_offset, os.SEEK_SET)
3288                sdfh.write(serialized_sidedata)
3289                new_entries.append(entry_update)
3290                current_offset += len(serialized_sidedata)
3291                self._docket.sidedata_end = sdfh.tell()
3292
3293            # rewrite the new index entries
3294            ifh.seek(startrev * self.index.entry_size)
3295            for i, e in enumerate(new_entries):
3296                rev = startrev + i
3297                self.index.replace_sidedata_info(rev, *e)
3298                packed = self.index.entry_binary(rev)
3299                if rev == 0 and self._docket is None:
3300                    header = self._format_flags | self._format_version
3301                    header = self.index.pack_header(header)
3302                    packed = header + packed
3303                ifh.write(packed)
3304