1# revlogdeltas.py - constant used for revlog logic.
2#
3# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4# Copyright 2018 Octobus <contact@octobus.net>
5#
6# This software may be used and distributed according to the terms of the
7# GNU General Public License version 2 or any later version.
8"""Helper class to compute deltas stored inside revlogs"""
9
10from __future__ import absolute_import
11
12import struct
13
14from ..interfaces import repository
15from .. import revlogutils
16
### Internal utility constants

# Identifiers for the different kinds of revlog. The values are kept over 256
# so that they are not comparable with a byte value.
KIND_CHANGELOG = 1001
KIND_MANIFESTLOG = 1002
KIND_FILELOG = 1003
KIND_OTHER = 1004

# The set of every kind defined above.
ALL_KINDS = {KIND_CHANGELOG, KIND_MANIFESTLOG, KIND_FILELOG, KIND_OTHER}
30
### Index entry key
#
#    Internal details
#    ----------------
#
#    A large part of the revlog logic deals with the "index entry" of a
#    revision: a tuple that contains the same items whatever the revlog
#    version. Each version has its own way of storing these items (sometimes
#    not having them at all), but the tuple itself is always the same. New
#    fields are usually appended at the end so that code relying on the
#    existing order keeps working. The fields are defined as follows:

#    [0] offset and flags, sharing a single field:
#
#        offset:
#            The byte index of the start of the revision data chunk.
#            The value is stored shifted up by 16 bits, use
#            "offset = field >> 16" to retrieve it.
#
#        flags:
#            A flag field that carries special information or changes the
#            behavior of the revision (see the `REVIDX_*` constants for
#            details). It only occupies the lowest 16 bits of the field, use
#            "flags = field & 0xFFFF" to retrieve it.
ENTRY_DATA_OFFSET = 0

#    [1] compressed length:
#            The size, in bytes, of the data chunk as stored on disk.
ENTRY_DATA_COMPRESSED_LENGTH = 1

#    [2] uncompressed length:
#            The size, in bytes, of the full revision once reconstructed.
ENTRY_DATA_UNCOMPRESSED_LENGTH = 2

#    [3] base rev:
#            Without general delta: the base of the revision's delta chain.
#            With general delta: the base of the delta stored in the data
#            chunk.
ENTRY_DELTA_BASE = 3

#    [4] link rev:
#            The changelog revision number of the changeset introducing this
#            revision.
ENTRY_LINK_REV = 4

#    [5] parent 1 rev:
#            The revision number of the first parent.
ENTRY_PARENT_1 = 5

#    [6] parent 2 rev:
#            The revision number of the second parent.
ENTRY_PARENT_2 = 6

#    [7] node id:
#            The node id of the current revision.
ENTRY_NODE_ID = 7

#    [8] sidedata offset:
#            The byte index of the start of the revision's side-data chunk.
ENTRY_SIDEDATA_OFFSET = 8

#    [9] sidedata chunk length:
#            The size, in bytes, of the revision's side-data chunk.
ENTRY_SIDEDATA_COMPRESSED_LENGTH = 9

#    [10] data compression mode:
#            Two bits detailing how the data chunk is compressed on disk
#            (see the "COMP_MODE_*" constants for details). For revlog
#            versions 0 and 1 this is always COMP_MODE_INLINE.
ENTRY_DATA_COMPRESSION_MODE = 10

#    [11] side-data compression mode:
#            Two bits detailing how the sidedata chunk is compressed on disk
#            (see the "COMP_MODE_*" constants for details).
ENTRY_SIDEDATA_COMPRESSION_MODE = 11
105
### main revlog header

# The format is kept in its own constant because Struct.format cannot be
# relied on: it is inconsistent between Python <= 3.6 and later versions.
INDEX_HEADER_FMT = b">I"
INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT)

## revlog version
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy values until the file formats are finalized.
REVLOGV2 = 0xDEAD
CHANGELOGV2 = 0xD34D

##  global revlog header flags
# Shared across v1 and v2.
FLAG_INLINE_DATA = 1 << 16
# Only used by v1, implied by v2.
FLAG_GENERALDELTA = 1 << 17

# Flag sets associated with each revlog version.
REVLOGV0_FLAGS = 0
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = FLAG_INLINE_DATA
CHANGELOGV2_FLAGS = 0

# Defaults: the default version value combines the default format number
# with the default flags.
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
132
### individual entry

## index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")

## index v1
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid (20 bytes of node id followed by 12 bytes of padding)
INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
# sanity check: a v1 entry must be exactly 64 bytes
assert INDEX_ENTRY_V1.size == 32 * 2, INDEX_ENTRY_V1.size

## index v2
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid (20 bytes of node id followed by 12 bytes of padding)
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
#  1 byte:  compression mode (the 2 lower bits are the data compression mode)
# 19 bytes: padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
# sanity check: a v2 entry must be exactly 96 bytes
assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size

## index changelog v2 (same as v2, without the base rev and link rev fields)
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid (20 bytes of node id followed by 12 bytes of padding)
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
#  1 byte:  compression mode (the 2 lower bits are the data compression mode)
# 27 bytes: padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiB27x")
# sanity check: report the size of the struct actually being checked
assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_CL_V2.size
187
# revlog index flags

# For historical reasons, the internal flags of revlogs were exposed via the
# wire protocol, and they are even exposed in parts of the storage APIs.

# the revision has censor metadata and must be verified
REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
# the revision hash does not match its data (narrowhg)
REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
# the revision data is stored externally
REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
# the revision changes files in a way that could affect copy tracing
REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
# value used when a revision carries no flag
REVIDX_DEFAULT_FLAGS = 0

# stable order in which the flags must be processed and their processors
# applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_HASCOPIESINFO,
]

# bitmask of the flags that could cause the rawdata content to change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
212
## chunk compression mode constants:
# These constants are used in revlog version >=2 to denote the compression
# used for a chunk.

# The chunk is not compressed: the data stored on disk can be used directly
# as the chunk value, without any header information prefixed.
COMP_MODE_PLAIN = 0

# The chunk uses the "default compression" of the revlog (usually defined in
# the revlog docket). A header is still used.
#
# XXX: keeping a header is probably not useful and we should probably drop it.
#
# XXX: The value of allowing mixed types of compression in the revlog is
#      unclear and we should consider making PLAIN/DEFAULT the only available
#      modes for revlog v2, disallowing INLINE mode.
COMP_MODE_DEFAULT = 1

# The chunk uses a compression mode stored "inline" at the start of the chunk
# itself. This is the mode always used for revlog version "0" and "1".
COMP_MODE_INLINE = revlogutils.COMP_MODE_INLINE
234
# Maps each revlog version to the `*_FLAGS` value defined for that version.
SUPPORTED_FLAGS = {
    REVLOGV0: REVLOGV0_FLAGS,
    REVLOGV1: REVLOGV1_FLAGS,
    REVLOGV2: REVLOGV2_FLAGS,
    CHANGELOGV2: CHANGELOGV2_FLAGS,
}
241
242_no = lambda flags: False
243_yes = lambda flags: True
244
245
246def _from_flag(flag):
247    return lambda flags: bool(flags & flag)
248
249
# Features of each revlog version.
#
# The `inline` and `generaldelta` values are callables taking a `flags`
# argument and returning a boolean; the `sidedata` and `docket` values are
# plain booleans.
FEATURES_BY_VERSION = {
    REVLOGV0: {
        b'inline': _no,
        b'generaldelta': _no,
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV1: {
        # for v1, both features are driven by the corresponding flag
        b'inline': _from_flag(FLAG_INLINE_DATA),
        b'generaldelta': _from_flag(FLAG_GENERALDELTA),
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV2: {
        # The point of inline revlogs is to reduce the number of files used
        # in the store. Using a docket defeats this purpose, so we need other
        # means to reduce the number of files for revlogv2.
        b'inline': _no,
        b'generaldelta': _yes,
        b'sidedata': True,
        b'docket': True,
    },
    CHANGELOGV2: {
        b'inline': _no,
        # General delta is useless for the changelog since we don't do any
        # delta there.
        b'generaldelta': _no,
        b'sidedata': True,
        b'docket': True,
    },
}
280
281
# NOTE(review): presumably the maximum delta chain length applied to
# sparse revlogs -- confirm against the code using this constant.
SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
283