# revlogdeltas.py - constant used for revlog logic.
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Constants used by the revlog logic"""

from __future__ import absolute_import

import struct

from ..interfaces import repository
from .. import revlogutils

### Internal utility constants

KIND_CHANGELOG = 1001  # over 256 to not be comparable with a bytes
KIND_MANIFESTLOG = 1002
KIND_FILELOG = 1003
KIND_OTHER = 1004

# Every known revlog kind, for membership checks.
ALL_KINDS = {
    KIND_CHANGELOG,
    KIND_MANIFESTLOG,
    KIND_FILELOG,
    KIND_OTHER,
}

### Index entry key
#
#
#    Internal details
#    ----------------
#
#    A large part of the revlog logic deals with revisions' "index entries",
#    tuple objects that contain the same "items" whatever the revlog version.
#    Different versions will have different ways of storing these items
#    (sometimes not having them at all), but the tuple will always be the
#    same. New fields are usually added at the end to avoid breaking existing
#    code that relies on the existing order. The fields are defined as
#    follows:

#    [0] offset:
#            The byte index of the start of revision data chunk.
#            That value is shifted up by 16 bits. Use "offset = field >> 16"
#            to retrieve it.
#
#        flags:
#            A flag field that carries special information or changes the
#            behavior of the revision. (see `REVIDX_*` constants for details)
#            The flag field only occupies the lowest 16 bits of this field,
#            use "flags = field & 0xFFFF" to retrieve the value.
ENTRY_DATA_OFFSET = 0

#    [1] compressed length:
#            The size, in bytes, of the chunk on disk
ENTRY_DATA_COMPRESSED_LENGTH = 1

#    [2] uncompressed length:
#            The size, in bytes, of the full revision once reconstructed.
ENTRY_DATA_UNCOMPRESSED_LENGTH = 2

#    [3] base rev:
#            Without general delta: the base of the revision delta chain.
#            With general delta: the base of the delta stored in the data
#            chunk.
ENTRY_DELTA_BASE = 3

#    [4] link rev:
#            Revision number, in the changelog, of the changeset that
#            introduced this revision.
ENTRY_LINK_REV = 4

#    [5] parent 1 rev:
#            Revision number of the first parent.
ENTRY_PARENT_1 = 5

#    [6] parent 2 rev:
#            Revision number of the second parent.
ENTRY_PARENT_2 = 6

#    [7] node id:
#            Node id of the current revision.
ENTRY_NODE_ID = 7

#    [8] sidedata offset:
#            Byte index at which the revision's side-data chunk starts.
ENTRY_SIDEDATA_OFFSET = 8

#    [9] sidedata chunk length:
#            Size, in bytes, of the revision's side-data chunk.
ENTRY_SIDEDATA_COMPRESSED_LENGTH = 9

#    [10] data compression mode:
#            Two bits describing how the data chunk is compressed on disk
#            (see "COMP_MODE_*" constants for details). For revlog version 0
#            and 1 this will always be COMP_MODE_INLINE.
ENTRY_DATA_COMPRESSION_MODE = 10

#    [11] side-data compression mode:
#            Two bits describing how the sidedata chunk is compressed on disk
#            (see "COMP_MODE_*" constants for details).
ENTRY_SIDEDATA_COMPRESSION_MODE = 11

### main revlog header

# The format is kept in its own constant: `Struct.format` cannot be relied
# upon since it is inconsistent between python <=3.6 and later versions.
INDEX_HEADER_FMT = b">I"
INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT)

## revlog version
REVLOGV0 = 0
REVLOGV1 = 1
# Placeholder value, to be replaced once the file format is finalized.
REVLOGV2 = 0xDEAD
# Placeholder value, to be replaced once the file format is finalized.
CHANGELOGV2 = 0xD34D

## global revlog header flags
# Shared across v1 and v2.
FLAG_INLINE_DATA = 1 << 16
# Only used by v1, implied by v2.
FLAG_GENERALDELTA = 1 << 17
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV0_FLAGS = 0
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = FLAG_INLINE_DATA
CHANGELOGV2_FLAGS = 0

### individual entry

## index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")

## index v1
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
assert INDEX_ENTRY_V1.size == 32 * 2, INDEX_ENTRY_V1.size

## index v2
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
#  1 bytes: compression mode (2 lower bit are data_compression_mode)
#  19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size

## index changelog v2
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
#  1 bytes: compression mode (2 lower bit are data_compression_mode)
#  27 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiB27x")
# Report the size of the struct actually being checked on failure.
assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_CL_V2.size

# revlog index flags

# For historical reasons, revlog's internal flags were exposed via the
# wire protocol and are even exposed in parts of the storage APIs.

# revision has censor metadata, must be verified
REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
# revision hash does not match data (narrowhg)
REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
# revision data is stored externally
REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
# revision changes files in a way that could affect copy tracing.
REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_HASCOPIESINFO,
]

# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

## chunk compression mode constants:
# These constants are used in revlog version >=2 to denote the compression used
# for a chunk.

# Chunk uses no compression, the data stored on disk can be directly used as
# chunk value, without any header information prefixed.
COMP_MODE_PLAIN = 0

# Chunk uses the "default compression" for the revlog (usually defined in the
# revlog docket). A header is still used.
#
# XXX: keeping a header is probably not useful and we should probably drop it.
#
# XXX: The value of allowing mixed types of compression in the revlog is
# unclear and we should consider making PLAIN/DEFAULT the only available mode
# for revlog v2, disallowing INLINE mode.
COMP_MODE_DEFAULT = 1

# Chunk uses a compression mode stored "inline" at the start of the chunk
# itself. This is the mode always used for revlog version "0" and "1"
COMP_MODE_INLINE = revlogutils.COMP_MODE_INLINE

# Header flags accepted for each revlog version.
SUPPORTED_FLAGS = {
    REVLOGV0: REVLOGV0_FLAGS,
    REVLOGV1: REVLOGV1_FLAGS,
    REVLOGV2: REVLOGV2_FLAGS,
    CHANGELOGV2: CHANGELOGV2_FLAGS,
}


def _no(flags):
    """Feature predicate that is False whatever the header `flags`."""
    return False


def _yes(flags):
    """Feature predicate that is True whatever the header `flags`."""
    return True


def _from_flag(flag):
    """Return a predicate telling whether `flag` is set in a flags value."""
    return lambda flags: bool(flags & flag)


# Per-version feature table. `inline` and `generaldelta` are predicates taking
# the header flags; `sidedata` and `docket` are plain booleans.
FEATURES_BY_VERSION = {
    REVLOGV0: {
        b'inline': _no,
        b'generaldelta': _no,
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV1: {
        b'inline': _from_flag(FLAG_INLINE_DATA),
        b'generaldelta': _from_flag(FLAG_GENERALDELTA),
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV2: {
        # The point of inline-revlog is to reduce the number of files used in
        # the store. Using a docket defeats this purpose. So we need other
        # means to reduce the number of files for revlogv2.
        b'inline': _no,
        b'generaldelta': _yes,
        b'sidedata': True,
        b'docket': True,
    },
    CHANGELOGV2: {
        b'inline': _no,
        # General delta is useless for changelog since we don't do any delta
        b'generaldelta': _no,
        b'sidedata': True,
        b'docket': True,
    },
}


SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000