1# storageutil.py - Storage functionality agnostic of backend implementation. 2# 3# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com> 4# 5# This software may be used and distributed according to the terms of the 6# GNU General Public License version 2 or any later version. 7 8from __future__ import absolute_import 9 10import re 11import struct 12 13from ..i18n import _ 14from ..node import ( 15 bin, 16 nullrev, 17 sha1nodeconstants, 18) 19from .. import ( 20 dagop, 21 error, 22 mdiff, 23 pycompat, 24) 25from ..interfaces import repository 26from ..revlogutils import sidedata as sidedatamod 27from ..utils import hashutil 28 29_nullhash = hashutil.sha1(sha1nodeconstants.nullid) 30 31# revision data contains extra metadata not part of the official digest 32# Only used in changegroup >= v4. 33CG_FLAG_SIDEDATA = 1 34 35 36def hashrevisionsha1(text, p1, p2): 37 """Compute the SHA-1 for revision data and its parents. 38 39 This hash combines both the current file contents and its history 40 in a manner that makes it easy to distinguish nodes with the same 41 content in the revision graph. 42 """ 43 # As of now, if one of the parent node is null, p2 is null 44 if p2 == sha1nodeconstants.nullid: 45 # deep copy of a hash is faster than creating one 46 s = _nullhash.copy() 47 s.update(p1) 48 else: 49 # none of the parent nodes are nullid 50 if p1 < p2: 51 a = p1 52 b = p2 53 else: 54 a = p2 55 b = p1 56 s = hashutil.sha1(a) 57 s.update(b) 58 s.update(text) 59 return s.digest() 60 61 62METADATA_RE = re.compile(b'\x01\n') 63 64 65def parsemeta(text): 66 """Parse metadata header from revision data. 67 68 Returns a 2-tuple of (metadata, offset), where both can be None if there 69 is no metadata. 70 """ 71 # text can be buffer, so we can't use .startswith or .index 72 if text[:2] != b'\x01\n': 73 return None, None 74 s = METADATA_RE.search(text, 2).start() 75 mtext = text[2:s] 76 meta = {} 77 for l in mtext.splitlines(): 78 k, v = l.split(b': ', 1) 79 meta[k] = v 80 return meta, s + 2 81 82 83def packmeta(meta, text): 84 """Add metadata to fulltext to produce revision text.""" 85 keys = sorted(meta) 86 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys) 87 return b'\x01\n%s\x01\n%s' % (metatext, text) 88 89 90def iscensoredtext(text): 91 meta = parsemeta(text)[0] 92 return meta and b'censored' in meta 93 94 95def filtermetadata(text): 96 """Extract just the revision data from source text. 97 98 Returns ``text`` unless it has a metadata header, in which case we return 99 a new buffer without hte metadata. 100 """ 101 if not text.startswith(b'\x01\n'): 102 return text 103 104 offset = text.index(b'\x01\n', 2) 105 return text[offset + 2 :] 106 107 108def filerevisioncopied(store, node): 109 """Resolve file revision copy metadata. 110 111 Returns ``False`` if the file has no copy metadata. Otherwise a 112 2-tuple of the source filename and node. 113 """ 114 if store.parents(node)[0] != sha1nodeconstants.nullid: 115 return False 116 117 meta = parsemeta(store.revision(node))[0] 118 119 # copy and copyrev occur in pairs. In rare cases due to old bugs, 120 # one can occur without the other. So ensure both are present to flag 121 # as a copy. 122 if meta and b'copy' in meta and b'copyrev' in meta: 123 return meta[b'copy'], bin(meta[b'copyrev']) 124 125 return False 126 127 128def filedataequivalent(store, node, filedata): 129 """Determines whether file data is equivalent to a stored node. 130 131 Returns True if the passed file data would hash to the same value 132 as a stored revision and False otherwise. 133 134 When a stored revision is censored, filedata must be empty to have 135 equivalence. 136 137 When a stored revision has copy metadata, it is ignored as part 138 of the compare. 139 """ 140 141 if filedata.startswith(b'\x01\n'): 142 revisiontext = b'\x01\n\x01\n' + filedata 143 else: 144 revisiontext = filedata 145 146 p1, p2 = store.parents(node) 147 148 computednode = hashrevisionsha1(revisiontext, p1, p2) 149 150 if computednode == node: 151 return True 152 153 # Censored files compare against the empty file. 154 if store.iscensored(store.rev(node)): 155 return filedata == b'' 156 157 # Renaming a file produces a different hash, even if the data 158 # remains unchanged. Check if that's the case. 159 if store.renamed(node): 160 return store.read(node) == filedata 161 162 return False 163 164 165def iterrevs(storelen, start=0, stop=None): 166 """Iterate over revision numbers in a store.""" 167 step = 1 168 169 if stop is not None: 170 if start > stop: 171 step = -1 172 stop += step 173 if stop > storelen: 174 stop = storelen 175 else: 176 stop = storelen 177 178 return pycompat.xrange(start, stop, step) 179 180 181def fileidlookup(store, fileid, identifier): 182 """Resolve the file node for a value. 183 184 ``store`` is an object implementing the ``ifileindex`` interface. 185 186 ``fileid`` can be: 187 188 * A 20 or 32 byte binary node. 189 * An integer revision number 190 * A 40 or 64 byte hex node. 191 * A bytes that can be parsed as an integer representing a revision number. 192 193 ``identifier`` is used to populate ``error.LookupError`` with an identifier 194 for the store. 195 196 Raises ``error.LookupError`` on failure. 197 """ 198 if isinstance(fileid, int): 199 try: 200 return store.node(fileid) 201 except IndexError: 202 raise error.LookupError( 203 b'%d' % fileid, identifier, _(b'no match found') 204 ) 205 206 if len(fileid) in (20, 32): 207 try: 208 store.rev(fileid) 209 return fileid 210 except error.LookupError: 211 pass 212 213 if len(fileid) in (40, 64): 214 try: 215 rawnode = bin(fileid) 216 store.rev(rawnode) 217 return rawnode 218 except TypeError: 219 pass 220 221 try: 222 rev = int(fileid) 223 224 if b'%d' % rev != fileid: 225 raise ValueError 226 227 try: 228 return store.node(rev) 229 except (IndexError, TypeError): 230 pass 231 except (ValueError, OverflowError): 232 pass 233 234 raise error.LookupError(fileid, identifier, _(b'no match found')) 235 236 237def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn): 238 """Resolve information needed to strip revisions. 239 240 Finds the minimum revision number that must be stripped in order to 241 strip ``minlinkrev``. 242 243 Returns a 2-tuple of the minimum revision number to do that and a set 244 of all revision numbers that have linkrevs that would be broken 245 by that strip. 246 247 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``. 248 ``headrevs`` is an iterable of head revisions. 249 ``linkrevfn`` is a callable that receives a revision and returns a linked 250 revision. 251 ``parentrevsfn`` is a callable that receives a revision number and returns 252 an iterable of its parent revision numbers. 253 """ 254 brokenrevs = set() 255 strippoint = tiprev + 1 256 257 heads = {} 258 futurelargelinkrevs = set() 259 for head in headrevs: 260 headlinkrev = linkrevfn(head) 261 heads[head] = headlinkrev 262 if headlinkrev >= minlinkrev: 263 futurelargelinkrevs.add(headlinkrev) 264 265 # This algorithm involves walking down the rev graph, starting at the 266 # heads. Since the revs are topologically sorted according to linkrev, 267 # once all head linkrevs are below the minlink, we know there are 268 # no more revs that could have a linkrev greater than minlink. 269 # So we can stop walking. 270 while futurelargelinkrevs: 271 strippoint -= 1 272 linkrev = heads.pop(strippoint) 273 274 if linkrev < minlinkrev: 275 brokenrevs.add(strippoint) 276 else: 277 futurelargelinkrevs.remove(linkrev) 278 279 for p in parentrevsfn(strippoint): 280 if p != nullrev: 281 plinkrev = linkrevfn(p) 282 heads[p] = plinkrev 283 if plinkrev >= minlinkrev: 284 futurelargelinkrevs.add(plinkrev) 285 286 return strippoint, brokenrevs 287 288 289def emitrevisions( 290 store, 291 nodes, 292 nodesorder, 293 resultcls, 294 deltaparentfn=None, 295 candeltafn=None, 296 rawsizefn=None, 297 revdifffn=None, 298 flagsfn=None, 299 deltamode=repository.CG_DELTAMODE_STD, 300 revisiondata=False, 301 assumehaveparentrevisions=False, 302 sidedata_helpers=None, 303): 304 """Generic implementation of ifiledata.emitrevisions(). 305 306 Emitting revision data is subtly complex. This function attempts to 307 encapsulate all the logic for doing so in a backend-agnostic way. 308 309 ``store`` 310 Object conforming to ``ifilestorage`` interface. 311 312 ``nodes`` 313 List of revision nodes whose data to emit. 314 315 ``resultcls`` 316 A type implementing the ``irevisiondelta`` interface that will be 317 constructed and returned. 318 319 ``deltaparentfn`` (optional) 320 Callable receiving a revision number and returning the revision number 321 of a revision that the internal delta is stored against. This delta 322 will be preferred over computing a new arbitrary delta. 323 324 If not defined, a delta will always be computed from raw revision 325 data. 326 327 ``candeltafn`` (optional) 328 Callable receiving a pair of revision numbers that returns a bool 329 indicating whether a delta between them can be produced. 330 331 If not defined, it is assumed that any two revisions can delta with 332 each other. 333 334 ``rawsizefn`` (optional) 335 Callable receiving a revision number and returning the length of the 336 ``store.rawdata(rev)``. 337 338 If not defined, ``len(store.rawdata(rev))`` will be called. 339 340 ``revdifffn`` (optional) 341 Callable receiving a pair of revision numbers that returns a delta 342 between them. 343 344 If not defined, a delta will be computed by invoking mdiff code 345 on ``store.revision()`` results. 346 347 Defining this function allows a precomputed or stored delta to be 348 used without having to compute on. 349 350 ``flagsfn`` (optional) 351 Callable receiving a revision number and returns the integer flags 352 value for it. If not defined, flags value will be 0. 353 354 ``deltamode`` 355 constaint on delta to be sent: 356 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas, 357 * CG_DELTAMODE_PREV - only delta against "prev", 358 * CG_DELTAMODE_FULL - only issue full snapshot. 359 360 Whether to send fulltext revisions instead of deltas, if allowed. 361 362 ``nodesorder`` 363 ``revisiondata`` 364 ``assumehaveparentrevisions`` 365 ``sidedata_helpers`` (optional) 366 If not None, means that sidedata should be included. 367 See `revlogutil.sidedata.get_sidedata_helpers`. 368 """ 369 370 fnode = store.node 371 frev = store.rev 372 373 if nodesorder == b'nodes': 374 revs = [frev(n) for n in nodes] 375 elif nodesorder == b'linear': 376 revs = {frev(n) for n in nodes} 377 revs = dagop.linearize(revs, store.parentrevs) 378 else: # storage and default 379 revs = sorted(frev(n) for n in nodes) 380 381 prevrev = None 382 383 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions: 384 prevrev = store.parentrevs(revs[0])[0] 385 386 # Set of revs available to delta against. 387 available = set() 388 389 for rev in revs: 390 if rev == nullrev: 391 continue 392 393 node = fnode(rev) 394 p1rev, p2rev = store.parentrevs(rev) 395 396 if deltaparentfn: 397 deltaparentrev = deltaparentfn(rev) 398 else: 399 deltaparentrev = nullrev 400 401 # Forced delta against previous mode. 402 if deltamode == repository.CG_DELTAMODE_PREV: 403 baserev = prevrev 404 405 # We're instructed to send fulltext. Honor that. 406 elif deltamode == repository.CG_DELTAMODE_FULL: 407 baserev = nullrev 408 # We're instructed to use p1. Honor that 409 elif deltamode == repository.CG_DELTAMODE_P1: 410 baserev = p1rev 411 412 # There is a delta in storage. We try to use that because it 413 # amounts to effectively copying data from storage and is 414 # therefore the fastest. 415 elif deltaparentrev != nullrev: 416 # Base revision was already emitted in this group. We can 417 # always safely use the delta. 418 if deltaparentrev in available: 419 baserev = deltaparentrev 420 421 # Base revision is a parent that hasn't been emitted already. 422 # Use it if we can assume the receiver has the parent revision. 423 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev): 424 baserev = deltaparentrev 425 426 # No guarantee the receiver has the delta parent. Send delta 427 # against last revision (if possible), which in the common case 428 # should be similar enough to this revision that the delta is 429 # reasonable. 430 elif prevrev is not None: 431 baserev = prevrev 432 else: 433 baserev = nullrev 434 435 # Storage has a fulltext revision. 436 437 # Let's use the previous revision, which is as good a guess as any. 438 # There is definitely room to improve this logic. 439 elif prevrev is not None: 440 baserev = prevrev 441 else: 442 baserev = nullrev 443 444 # But we can't actually use our chosen delta base for whatever 445 # reason. Reset to fulltext. 446 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)): 447 baserev = nullrev 448 449 revision = None 450 delta = None 451 baserevisionsize = None 452 453 if revisiondata: 454 if store.iscensored(baserev) or store.iscensored(rev): 455 try: 456 revision = store.rawdata(node) 457 except error.CensoredNodeError as e: 458 revision = e.tombstone 459 460 if baserev != nullrev: 461 if rawsizefn: 462 baserevisionsize = rawsizefn(baserev) 463 else: 464 baserevisionsize = len(store.rawdata(baserev)) 465 466 elif ( 467 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV 468 ): 469 revision = store.rawdata(node) 470 available.add(rev) 471 else: 472 if revdifffn: 473 delta = revdifffn(baserev, rev) 474 else: 475 delta = mdiff.textdiff( 476 store.rawdata(baserev), store.rawdata(rev) 477 ) 478 479 available.add(rev) 480 481 serialized_sidedata = None 482 sidedata_flags = (0, 0) 483 if sidedata_helpers: 484 try: 485 old_sidedata = store.sidedata(rev) 486 except error.CensoredNodeError: 487 # skip any potential sidedata of the censored revision 488 sidedata = {} 489 else: 490 sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers( 491 store=store, 492 sidedata_helpers=sidedata_helpers, 493 sidedata=old_sidedata, 494 rev=rev, 495 ) 496 if sidedata: 497 serialized_sidedata = sidedatamod.serialize_sidedata(sidedata) 498 499 flags = flagsfn(rev) if flagsfn else 0 500 protocol_flags = 0 501 if serialized_sidedata: 502 # Advertise that sidedata exists to the other side 503 protocol_flags |= CG_FLAG_SIDEDATA 504 # Computers and removers can return flags to add and/or remove 505 flags = flags | sidedata_flags[0] & ~sidedata_flags[1] 506 507 yield resultcls( 508 node=node, 509 p1node=fnode(p1rev), 510 p2node=fnode(p2rev), 511 basenode=fnode(baserev), 512 flags=flags, 513 baserevisionsize=baserevisionsize, 514 revision=revision, 515 delta=delta, 516 sidedata=serialized_sidedata, 517 protocol_flags=protocol_flags, 518 ) 519 520 prevrev = rev 521 522 523def deltaiscensored(delta, baserev, baselenfn): 524 """Determine if a delta represents censored revision data. 525 526 ``baserev`` is the base revision this delta is encoded against. 527 ``baselenfn`` is a callable receiving a revision number that resolves the 528 length of the revision fulltext. 529 530 Returns a bool indicating if the result of the delta represents a censored 531 revision. 532 """ 533 # Fragile heuristic: unless new file meta keys are added alphabetically 534 # preceding "censored", all censored revisions are prefixed by 535 # "\1\ncensored:". A delta producing such a censored revision must be a 536 # full-replacement delta, so we inspect the first and only patch in the 537 # delta for this prefix. 538 hlen = struct.calcsize(b">lll") 539 if len(delta) <= hlen: 540 return False 541 542 oldlen = baselenfn(baserev) 543 newlen = len(delta) - hlen 544 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): 545 return False 546 547 add = b"\1\ncensored:" 548 addlen = len(add) 549 return newlen >= addlen and delta[hlen : hlen + addlen] == add 550