# Code dedicated to a cache around the obsolescence property
#
# This module content aims at being upstreamed.
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import errno
import hashlib
import struct

from mercurial import (
    localrepo,
    obsolete,
    phases,
    node,
    util,
)

from mercurial.utils.stringutil import forcebytestr

from . import (
    compat,
    exthelper,
)

eh = exthelper.exthelper()

obsstorefilecache = localrepo.localrepository.obsstore

# obsstore is a filecache so we have to do some special dancing
@eh.wrapfunction(obsstorefilecache, 'func')
def obsstorewithcache(orig, repo):
    """wrap the obsstore filecache to attach an obscache and a cache key

    The returned obsstore gains:
    - an 'obscache' attribute (an obscache instance bound to the unfiltered
      repo),
    - a 'cachekey' method (via a dynamically created subclass) used to detect
      obsstore changes and strips.
    """
    obsstore = orig(repo)
    obsstore.obscache = obscache(repo.unfiltered())

    class cachekeyobsstore(obsstore.__class__):

        # number of trailing bytes hashed to build the cache key
        _obskeysize = 200

        def cachekey(self, index=None):
            """return (current-length, cachekey)

            'current-length': is the current length of the obsstore storage file,
            'cachekey' is the hash of the last 200 bytes ending at 'index'.

            If 'index' is unspecified, current obsstore length is used.
            Cachekey will be set to null id if the obsstore is empty.

            If the index specified is higher than the current obsstore file
            length, cachekey will be set to None."""
            # default value
            obsstoresize = 0
            keydata = b''
            # try to get actual data from the obsstore
            try:
                with self.svfs(b'obsstore') as obsfile:
                    obsfile.seek(0, 2)
                    obsstoresize = obsfile.tell()
                    if index is None:
                        index = obsstoresize
                    elif obsstoresize < index:
                        # requested index is past the end of file: the
                        # obsstore was stripped since the key was computed
                        return obsstoresize, None
                    actualsize = min(index, self._obskeysize)
                    if actualsize:
                        obsfile.seek(index - actualsize, 0)
                        keydata = obsfile.read(actualsize)
            except (OSError, IOError) as e:
                # a missing obsstore file simply means "empty obsstore"
                if e.errno != errno.ENOENT:
                    raise
            if keydata:
                key = hashlib.sha1(keydata).digest()
            else:
                # reusing an existing "empty" value makes it easier to define a
                # default cachekey for 'no data'.
                key = node.nullid
            return obsstoresize, key

    obsstore.__class__ = cachekeyobsstore

    return obsstore

def markersfrom(obsstore, byteoffset, firstmarker):
    """return all markers in 'obsstore' starting at index 'firstmarker'

    'byteoffset' is the offset in the obsstore file matching 'firstmarker';
    it is used to parse only the new data when the markers are not already
    loaded in memory.
    """
    if not firstmarker:
        return list(obsstore)
    elif r'_all' in vars(obsstore):
        # if the data are in memory, just use that
        return obsstore._all[firstmarker:]
    else:
        obsdata = obsstore.svfs.tryread(b'obsstore')
        return obsolete._readmarkers(obsdata, byteoffset)[1]


class dualsourcecache(object):
    """An abstract class for caches that need both changelog and obsstore

    This class handles the tracking of changelog and obsstore updates. It
    provides data to perform incremental updates (see the 'updatefrom'
    function for details). This class can also detect stripping of the
    changelog or the obsstore and reset the cache in that case (see the
    'clear' function for details).
    """

    # default key used for an empty cache
    #
    # The cache key covering the changesets and obsmarkers content
    #
    # The cache key parts are:
    # - tip-rev,
    # - tip-node,
    # - obsstore-length (nb markers),
    # - obsstore-file-size (in bytes),
    # - obsstore "cache key"
    emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid)
    _cachename = None # used for error message

    def __init__(self):
        super(dualsourcecache, self).__init__()
        self._cachekey = None

    def _updatefrom(self, repo, revs, obsmarkers):
        """override this method to update your cache data incrementally

        revs: list of new revision in the changelog
        obsmarker: list of new obsmarkers in the obsstore
        """
        raise NotImplementedError

    def clear(self, reset=False):
        """invalidate the cache content

        if 'reset' is passed, we detected a strip and the cache will have to be
        recomputed.
        """
        # /!\ IMPORTANT /!\
        # You must override this method to actually clear your cache data
        if reset:
            self._cachekey = self.emptykey
        else:
            self._cachekey = None

    def load(self, repo):
        """Load data from disk

        Do not forget to restore the "cachekey" attribute while doing so.
        """
        raise NotImplementedError

    # Useful public function (no need to override them)

    def uptodate(self, repo):
        """return True if the cache content is up to date, False otherwise

        This method can be used to detect if the cache is lagging behind new
        data in either changelog or obsstore.
        """
        if self._cachekey is None:
            self.load(repo)
        status = self._checkkey(repo.changelog, repo.obsstore)
        return (status is not None
                and status[0] == self._cachekey[0] # tiprev
                and status[1] == self._cachekey[3]) # obssize

    def update(self, repo):
        """update the cache with new repository data

        The update will be incremental when possible"""
        repo = repo.unfiltered()
        # If we do not have any data, try loading from disk
        if self._cachekey is None:
            self.load(repo)

        assert repo.filtername is None
        cl = repo.changelog

        upgrade = self._upgradeneeded(repo)
        if upgrade is None:
            return

        reset, revs, obsmarkers, obskeypair = upgrade
        if reset or self._cachekey is None:
            repo.ui.log(b'evoext-cache', b'strip detected, %s cache reset\n' % self._cachename)
            self.clear(reset=True)

        starttime = util.timer()
        revs = list(revs)
        obsmarkers = list(obsmarkers)
        self._updatefrom(repo, revs, obsmarkers)
        duration = util.timer() - starttime
        repo.ui.log(b'evoext-cache', b'updated %s in %.4f seconds (%dr, %do)\n',
                    self._cachename, duration, len(revs), len(obsmarkers))

        # update the key from the new data
        key = list(self._cachekey)
        if revs:
            key[0] = cl.tiprev()
            key[1] = cl.node(key[0])
        if obsmarkers:
            key[2] += len(obsmarkers)
            key[3], key[4] = obskeypair
        self._cachekey = tuple(key)

    # from here, there are internal function only

    def _checkkey(self, changelog, obsstore):
        """internal function

        return None if the cached key is invalid (strip detected), otherwise
        return (tiprev, obssize, obskey) describing the current state."""
        key = self._cachekey
        if key is None:
            return None

        ### Is the cache valid ?
        keytiprev, keytipnode, keyobslength, keyobssize, keyobskey = key
        # check for changelog strip
        tiprev = changelog.tiprev()
        if (tiprev < keytiprev
                or changelog.node(keytiprev) != keytipnode):
            return None
        # check for obsstore strip
        obssize, obskey = obsstore.cachekey(index=keyobssize)
        if obskey != keyobskey:
            return None
        if obssize != keyobssize:
            # we want to return the obskey for the new size
            __, obskey = obsstore.cachekey(index=obssize)
        return tiprev, obssize, obskey

    def _upgradeneeded(self, repo):
        """return (valid, start-rev, start-obs-idx)

        'valid': is "False" if older cache value needs invalidation,

        'start-rev': first revision not in the cache. None if cache is up to date,

        'start-obs-idx': index of the first obs-markers not in the cache. None is
        up to date.
        """

        # We need to ensure we use the same changelog and obsstore through the
        # processing. Otherwise some invalidation could update the object and their
        # content after we computed the cache key.
        cl = repo.changelog
        obsstore = repo.obsstore
        key = self._cachekey

        reset = False

        status = self._checkkey(cl, obsstore)
        if status is None:
            # strip detected (or no key at all): restart from scratch
            reset = True
            key = self.emptykey
            obssize, obskey = obsstore.cachekey()
            tiprev = cl.tiprev()
        else:
            tiprev, obssize, obskey = status

        keytiprev, keytipnode, keyobslength, keyobssize, keyobskey = key

        if not reset and keytiprev == tiprev and keyobssize == obssize:
            return None # nothing to upgrade

        ### cache is valid, is there anything to update

        # any new changesets ?
        revs = ()
        if keytiprev < tiprev:
            revs = list(cl.revs(start=keytiprev + 1, stop=tiprev))

        # any new markers
        markers = ()
        if keyobssize < obssize:
            # XXX There is a small race chance here. The obsstore might have
            # moved forward between the time we computed the cache key and we
            # access the data. To fix this we need some "up to" argument when
            # fetching the markers here. Otherwise we might return more markers
            # than covered by the cache key.
            #
            # In practice the cache is only updated after each transaction within a
            # lock. So we should be fine. We could enforce this with a new repository
            # requirement (or fix the race, that is not too hard).
            markers = markersfrom(obsstore, keyobssize, keyobslength)

        return reset, revs, markers, (obssize, obskey)

class obscache(dualsourcecache):
    """cache of the "is this rev the precursor of some obsmarkers" data

    This is not directly holding the "is this revision obsolete" information,
    because phases data gets into play here. However, it allows computing the
    "obsolescence" set without reading the obsstore content.

    Implementation note #1:

      The obsstore is implementing only half of the transaction logic it
      should. It properly records the starting point of the obsstore to allow
      clean rollback. However it still writes to the obsstore file directly
      during the transaction. Instead it should be keeping data in memory and
      write to a '.pending' file to make the data available for hooks.

      This cache is not going further than what the obsstore is doing, so it
      does not have any '.pending' logic. When the obsstore gains proper
      '.pending' support, adding it to this cache should not be too hard. As
      the flag always moves from 0 to 1, we could have a second '.pending'
      cache file to be read. If flag is set in any of them, the value is 1.
      For the same reason, updating the file in place should be possible.

    Implementation note #2:

      Instead of having a large final update run, we could update this cache at
      the level adding a new changeset or a new obsmarkers. More on this in the
      'update code'.

    Implementation note #3:

      Storage-wise, we could have a "start rev" to avoid storing useless
      zero. That would be especially useful for the '.pending' overlay.
    """

    _filepath = b'evoext-obscache-00'
    _headerformat = b'>q20sQQ20s'

    _cachename = b'evo-ext-obscache' # used for error message

    def __init__(self, repo):
        super(obscache, self).__init__()
        self._ondiskkey = None

    @util.propertycache
    def get(self):
        """final signature: obscache.get(rev)

        return True if "rev" is used as precursor for any obsmarkers

        IMPORTANT: make sure the cache has been updated to match the repository
        content before using it

        We use a property cache to skip the attribute resolution overhead in
        hot loops."""
        return self._data.__getitem__

    def _setdata(self, data):
        """set a new bytearray data, invalidating the 'get' shortcut if needed"""
        self._data = data
        if r'get' in vars(self):
            del self.get

    def clear(self, reset=False):
        """invalidate the cache content"""
        super(obscache, self).clear(reset=reset)
        self._setdata(bytearray())

    def _updatefrom(self, repo, revs, obsmarkers):
        if revs:
            self._updaterevs(repo, revs)
        if obsmarkers:
            self._updatemarkers(repo, obsmarkers)

    def _updaterevs(self, repo, revs):
        """update the cache with new revisions

        Newly added changesets might be affected by obsolescence markers we
        already have locally. So we need to have some global knowledge about
        the markers to handle that question.

        Right now this requires parsing all markers in the obsstore. We could
        imagine using various optimisations (eg: another cache, network
        exchange, etc).

        A possible approach to this is to build a set of all nodes used as
        precursors in `obsstore._obscandidate`. If markers are not loaded yet,
        we could initialize it by doing a quick scan through the obsstore data
        and filling a (pre-sized) set. Doing so would be much faster than
        parsing all the obsmarkers since we would access less data, not create
        any object beside the nodes and not have to decode any complex data.

        For now we stick to the simpler approach of paying the
        performance cost on new changesets.
        """
        new_entries = bytearray(len(revs))
        if not self._data:
            self._setdata(new_entries)
        else:
            self._data.extend(new_entries)
        data = self._data
        if repo.obsstore:
            node = repo.changelog.node
            succs = repo.obsstore.successors
            for r in revs:
                if node(r) in succs:
                    data[r] = 1

    def _updatemarkers(self, repo, obsmarkers):
        """update the cache with new markers"""
        getrev = compat.getgetrev(repo.changelog)
        for m in obsmarkers:
            r = getrev(m[0])
            # markers can reference nodes unknown locally
            if r is not None:
                self._data[r] = 1

    def save(self, repo):
        """save the data to disk"""

        # XXX it happens that the obsstore is (buggily) always up to date on disk
        if self._cachekey is None or self._cachekey == self._ondiskkey:
            return

        try:
            cachefile = repo.cachevfs(self._filepath, b'w', atomictemp=True)
            headerdata = struct.pack(self._headerformat, *self._cachekey)
            cachefile.write(headerdata)
            cachefile.write(self._data)
            cachefile.close()
            self._ondiskkey = self._cachekey
        except (IOError, OSError) as exc:
            # the cache is best-effort: log the failure but do not abort
            repo.ui.log(b'obscache', b'could not write update %s\n' % forcebytestr(exc))
            repo.ui.debug(b'obscache: could not write update %s\n' % forcebytestr(exc))

    def load(self, repo):
        """load data from disk"""
        assert repo.filtername is None

        data = repo.cachevfs.tryread(self._filepath)
        if not data:
            self._cachekey = self.emptykey
            self._setdata(bytearray())
        else:
            headersize = struct.calcsize(self._headerformat)
            self._cachekey = struct.unpack(self._headerformat, data[:headersize])
            self._setdata(bytearray(data[headersize:]))
        self._ondiskkey = self._cachekey

def _computeobsoleteset(orig, repo):
    """the set of obsolete revisions"""
    obs = set()
    repo = repo.unfiltered()
    notpublic = repo._phasecache.getrevset(repo, (phases.draft, phases.secret))
    if notpublic:
        obscache = repo.obsstore.obscache
        # Since we warm the cache at the end of every transaction, the cache
        # should be up to date. However a non-enabled client might have touched
        # the repository.
        #
        # Updating the cache without a lock is sloppy, so we fallback to the
        # old method and rely on the fact the next transaction will write the
        # cache down anyway.
        #
        # With the current implementation updating the cache will require to
        # load the obsstore anyway. Once loaded, hitting the obsstore directly
        # will be about as fast...
        if not obscache.uptodate(repo):
            if repo.currenttransaction() is None:
                repo.ui.log(b'evoext-cache',
                            b'obscache is out of date, '
                            b'falling back to slower obsstore version\n')
                repo.ui.debug(b'obscache is out of date\n')
                return orig(repo)
            else:
                # If a transaction is open, it is worthwhile to update and use
                # the cache, the lock prevents races and it will be written on
                # disk when the transaction closes.
                obscache.update(repo)
        isobs = obscache.get
        for r in notpublic:
            if isobs(r):
                obs.add(r)
    return obs

@eh.uisetup
def cachefuncs(ui):
    # replace the stock "obsolete" set computation with the cached version
    orig = obsolete.cachefuncs[b'obsolete']
    wrapped = lambda repo: _computeobsoleteset(orig, repo)
    obsolete.cachefuncs[b'obsolete'] = wrapped

@eh.reposetup
def setupcache(ui, repo):

    class obscacherepo(repo.__class__):

        @localrepo.unfilteredmethod
        def destroyed(self):
            # a strip just happened: drop the (now invalid) cache content
            if r'obsstore' in vars(self):
                self.obsstore.obscache.clear()
            super(obscacherepo, self).destroyed()

        @localrepo.unfilteredmethod
        def updatecaches(self, tr=None, **kwargs):
            super(obscacherepo, self).updatecaches(tr, **kwargs)
            self.obsstore.obscache.update(self)
            self.obsstore.obscache.save(self)

    repo.__class__ = obscacherepo