# __init__.py - remotefilelog extension
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
7"""remotefilelog causes Mercurial to lazilly fetch file contents (EXPERIMENTAL)
8
9This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
10GUARANTEES. This means that repositories created with this extension may
11only be usable with the exact version of this extension/Mercurial that was
12used. The extension attempts to enforce this in order to prevent repository
13corruption.
14
15remotefilelog works by fetching file contents lazily and storing them
16in a cache on the client rather than in revlogs. This allows enormous
17histories to be transferred only partially, making them easier to
18operate on.
19
Configs:

    ``packs.maxchainlen`` specifies the maximum delta chain length in pack files

    ``packs.maxpacksize`` specifies the maximum pack file size

    ``packs.maxpackfilecount`` specifies the maximum number of packs in the
      shared cache (trees only for now)

    ``remotefilelog.backgroundprefetch`` runs prefetch in background when True

    ``remotefilelog.bgprefetchrevs`` specifies revisions to fetch on commit and
      update, and on other commands that use them. Different from pullprefetch.

    ``remotefilelog.gcrepack`` does garbage collection during repack when True

    ``remotefilelog.nodettl`` specifies maximum TTL of a node in seconds before
      it is garbage collected

    ``remotefilelog.repackonhggc`` runs repack on hg gc when True

    ``remotefilelog.prefetchdays`` specifies the maximum age of a commit in
      days after which it is no longer prefetched.

    ``remotefilelog.prefetchdelay`` specifies delay between background
      prefetches in seconds after operations that change the working copy parent

    ``remotefilelog.data.gencountlimit`` constrains the minimum number of data
      pack files required to be considered part of a generation. In
      particular, a generation must contain more than gencountlimit pack
      files.

    ``remotefilelog.data.generations`` list specifying the lower bound of
      each generation of the data pack files. For example, the list
      ['100MB', '1MB'] or ['1MB', '100MB'] will lead to three generations:
      [0, 1MB), [1MB, 100MB) and [100MB, infinity).

    ``remotefilelog.data.maxrepackpacks`` the maximum number of pack files to
      include in an incremental data repack.

    ``remotefilelog.data.repackmaxpacksize`` the maximum size of a pack file for
      it to be considered for an incremental data repack.

    ``remotefilelog.data.repacksizelimit`` the maximum total size of pack files
      to include in an incremental data repack.

    ``remotefilelog.history.gencountlimit`` constrains the minimum number of
      history pack files required to be considered part of a generation. In
      particular, a generation must contain more than gencountlimit pack
      files.

    ``remotefilelog.history.generations`` list specifying the lower bound of
      each generation of the history pack files. For example, the list
      ['100MB', '1MB'] or ['1MB', '100MB'] will lead to three generations:
      [0, 1MB), [1MB, 100MB) and [100MB, infinity).

    ``remotefilelog.history.maxrepackpacks`` the maximum number of pack files to
      include in an incremental history repack.

    ``remotefilelog.history.repackmaxpacksize`` the maximum size of a pack file
      for it to be considered for an incremental history repack.

    ``remotefilelog.history.repacksizelimit`` the maximum total size of pack
      files to include in an incremental history repack.

    ``remotefilelog.backgroundrepack`` automatically consolidate packs in the
      background

    ``remotefilelog.cachepath`` path to cache

    ``remotefilelog.cachegroup`` if set, make cache directory sgid to this
      group

    ``remotefilelog.cacheprocess`` binary to invoke for fetching file data

    ``remotefilelog.debug`` turn on remotefilelog-specific debug output

    ``remotefilelog.excludepattern`` pattern of files to exclude from pulls

    ``remotefilelog.includepattern`` pattern of files to include in pulls

    ``remotefilelog.fetchwarning`` message to print when too many
      single-file fetches occur

    ``remotefilelog.getfilesstep`` number of files to request in a single RPC

    ``remotefilelog.getfilestype`` if set to 'threaded' use threads to fetch
      files, otherwise use optimistic fetching

    ``remotefilelog.pullprefetch`` revset for selecting files that should be
      eagerly downloaded rather than lazily

    ``remotefilelog.reponame`` name of the repo. If set, used to partition
      data from other repos in a shared store.

    ``remotefilelog.server`` if true, enable server-side functionality

    ``remotefilelog.servercachepath`` path for caching blobs on the server

    ``remotefilelog.serverexpiration`` number of days to keep cached server
      blobs

    ``remotefilelog.validatecache`` if set, check cache entries for corruption
      before returning blobs

    ``remotefilelog.validatecachelog`` if set, check cache entries for
      corruption before returning metadata

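A minimal client configuration might look like the following; the cache
path, repo name and prefetch revset shown here are illustrative
placeholders, not defaults::

    [extensions]
    remotefilelog =

    [remotefilelog]
    cachepath = /path/to/hgcache
    reponame = myrepo
    pullprefetch = parents() + bookmark()
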
126"""
from __future__ import absolute_import

import os
import time
import traceback

from mercurial.node import (
    hex,
    wdirrev,
)
from mercurial.i18n import _
from mercurial.pycompat import open
from mercurial import (
    changegroup,
    changelog,
    commands,
    configitems,
    context,
    copies,
    debugcommands as hgdebugcommands,
    dispatch,
    error,
    exchange,
    extensions,
    hg,
    localrepo,
    match as matchmod,
    merge,
    mergestate as mergestatemod,
    patch,
    pycompat,
    registrar,
    repair,
    repoview,
    revset,
    scmutil,
    smartset,
    streamclone,
    util,
)
from . import (
    constants,
    debugcommands,
    fileserverclient,
    remotefilectx,
    remotefilelog,
    remotefilelogserver,
    repack as repackmod,
    shallowbundle,
    shallowrepo,
    shallowstore,
    shallowutil,
    shallowverifier,
)

# ensures debug commands are registered
hgdebugcommands.command

cmdtable = {}
command = registrar.command(cmdtable)

configtable = {}
configitem = registrar.configitem(configtable)

configitem(b'remotefilelog', b'debug', default=False)

configitem(b'remotefilelog', b'reponame', default=b'')
configitem(b'remotefilelog', b'cachepath', default=None)
configitem(b'remotefilelog', b'cachegroup', default=None)
configitem(b'remotefilelog', b'cacheprocess', default=None)
configitem(b'remotefilelog', b'cacheprocess.includepath', default=None)
configitem(b'remotefilelog', b'cachelimit', default=b'1000 GB')

configitem(
    b'remotefilelog',
    b'fallbackpath',
    default=configitems.dynamicdefault,
    alias=[(b'remotefilelog', b'fallbackrepo')],
)

configitem(b'remotefilelog', b'validatecachelog', default=None)
configitem(b'remotefilelog', b'validatecache', default=b'on')
configitem(b'remotefilelog', b'server', default=None)
configitem(b'remotefilelog', b'servercachepath', default=None)
configitem(b'remotefilelog', b'serverexpiration', default=30)
configitem(b'remotefilelog', b'backgroundrepack', default=False)
configitem(b'remotefilelog', b'bgprefetchrevs', default=None)
configitem(b'remotefilelog', b'pullprefetch', default=None)
configitem(b'remotefilelog', b'backgroundprefetch', default=False)
configitem(b'remotefilelog', b'prefetchdelay', default=120)
configitem(b'remotefilelog', b'prefetchdays', default=14)
# Other values include 'local' or 'none'. Any unrecognized value is 'all'.
configitem(b'remotefilelog', b'strip.includefiles', default='all')

configitem(b'remotefilelog', b'getfilesstep', default=10000)
configitem(b'remotefilelog', b'getfilestype', default=b'optimistic')
configitem(b'remotefilelog', b'batchsize', configitems.dynamicdefault)
configitem(b'remotefilelog', b'fetchwarning', default=b'')

configitem(b'remotefilelog', b'includepattern', default=None)
configitem(b'remotefilelog', b'excludepattern', default=None)

configitem(b'remotefilelog', b'gcrepack', default=False)
configitem(b'remotefilelog', b'repackonhggc', default=False)
configitem(b'repack', b'chainorphansbysize', default=True, experimental=True)

configitem(b'packs', b'maxpacksize', default=0)
configitem(b'packs', b'maxchainlen', default=1000)

configitem(b'devel', b'remotefilelog.bg-wait', default=False)

# default TTL limit is 30 days
_defaultlimit = 60 * 60 * 24 * 30
configitem(b'remotefilelog', b'nodettl', default=_defaultlimit)

configitem(b'remotefilelog', b'data.gencountlimit', default=2)
configitem(
    b'remotefilelog', b'data.generations', default=[b'1GB', b'100MB', b'1MB']
)
configitem(b'remotefilelog', b'data.maxrepackpacks', default=50)
configitem(b'remotefilelog', b'data.repackmaxpacksize', default=b'4GB')
configitem(b'remotefilelog', b'data.repacksizelimit', default=b'100MB')

configitem(b'remotefilelog', b'history.gencountlimit', default=2)
configitem(b'remotefilelog', b'history.generations', default=[b'100MB'])
configitem(b'remotefilelog', b'history.maxrepackpacks', default=50)
configitem(b'remotefilelog', b'history.repackmaxpacksize', default=b'400MB')
configitem(b'remotefilelog', b'history.repacksizelimit', default=b'100MB')

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'

repoclass = localrepo.localrepository
repoclass._basesupported.add(constants.SHALLOWREPO_REQUIREMENT)

isenabled = shallowutil.isenabled


def uisetup(ui):
    """Wraps user facing Mercurial commands to swap them out with shallow
    versions.
    """
    hg.wirepeersetupfuncs.append(fileserverclient.peersetup)

    entry = extensions.wrapcommand(commands.table, b'clone', cloneshallow)
    entry[1].append(
        (
            b'',
            b'shallow',
            None,
            _(b"create a shallow clone which uses remote file history"),
        )
    )

    extensions.wrapcommand(
        commands.table, b'debugindex', debugcommands.debugindex
    )
    extensions.wrapcommand(
        commands.table, b'debugindexdot', debugcommands.debugindexdot
    )
    extensions.wrapcommand(commands.table, b'log', log)
    extensions.wrapcommand(commands.table, b'pull', pull)

    # Prevent 'hg manifest --all'
    def _manifest(orig, ui, repo, *args, **opts):
        if isenabled(repo) and opts.get('all'):
            raise error.Abort(_(b"--all is not supported in a shallow repo"))

        return orig(ui, repo, *args, **opts)

    extensions.wrapcommand(commands.table, b"manifest", _manifest)

    # Wrap remotefilelog with lfs code
    def _lfsloaded(loaded=False):
        lfsmod = None
        try:
            lfsmod = extensions.find(b'lfs')
        except KeyError:
            pass
        if lfsmod:
            lfsmod.wrapfilelog(remotefilelog.remotefilelog)
            fileserverclient._lfsmod = lfsmod

    extensions.afterloaded(b'lfs', _lfsloaded)

    # debugdata needs remotefilelog.len to work
    extensions.wrapcommand(commands.table, b'debugdata', debugdatashallow)

    changegroup.cgpacker = shallowbundle.shallowcg1packer

    extensions.wrapfunction(
        changegroup, b'_addchangegroupfiles', shallowbundle.addchangegroupfiles
    )
    extensions.wrapfunction(
        changegroup, b'makechangegroup', shallowbundle.makechangegroup
    )
    extensions.wrapfunction(localrepo, b'makestore', storewrapper)
    extensions.wrapfunction(exchange, b'pull', exchangepull)
    extensions.wrapfunction(merge, b'applyupdates', applyupdates)
    extensions.wrapfunction(merge, b'_checkunknownfiles', checkunknownfiles)
    extensions.wrapfunction(context.workingctx, b'_checklookup', checklookup)
    extensions.wrapfunction(scmutil, b'_findrenames', findrenames)
    extensions.wrapfunction(
        copies, b'_computeforwardmissing', computeforwardmissing
    )
    extensions.wrapfunction(dispatch, b'runcommand', runcommand)
    extensions.wrapfunction(repair, b'_collectbrokencsets', _collectbrokencsets)
    extensions.wrapfunction(context.changectx, b'filectx', filectx)
    extensions.wrapfunction(context.workingctx, b'filectx', workingfilectx)
    extensions.wrapfunction(patch, b'trydiff', trydiff)
    extensions.wrapfunction(hg, b'verify', _verify)
    scmutil.fileprefetchhooks.add(b'remotefilelog', _fileprefetchhook)

    # disappointing hacks below
    extensions.wrapfunction(scmutil, b'getrenamedfn', getrenamedfn)
    extensions.wrapfunction(revset, b'filelog', filelogrevset)
    revset.symbols[b'filelog'] = revset.filelog


def cloneshallow(orig, ui, repo, *args, **opts):
    if opts.get('shallow'):
        repos = []

        def pull_shallow(orig, self, *args, **kwargs):
            if not isenabled(self):
                repos.append(self.unfiltered())
                # set up the client hooks so the post-clone update works
                setupclient(self.ui, self.unfiltered())

                # setupclient fixed the class on the repo itself
                # but we also need to fix it on the repoview
                if isinstance(self, repoview.repoview):
                    self.__class__.__bases__ = (
                        self.__class__.__bases__[0],
                        self.unfiltered().__class__,
                    )
                self.requirements.add(constants.SHALLOWREPO_REQUIREMENT)
                with self.lock():
                    # acquire store lock before writing requirements as some
                    # requirements might be written to .hg/store/requires
                    scmutil.writereporequirements(self)

                # Since setupclient hadn't been called, exchange.pull was not
                # wrapped. So we need to manually invoke our version of it.
                return exchangepull(orig, self, *args, **kwargs)
            else:
                return orig(self, *args, **kwargs)

        extensions.wrapfunction(exchange, b'pull', pull_shallow)

        # Wrap the stream logic to add requirements and to pass include/exclude
        # patterns around.
        def setup_streamout(repo, remote):
            # Replace remote.stream_out with a version that sends file
            # patterns.
            def stream_out_shallow(orig):
                caps = remote.capabilities()
                if constants.NETWORK_CAP_LEGACY_SSH_GETFILES in caps:
                    opts = {}
                    if repo.includepattern:
                        opts['includepattern'] = b'\0'.join(repo.includepattern)
                    if repo.excludepattern:
                        opts['excludepattern'] = b'\0'.join(repo.excludepattern)
                    return remote._callstream(b'stream_out_shallow', **opts)
                else:
                    return orig()

            extensions.wrapfunction(remote, b'stream_out', stream_out_shallow)

        def stream_wrap(orig, op):
            setup_streamout(op.repo, op.remote)
            return orig(op)

        extensions.wrapfunction(
            streamclone, b'maybeperformlegacystreamclone', stream_wrap
        )

        def canperformstreamclone(orig, pullop, bundle2=False):
            # remotefilelog is currently incompatible with the
            # bundle2 flavor of streamclones, so force us to use
            # v1 instead.
            if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
                pullop.remotebundle2caps[b'stream'] = [
                    c for c in pullop.remotebundle2caps[b'stream'] if c != b'v2'
                ]
            if bundle2:
                return False, None
            supported, requirements = orig(pullop, bundle2=bundle2)
            if requirements is not None:
                requirements.add(constants.SHALLOWREPO_REQUIREMENT)
            return supported, requirements

        extensions.wrapfunction(
            streamclone, b'canperformstreamclone', canperformstreamclone
        )

    try:
        orig(ui, repo, *args, **opts)
    finally:
        if opts.get('shallow'):
            for r in repos:
                if util.safehasattr(r, b'fileservice'):
                    r.fileservice.close()


def debugdatashallow(orig, *args, **kwds):
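    # 'hg debugdata' needs a revlog length to operate; a remotefilelog has
    # no cheap notion of length, so pretend it contains a single revision
    # for the duration of the wrapped call.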
    oldlen = remotefilelog.remotefilelog.__len__
    try:
        remotefilelog.remotefilelog.__len__ = lambda x: 1
        return orig(*args, **kwds)
    finally:
        remotefilelog.remotefilelog.__len__ = oldlen


def reposetup(ui, repo):
    if not repo.local():
        return

    # set here intentionally because it doesn't work in uisetup
    ui.setconfig(b'hooks', b'update.prefetch', wcpprefetch)
    ui.setconfig(b'hooks', b'commit.prefetch', wcpprefetch)

    isserverenabled = ui.configbool(b'remotefilelog', b'server')
    isshallowclient = isenabled(repo)

    if isserverenabled and isshallowclient:
        raise RuntimeError(b"Cannot be both a server and shallow client.")

    if isshallowclient:
        setupclient(ui, repo)

    if isserverenabled:
        remotefilelogserver.setupserver(ui, repo)


def setupclient(ui, repo):
    if not isinstance(repo, localrepo.localrepository):
        return

    # Even clients get the server setup since they need to have the
    # wireprotocol endpoints registered.
    remotefilelogserver.onetimesetup(ui)
    onetimeclientsetup(ui)

    shallowrepo.wraprepo(repo)
    repo.store = shallowstore.wrapstore(repo.store)


def storewrapper(orig, requirements, path, vfstype):
    s = orig(requirements, path, vfstype)
    if constants.SHALLOWREPO_REQUIREMENT in requirements:
        s = shallowstore.wrapstore(s)

    return s


# prefetch files before update
def applyupdates(
    orig, repo, mresult, wctx, mctx, overwrite, wantfiledata, **opts
):
    if isenabled(repo):
        manifest = mctx.manifest()
        files = []
        for f, args, msg in mresult.getactions([mergestatemod.ACTION_GET]):
            files.append((f, hex(manifest[f])))
        # batch fetch the needed files from the server
        repo.fileservice.prefetch(files)
    return orig(repo, mresult, wctx, mctx, overwrite, wantfiledata, **opts)


# Prefetch files before merge's checkunknownfiles looks at them
def checkunknownfiles(orig, repo, wctx, mctx, force, mresult, *args, **kwargs):
    if isenabled(repo):
        files = []
        sparsematch = repo.maybesparsematch(mctx.rev())
        for f, (m, actionargs, msg) in mresult.filemap():
            if sparsematch and not sparsematch(f):
                continue
            if m in (
                mergestatemod.ACTION_CREATED,
                mergestatemod.ACTION_DELETED_CHANGED,
                mergestatemod.ACTION_CREATED_MERGE,
            ):
                files.append((f, hex(mctx.filenode(f))))
            elif m == mergestatemod.ACTION_LOCAL_DIR_RENAME_GET:
                f2 = actionargs[0]
                files.append((f2, hex(mctx.filenode(f2))))
        # batch fetch the needed files from the server
        repo.fileservice.prefetch(files)
    return orig(repo, wctx, mctx, force, mresult, *args, **kwargs)

# Prefetch files before status attempts to look at their size and contents
def checklookup(orig, self, files):
    repo = self._repo
    if isenabled(repo):
        prefetchfiles = []
        for parent in self._parents:
            for f in files:
                if f in parent:
                    prefetchfiles.append((f, hex(parent.filenode(f))))
        # batch fetch the needed files from the server
        repo.fileservice.prefetch(prefetchfiles)
    return orig(self, files)


# Prefetch files for the logic that compares added and removed files for
# renames
def findrenames(orig, repo, matcher, added, removed, *args, **kwargs):
    if isenabled(repo):
        files = []
        pmf = repo[b'.'].manifest()
        for f in removed:
            if f in pmf:
                files.append((f, hex(pmf[f])))
        # batch fetch the needed files from the server
        repo.fileservice.prefetch(files)
    return orig(repo, matcher, added, removed, *args, **kwargs)

# prefetch files before pathcopies check
def computeforwardmissing(orig, a, b, match=None):
    missing = orig(a, b, match=match)
    repo = a._repo
    if isenabled(repo):
        mb = b.manifest()

        files = []
        sparsematch = repo.maybesparsematch(b.rev())
        if sparsematch:
            sparsemissing = set()
            for f in missing:
                if sparsematch(f):
                    files.append((f, hex(mb[f])))
                    sparsemissing.add(f)
            missing = sparsemissing

        # batch fetch the needed files from the server
        repo.fileservice.prefetch(files)
    return missing


# close cache miss server connection after the command has finished
def runcommand(orig, lui, repo, *args, **kwargs):
    fileservice = None
    # repo can be None when running in chg:
    # - at startup, reposetup was called because serve is not norepo
    # - a norepo command like "help" is called
    if repo and isenabled(repo):
        fileservice = repo.fileservice
    try:
        return orig(lui, repo, *args, **kwargs)
    finally:
        if fileservice:
            fileservice.close()


# prevent strip from stripping remotefilelogs
def _collectbrokencsets(orig, repo, files, striprev):
    if isenabled(repo):
        files = [f for f in files if not repo.shallowmatch(f)]
    return orig(repo, files, striprev)


# changectx wrappers
def filectx(orig, self, path, fileid=None, filelog=None):
    if fileid is None:
        fileid = self.filenode(path)
    if isenabled(self._repo) and self._repo.shallowmatch(path):
        return remotefilectx.remotefilectx(
            self._repo, path, fileid=fileid, changectx=self, filelog=filelog
        )
    return orig(self, path, fileid=fileid, filelog=filelog)


def workingfilectx(orig, self, path, filelog=None):
    if isenabled(self._repo) and self._repo.shallowmatch(path):
        return remotefilectx.remoteworkingfilectx(
            self._repo, path, workingctx=self, filelog=filelog
        )
    return orig(self, path, filelog=filelog)


# prefetch required revisions before a diff
def trydiff(
    orig,
    repo,
    revs,
    ctx1,
    ctx2,
    modified,
    added,
    removed,
    copy,
    getfilectx,
    *args,
    **kwargs
):
    if isenabled(repo):
        prefetch = []
        mf1 = ctx1.manifest()
        for fname in modified + added + removed:
            if fname in mf1:
                fnode = getfilectx(fname, ctx1).filenode()
                # fnode can be None if it's an edited working ctx file
                if fnode:
                    prefetch.append((fname, hex(fnode)))
            if fname not in removed:
                fnode = getfilectx(fname, ctx2).filenode()
                if fnode:
                    prefetch.append((fname, hex(fnode)))

        repo.fileservice.prefetch(prefetch)

    return orig(
        repo,
        revs,
        ctx1,
        ctx2,
        modified,
        added,
        removed,
        copy,
        getfilectx,
        *args,
        **kwargs
    )


# Prevent verify from processing files
# a stub for mercurial.hg.verify()
def _verify(orig, repo, level=None):
    lock = repo.lock()
    try:
        return shallowverifier.shallowverifier(repo).verify()
    finally:
        lock.release()


clientonetime = False


def onetimeclientsetup(ui):
    global clientonetime
    if clientonetime:
        return
    clientonetime = True

    # Don't commit filelogs until we know the commit hash, since the hash
    # is present in the filelog blob.
    # This violates Mercurial's filelog->manifest->changelog write order,
    # but is generally fine for client repos.
    pendingfilecommits = []
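    # Entries buffered here are flushed (or discarded) by the changelogadd
    # wrapper below, once the changelog revision, and hence the linknode, is
    # known.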

    def addrawrevision(
        orig,
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        _metatuple=None,
    ):
        if isinstance(link, int):
            pendingfilecommits.append(
                (
                    self,
                    rawtext,
                    transaction,
                    link,
                    p1,
                    p2,
                    node,
                    flags,
                    cachedelta,
                    _metatuple,
                )
            )
            return node
        else:
            return orig(
                self,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                node,
                flags,
                cachedelta,
                _metatuple=_metatuple,
            )

    extensions.wrapfunction(
        remotefilelog.remotefilelog, b'addrawrevision', addrawrevision
    )

    def changelogadd(orig, self, *args, **kwargs):
        oldlen = len(self)
        node = orig(self, *args, **kwargs)
        newlen = len(self)
        if oldlen != newlen:
            for oldargs in pendingfilecommits:
                log, rt, tr, link, p1, p2, n, fl, c, m = oldargs
                linknode = self.node(link)
                if linknode == node:
                    log.addrawrevision(rt, tr, linknode, p1, p2, n, fl, c, m)
                else:
                    raise error.ProgrammingError(
                        b'pending multiple integer revisions are not supported'
                    )
        else:
            # "link" is actually wrong here (it is set to len(changelog))
            # if changelog remains unchanged, skip writing file revisions
            # but still do a sanity check about pending multiple revisions
            if len({x[3] for x in pendingfilecommits}) > 1:
                raise error.ProgrammingError(
                    b'pending multiple integer revisions are not supported'
                )
        del pendingfilecommits[:]
        return node

    extensions.wrapfunction(changelog.changelog, b'add', changelogadd)


def getrenamedfn(orig, repo, endrev=None):
    if not isenabled(repo) or copies.usechangesetcentricalgo(repo):
        return orig(repo, endrev)

    rcache = {}

    def getrenamed(fn, rev):
        """looks up all renames for a file (up to endrev) the first
        time the file is given. It indexes on the changerev and only
        parses the manifest if linkrev != changerev.
        Returns rename info for fn at changerev rev."""
        if rev in rcache.setdefault(fn, {}):
            return rcache[fn][rev]

        try:
            fctx = repo[rev].filectx(fn)
            for ancestor in fctx.ancestors():
                if ancestor.path() == fn:
                    renamed = ancestor.renamed()
                    rcache[fn][ancestor.rev()] = renamed and renamed[0]

            renamed = fctx.renamed()
            return renamed and renamed[0]
        except error.LookupError:
            return None

    return getrenamed


def filelogrevset(orig, repo, subset, x):
    """``filelog(pattern)``
    Changesets connected to the specified filelog.

    For performance reasons, ``filelog()`` does not show every changeset
    that affects the requested file(s). See :hg:`help log` for details. For
    a slower, more accurate result, use ``file()``.
    """

    if not isenabled(repo):
        return orig(repo, subset, x)

    # i18n: "filelog" is a keyword
    pat = revset.getstring(x, _(b"filelog requires a pattern"))
    m = matchmod.match(
        repo.root, repo.getcwd(), [pat], default=b'relpath', ctx=repo[None]
    )
    s = set()

    if not matchmod.patkind(pat):
        # slow
        for r in subset:
            ctx = repo[r]
            cfiles = ctx.files()
            for f in m.files():
                if f in cfiles:
                    s.add(ctx.rev())
                    break
    else:
        # partial
        files = (f for f in repo[None] if m(f))
        for f in files:
            fctx = repo[None].filectx(f)
            s.add(fctx.linkrev())
            for actx in fctx.ancestors():
                s.add(actx.linkrev())

    return smartset.baseset([r for r in subset if r in s])


@command(b'gc', [], _(b'hg gc [REPO...]'), norepo=True)
def gc(ui, *args, **opts):
    """garbage collect the client and server filelog caches"""
    cachepaths = set()

    # get the system client cache
    systemcache = shallowutil.getcachepath(ui, allowempty=True)
    if systemcache:
        cachepaths.add(systemcache)

    # get repo client and server cache
    repopaths = []
    pwd = ui.environ.get(b'PWD')
    if pwd:
        repopaths.append(pwd)

    repopaths.extend(args)
    repos = []
    for repopath in repopaths:
        try:
            repo = hg.peer(ui, {}, repopath)
            repos.append(repo)

            repocache = shallowutil.getcachepath(repo.ui, allowempty=True)
            if repocache:
                cachepaths.add(repocache)
        except error.RepoError:
            pass

    # gc client cache
    for cachepath in cachepaths:
        gcclient(ui, cachepath)

    # gc server cache
    for repo in repos:
        remotefilelogserver.gcserver(ui, repo._repo)


def gcclient(ui, cachepath):
    # get list of repos that use this cache
    repospath = os.path.join(cachepath, b'repos')
    if not os.path.exists(repospath):
        ui.warn(_(b"no known cache at %s\n") % cachepath)
        return

    reposfile = open(repospath, b'rb')
    repos = {r[:-1] for r in reposfile.readlines()}
    reposfile.close()

    # build list of useful files
    validrepos = []
    keepkeys = set()

    sharedcache = None
    filesrepacked = False

    count = 0
    progress = ui.makeprogress(
        _(b"analyzing repositories"), unit=b"repos", total=len(repos)
    )
    for path in repos:
        progress.update(count)
        count += 1
        try:
            path = util.expandpath(os.path.normpath(path))
        except TypeError as e:
            ui.warn(_(b"warning: malformed path: %r:%s\n") % (path, e))
            traceback.print_exc()
            continue
        try:
            peer = hg.peer(ui, {}, path)
            repo = peer._repo
        except error.RepoError:
            continue

        validrepos.append(path)

        # Protect against any repo or config changes that have happened since
        # this repo was added to the repos file. We'd rather this loop succeed
        # and too much be deleted, than the loop fail and nothing gets deleted.
        if not isenabled(repo):
            continue

        if not util.safehasattr(repo, b'name'):
            ui.warn(
                _(b"repo %s is a misconfigured remotefilelog repo\n") % path
            )
            continue

        # If garbage collection on repack and repack on hg gc are enabled
        # then loose files are repacked and garbage collected.
        # Otherwise regular garbage collection is performed.
        repackonhggc = repo.ui.configbool(b'remotefilelog', b'repackonhggc')
        gcrepack = repo.ui.configbool(b'remotefilelog', b'gcrepack')
        if repackonhggc and gcrepack:
            try:
                repackmod.incrementalrepack(repo)
                filesrepacked = True
                continue
            except (IOError, repackmod.RepackAlreadyRunning):
                # If repack cannot be performed because of insufficient disk
                # space, continue doing garbage collection of loose files
                # without repacking
                pass

        reponame = repo.name
        if not sharedcache:
            sharedcache = repo.sharedstore

        # Compute a keepset which is not garbage collected
        def keyfn(fname, fnode):
            return fileserverclient.getcachekey(reponame, fname, hex(fnode))

        keepkeys = repackmod.keepset(repo, keyfn=keyfn, lastkeepkeys=keepkeys)

    progress.complete()

    # write list of valid repos back
    oldumask = os.umask(0o002)
    try:
        reposfile = open(repospath, b'wb')
        reposfile.writelines([(b"%s\n" % r) for r in validrepos])
        reposfile.close()
    finally:
        os.umask(oldumask)

    # prune cache
    if sharedcache is not None:
        sharedcache.gc(keepkeys)
    elif not filesrepacked:
        ui.warn(_(b"warning: no valid repos in repofile\n"))

def log(orig, ui, repo, *pats, **opts):
    if not isenabled(repo):
        return orig(ui, repo, *pats, **opts)

    follow = opts.get('follow')
    revs = opts.get('rev')
    if pats:
        # Force slowpath for non-follow patterns and follows that start from
        # non-working-copy-parent revs.
        if not follow or revs:
            # This forces the slowpath
            opts['removed'] = True

        # If this is a non-follow log without any revs specified, recommend that
        # the user add -f to speed it up.
        if not follow and not revs:
            match = scmutil.match(repo[b'.'], pats, pycompat.byteskwargs(opts))
            isfile = not match.anypats()
            if isfile:
                for file in match.files():
                    if not os.path.isfile(repo.wjoin(file)):
                        isfile = False
                        break

            if isfile:
                ui.warn(
                    _(
                        b"warning: file log can be slow on large repos - "
                        + b"use -f to speed it up\n"
                    )
                )

    return orig(ui, repo, *pats, **opts)


def revdatelimit(ui, revset):
    """Update revset so that only changesets no older than 'prefetchdays' days
    are included. The default value is set to 14 days. If 'prefetchdays' is
    set to zero or a negative value, the date restriction is not applied.
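
    For example, with the default of 14 days, the revset b'tip' becomes
    b'(tip) & date(-14)'.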
    """
    days = ui.configint(b'remotefilelog', b'prefetchdays')
    if days > 0:
        revset = b'(%s) & date(-%s)' % (revset, days)
    return revset


def readytofetch(repo):
    """Check that enough time has passed since the last background prefetch.
    This only relates to prefetches after operations that change the working
    copy parent. Default delay between background prefetches is 2 minutes.
    """
    timeout = repo.ui.configint(b'remotefilelog', b'prefetchdelay')
    fname = repo.vfs.join(b'lastprefetch')

    ready = False
    with open(fname, b'a'):
        # the with construct above is used to avoid race conditions
        modtime = os.path.getmtime(fname)
        if (time.time() - modtime) > timeout:
            os.utime(fname, None)
            ready = True

    return ready


def wcpprefetch(ui, repo, **kwargs):
    """Prefetches, in the background, the revisions specified by the
    bgprefetchrevs revset. Also does a background repack if the
    backgroundrepack config flag is set.
    """
    shallow = isenabled(repo)
    bgprefetchrevs = ui.config(b'remotefilelog', b'bgprefetchrevs')
    isready = readytofetch(repo)

    if not (shallow and bgprefetchrevs and isready):
        return

    bgrepack = repo.ui.configbool(b'remotefilelog', b'backgroundrepack')
    # update a revset with a date limit
    bgprefetchrevs = revdatelimit(ui, bgprefetchrevs)

    def anon(unused_success):
        if util.safehasattr(repo, b'ranprefetch') and repo.ranprefetch:
            return
        repo.ranprefetch = True
        repo.backgroundprefetch(bgprefetchrevs, repack=bgrepack)

    repo._afterlock(anon)


def pull(orig, ui, repo, *pats, **opts):
    result = orig(ui, repo, *pats, **opts)

    if isenabled(repo):
        # prefetch if it's configured
        prefetchrevset = ui.config(b'remotefilelog', b'pullprefetch')
        bgrepack = repo.ui.configbool(b'remotefilelog', b'backgroundrepack')
        bgprefetch = repo.ui.configbool(b'remotefilelog', b'backgroundprefetch')

        if prefetchrevset:
            ui.status(_(b"prefetching file contents\n"))
            revs = scmutil.revrange(repo, [prefetchrevset])
            base = repo[b'.'].rev()
            if bgprefetch:
                repo.backgroundprefetch(prefetchrevset, repack=bgrepack)
            else:
                repo.prefetch(revs, base=base)
                if bgrepack:
                    repackmod.backgroundrepack(repo, incremental=True)
        elif bgrepack:
            repackmod.backgroundrepack(repo, incremental=True)

    return result


def exchangepull(orig, repo, remote, *args, **kwargs):
    # Hook into the callstream/getbundle to insert bundle capabilities
    # during a pull.
    def localgetbundle(
        orig, source, heads=None, common=None, bundlecaps=None, **kwargs
    ):
        if not bundlecaps:
            bundlecaps = set()
        bundlecaps.add(constants.BUNDLE2_CAPABLITY)
        return orig(
            source, heads=heads, common=common, bundlecaps=bundlecaps, **kwargs
        )

    if util.safehasattr(remote, b'_callstream'):
        remote._localrepo = repo
    elif util.safehasattr(remote, b'getbundle'):
        extensions.wrapfunction(remote, b'getbundle', localgetbundle)

    return orig(repo, remote, *args, **kwargs)


def _fileprefetchhook(repo, revmatches):
    if isenabled(repo):
        allfiles = []
        for rev, match in revmatches:
            if rev == wdirrev or rev is None:
                continue
            ctx = repo[rev]
            mf = ctx.manifest()
            sparsematch = repo.maybesparsematch(ctx.rev())
            for path in ctx.walk(match):
                if (not sparsematch or sparsematch(path)) and path in mf:
                    allfiles.append((path, hex(mf[path])))
        repo.fileservice.prefetch(allfiles)


@command(
    b'debugremotefilelog',
    [
        (b'd', b'decompress', None, _(b'decompress the filelog first')),
    ],
    _(b'hg debugremotefilelog <path>'),
    norepo=True,
)
def debugremotefilelog(ui, path, **opts):
    return debugcommands.debugremotefilelog(ui, path, **opts)


@command(
    b'verifyremotefilelog',
    [
        (b'd', b'decompress', None, _(b'decompress the filelogs first')),
    ],
    _(b'hg verifyremotefilelog <directory>'),
    norepo=True,
)
def verifyremotefilelog(ui, path, **opts):
    return debugcommands.verifyremotefilelog(ui, path, **opts)


@command(
    b'debugdatapack',
    [
        (b'', b'long', None, _(b'print the long hashes')),
        (b'', b'node', b'', _(b'dump the contents of node'), b'NODE'),
    ],
    _(b'hg debugdatapack <paths>'),
    norepo=True,
)
def debugdatapack(ui, *paths, **opts):
    return debugcommands.debugdatapack(ui, *paths, **opts)


@command(b'debughistorypack', [], _(b'hg debughistorypack <path>'), norepo=True)
def debughistorypack(ui, path, **opts):
    return debugcommands.debughistorypack(ui, path)


@command(b'debugkeepset', [], _(b'hg debugkeepset'))
def debugkeepset(ui, repo, **opts):
    # The command is used to measure keepset computation time
    def keyfn(fname, fnode):
        return fileserverclient.getcachekey(repo.name, fname, hex(fnode))

    repackmod.keepset(repo, keyfn)
    return


@command(b'debugwaitonrepack', [], _(b'hg debugwaitonrepack'))
def debugwaitonrepack(ui, repo, **opts):
    return debugcommands.debugwaitonrepack(repo)


@command(b'debugwaitonprefetch', [], _(b'hg debugwaitonprefetch'))
def debugwaitonprefetch(ui, repo, **opts):
    return debugcommands.debugwaitonprefetch(repo)


def resolveprefetchopts(ui, opts):
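    # When no --rev is given, build a default revset from '.', draft(), and
    # the pullprefetch/bgprefetchrevs configs, then bound it by the
    # 'prefetchdays' limit (see revdatelimit above).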
    if not opts.get(b'rev'):
        revset = [b'.', b'draft()']

        prefetchrevset = ui.config(b'remotefilelog', b'pullprefetch', None)
        if prefetchrevset:
            revset.append(b'(%s)' % prefetchrevset)
        bgprefetchrevs = ui.config(b'remotefilelog', b'bgprefetchrevs', None)
        if bgprefetchrevs:
            revset.append(b'(%s)' % bgprefetchrevs)
        revset = b'+'.join(revset)

        # update a revset with a date limit
        revset = revdatelimit(ui, revset)

        opts[b'rev'] = [revset]

    if not opts.get(b'base'):
        opts[b'base'] = None

    return opts


@command(
    b'prefetch',
    [
        (b'r', b'rev', [], _(b'prefetch the specified revisions'), _(b'REV')),
        (b'', b'repack', False, _(b'run repack after prefetch')),
        (b'b', b'base', b'', _(b"rev that is assumed to already be local")),
    ]
    + commands.walkopts,
    _(b'hg prefetch [OPTIONS] [FILE...]'),
    helpcategory=command.CATEGORY_MAINTENANCE,
)
def prefetch(ui, repo, *pats, **opts):
1206    """prefetch file revisions from the server
1207
1208    Prefetchs file revisions for the specified revs and stores them in the
1209    local remotefilelog cache.  If no rev is specified, the default rev is
1210    used which is the union of dot, draft, pullprefetch and bgprefetchrev.
1211    File names or patterns can be used to limit which files are downloaded.
1212
1213    Return 0 on success.
1214    """
    opts = pycompat.byteskwargs(opts)
    if not isenabled(repo):
        raise error.Abort(_(b"repo is not shallow"))

    opts = resolveprefetchopts(ui, opts)
    revs = scmutil.revrange(repo, opts.get(b'rev'))
    repo.prefetch(revs, opts.get(b'base'), pats, opts)

    # Run repack in background
    if opts.get(b'repack'):
        repackmod.backgroundrepack(repo, incremental=True)


@command(
    b'repack',
    [
        (b'', b'background', None, _(b'run in a background process'), None),
        (b'', b'incremental', None, _(b'do an incremental repack'), None),
        (
            b'',
            b'packsonly',
            None,
            _(b'only repack packs (skip loose objects)'),
            None,
        ),
    ],
    _(b'hg repack [OPTIONS]'),
)
def repack_(ui, repo, *pats, **opts):
    if opts.get('background'):
        repackmod.backgroundrepack(
            repo,
            incremental=opts.get('incremental'),
            packsonly=opts.get('packsonly', False),
        )
        return

    options = {b'packsonly': opts.get('packsonly')}

    try:
        if opts.get('incremental'):
            repackmod.incrementalrepack(repo, options=options)
        else:
            repackmod.fullrepack(repo, options=options)
    except repackmod.RepackAlreadyRunning as ex:
        # Don't propagate the exception if the repack is already in
        # progress, since we want the command to exit 0.
        repo.ui.warn(b'%s\n' % ex)
1263