# repair.py - functions for repository repair for mercurial
#
# Copyright 2005, 2006 Chris Mason <mason@suse.com>
# Copyright 2007 Olivia Mackall
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno

from .i18n import _
from .node import (
    hex,
    short,
)
from . import (
    bundle2,
    changegroup,
    discovery,
    error,
    exchange,
    obsolete,
    obsutil,
    pathutil,
    phases,
    pycompat,
    requirements,
    scmutil,
    util,
)
from .utils import (
    hashutil,
    stringutil,
    urlutil,
)


def backupbundle(
    repo, bases, heads, node, suffix, compress=True, obsolescence=True
):
    """create a bundle with the specified revisions as a backup"""

    backupdir = b"strip-backup"
    vfs = repo.vfs
    if not vfs.isdir(backupdir):
        vfs.mkdir(backupdir)

    # Include a hash of all the nodes in the filename for uniqueness
    allcommits = repo.set(b'%ln::%ln', bases, heads)
    allhashes = sorted(c.hex() for c in allcommits)
    totalhash = hashutil.sha1(b''.join(allhashes)).digest()
    name = b"%s/%s-%s-%s.hg" % (
        backupdir,
        short(node),
        hex(totalhash[:4]),
        suffix,
    )

    cgversion = changegroup.localversion(repo)
    comp = None
    if cgversion != b'01':
        bundletype = b"HG20"
        if compress:
            comp = b'BZ'
    elif compress:
        bundletype = b"HG10BZ"
    else:
        bundletype = b"HG10UN"

    outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
    contentopts = {
        b'cg.version': cgversion,
        b'obsolescence': obsolescence,
        b'phases': True,
    }
    return bundle2.writenewbundle(
        repo.ui,
        repo,
        b'strip',
        name,
        bundletype,
        outgoing,
        contentopts,
        vfs,
        compression=comp,
    )


def _collectfiles(repo, striprev):
    """find out the filelogs affected by the strip"""
    files = set()

    for x in pycompat.xrange(striprev, len(repo)):
        files.update(repo[x].files())

    return sorted(files)


def _collectrevlog(revlog, striprev):
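    """return the linkrevs of the revisions in ``revlog`` that would be
    broken by stripping at ``striprev``"""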
    _, brokenset = revlog.getstrippoint(striprev)
    return [revlog.linkrev(r) for r in brokenset]


def _collectbrokencsets(repo, files, striprev):
    """return the changesets which will be broken by the truncation"""
    s = set()

    for revlog in manifestrevlogs(repo):
        s.update(_collectrevlog(revlog, striprev))
    for fname in files:
        s.update(_collectrevlog(repo.file(fname), striprev))

    return s


def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
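    """remove the given changesets and all their descendants from the repo

    If ``backup`` is True, a backup bundle is first written to the
    ``.hg/strip-backup`` directory. The path of that bundle (or None when no
    backup was created) is returned so extensions can use it.
    """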
    # This function requires the caller to lock the repo, but it operates
    # within a transaction of its own, and thus requires there to be no current
    # transaction when it is called.
    if repo.currenttransaction() is not None:
        raise error.ProgrammingError(b'cannot strip from inside a transaction')

    # Simple way to maintain backwards compatibility for this
    # argument.
    if backup in [b'none', b'strip']:
        backup = False

    repo = repo.unfiltered()
    repo.destroying()
    vfs = repo.vfs
    # load bookmarks before the changelog to avoid side effects from an
    # outdated changelog (see repo._refreshchangelog)
    repo._bookmarks
    cl = repo.changelog

    # TODO handle undo of merge sets
    if isinstance(nodelist, bytes):
        nodelist = [nodelist]
    striplist = [cl.rev(node) for node in nodelist]
    striprev = min(striplist)

    files = _collectfiles(repo, striprev)
    saverevs = _collectbrokencsets(repo, files, striprev)

    # Some revisions with rev > striprev may not be descendants of striprev.
    # We have to find these revisions and put them in a bundle, so that
    # we can restore them after the truncations.
    # To create the bundle we need the list of heads and bases of the
    # set of interesting revisions.
    # (head = revision in the set that has no descendant in the set;
    #  base = revision in the set that has no ancestor in the set)
    tostrip = set(striplist)
    saveheads = set(saverevs)
    for r in cl.revs(start=striprev + 1):
        if any(p in tostrip for p in cl.parentrevs(r)):
            tostrip.add(r)

        if r not in tostrip:
            saverevs.add(r)
            saveheads.difference_update(cl.parentrevs(r))
            saveheads.add(r)
    saveheads = [cl.node(r) for r in saveheads]

    # compute base nodes
    if saverevs:
        descendants = set(cl.descendants(saverevs))
        saverevs.difference_update(descendants)
    savebases = [cl.node(r) for r in saverevs]
    stripbases = [cl.node(r) for r in tostrip]

    stripobsidx = obsmarkers = ()
    if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
        obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
    if obsmarkers:
        stripobsidx = [
            i for i, m in enumerate(repo.obsstore) if m in obsmarkers
        ]

    newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)

    backupfile = None
    node = nodelist[-1]
    if backup:
        backupfile = _createstripbackup(repo, stripbases, node, topic)
    # create a changegroup for all the branches we need to keep
    tmpbundlefile = None
    if saveheads:
        # do not compress the temporary bundle, since we remove it from disk
        # later
        #
        # We do not include obsolescence markers, as that might re-introduce
        # prune markers we are trying to strip.  This is harmless since the
        # stripped markers are already backed up and we did not touch the
        # markers for the saved changesets.
        tmpbundlefile = backupbundle(
            repo,
            savebases,
            saveheads,
            node,
            b'temp',
            compress=False,
            obsolescence=False,
        )

    with ui.uninterruptible():
        try:
            with repo.transaction(b"strip") as tr:
                # TODO this code violates the interface abstraction of the
                # transaction and makes assumptions that file storage is
                # using append-only files. We'll need some kind of storage
                # API to handle stripping for us.
                oldfiles = set(tr._offsetmap.keys())
                oldfiles.update(tr._newfiles)

                tr.startgroup()
                cl.strip(striprev, tr)
                stripmanifest(repo, striprev, tr, files)

                for fn in files:
                    repo.file(fn).strip(striprev, tr)
                tr.endgroup()

                entries = tr.readjournal()

                for file, troffset in entries:
                    if file in oldfiles:
                        continue
                    with repo.svfs(file, b'a', checkambig=True) as fp:
                        fp.truncate(troffset)
                    if troffset == 0:
                        repo.store.markremoved(file)

                deleteobsmarkers(repo.obsstore, stripobsidx)
                del repo.obsstore
                repo.invalidatevolatilesets()
                repo._phasecache.filterunknown(repo)

            if tmpbundlefile:
                ui.note(_(b"adding branch\n"))
                f = vfs.open(tmpbundlefile, b"rb")
                gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
                # silence internal shuffling chatter
                maybe_silent = (
                    repo.ui.silent()
                    if not repo.ui.verbose
                    else util.nullcontextmanager()
                )
                with maybe_silent:
                    tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
                    txnname = b'strip'
                    if not isinstance(gen, bundle2.unbundle20):
                        txnname = b"strip\n%s" % urlutil.hidepassword(
                            tmpbundleurl
                        )
                    with repo.transaction(txnname) as tr:
                        bundle2.applybundle(
                            repo, gen, tr, source=b'strip', url=tmpbundleurl
                        )
                f.close()

            with repo.transaction(b'repair') as tr:
                bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
                repo._bookmarks.applychanges(repo, tr, bmchanges)

            # remove undo files
            for undovfs, undofile in repo.undofiles():
                try:
                    undovfs.unlink(undofile)
                except OSError as e:
                    if e.errno != errno.ENOENT:
                        ui.warn(
                            _(b'error removing %s: %s\n')
                            % (
                                undovfs.join(undofile),
                                stringutil.forcebytestr(e),
                            )
                        )

        except:  # re-raises
            if backupfile:
                ui.warn(
                    _(b"strip failed, backup bundle stored in '%s'\n")
                    % vfs.join(backupfile)
                )
            if tmpbundlefile:
                ui.warn(
                    _(b"strip failed, unrecovered changes stored in '%s'\n")
                    % vfs.join(tmpbundlefile)
                )
                ui.warn(
                    _(
                        b"(fix the problem, then recover the changesets with "
                        b"\"hg unbundle '%s'\")\n"
                    )
                    % vfs.join(tmpbundlefile)
                )
            raise
        else:
            if tmpbundlefile:
                # Remove temporary bundle only if there were no exceptions
                vfs.unlink(tmpbundlefile)

    repo.destroyed()
    # return the backup file path (or None if 'backup' was False) so
    # extensions can use it
    return backupfile


def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
    """perform a "soft" strip using the archived phase"""
    tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
    if not tostrip:
        return None

    backupfile = None
    if backup:
        node = tostrip[0]
        backupfile = _createstripbackup(repo, tostrip, node, topic)

    newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
    with repo.transaction(b'strip') as tr:
        phases.retractboundary(repo, tr, phases.archived, tostrip)
        bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
        repo._bookmarks.applychanges(repo, tr, bmchanges)
    return backupfile


def _bookmarkmovements(repo, tostrip):
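    """find the bookmarks that point to revisions in ``tostrip`` and the node
    they should be moved to; return a (newbmtarget, updatebm) pair"""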
    # compute necessary bookmark movement
    bm = repo._bookmarks
    updatebm = []
    for m in bm:
        rev = repo[bm[m]].rev()
        if rev in tostrip:
            updatebm.append(m)
    newbmtarget = None
    # If we need to move bookmarks, compute bookmark
    # targets. Otherwise we can skip doing this logic.
    if updatebm:
        # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
        # but is much faster
        newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
        if newbmtarget:
            newbmtarget = repo[newbmtarget.first()].node()
        else:
            newbmtarget = b'.'
    return newbmtarget, updatebm


def _createstripbackup(repo, stripbases, node, topic):
    # backup the changesets we are about to strip
    vfs = repo.vfs
    cl = repo.changelog
    backupfile = backupbundle(repo, stripbases, cl.heads(), node, topic)
    repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
    repo.ui.log(
        b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
    )
    return backupfile


def safestriproots(ui, repo, nodes):
    """return the roots of the subset of ``nodes`` whose descendants are all
    covered by ``nodes``"""
    torev = repo.unfiltered().changelog.rev
    revs = {torev(n) for n in nodes}
    # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
    # orphaned = affected - wanted
    # affected = descendants(roots(wanted))
    # wanted = revs
    revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
    tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
    notstrip = revs - tostrip
    if notstrip:
        nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
        ui.warn(
            _(b'warning: orphaned descendants detected, not stripping %s\n')
            % nodestr
        )
    return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]


class stripcallback(object):
    """used as a transaction postclose callback"""

    def __init__(self, ui, repo, backup, topic):
        self.ui = ui
        self.repo = repo
        self.backup = backup
        self.topic = topic or b'backup'
        self.nodelist = []

    def addnodes(self, nodes):
        self.nodelist.extend(nodes)

    def __call__(self, tr):
        roots = safestriproots(self.ui, self.repo, self.nodelist)
        if roots:
            strip(self.ui, self.repo, roots, self.backup, self.topic)


def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
    """like strip, but works inside a transaction and won't strip irrelevant
    revs

    nodelist must explicitly contain all descendants. Otherwise a warning will
    be printed that some nodes are not stripped.

    Will do a backup if `backup` is True. The last non-None "topic" will be
    used as the backup topic name. The default backup topic name is "backup".
    """
    tr = repo.currenttransaction()
    if not tr:
        nodes = safestriproots(ui, repo, nodelist)
        return strip(ui, repo, nodes, backup=backup, topic=topic)
    # transaction postclose callbacks are called in alphabetical order.
    # use '\xff' as prefix so we are likely to be called last.
    callback = tr.getpostclose(b'\xffstrip')
    if callback is None:
        callback = stripcallback(ui, repo, backup=backup, topic=topic)
        tr.addpostclose(b'\xffstrip', callback)
    if topic:
        callback.topic = topic
    callback.addnodes(nodelist)


def stripmanifest(repo, striprev, tr, files):
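    """strip every manifest revlog (root and tree manifests) at ``striprev``"""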
    for revlog in manifestrevlogs(repo):
        revlog.strip(striprev, tr)


def manifestrevlogs(repo):
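    """yield each manifest revlog of the repository: the root manifest and,
    for treemanifest repositories, one storage object per directory"""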
    yield repo.manifestlog.getstorage(b'')
    if scmutil.istreemanifest(repo):
        # This logic is safe if treemanifest isn't enabled, but also
        # pointless, so we skip it if treemanifest isn't enabled.
        for t, unencoded, size in repo.store.datafiles():
            if unencoded.startswith(b'meta/') and unencoded.endswith(
                b'00manifest.i'
            ):
                dir = unencoded[5:-12]
                yield repo.manifestlog.getstorage(dir)


def rebuildfncache(ui, repo, only_data=False):
    """Rebuilds the fncache file from repo history.

    Missing entries will be added. Extra entries will be removed.
    """
    repo = repo.unfiltered()

    if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
        ui.warn(
            _(
                b'(not rebuilding fncache because repository does not '
                b'support fncache)\n'
            )
        )
        return

    with repo.lock():
        fnc = repo.store.fncache
        fnc.ensureloaded(warn=ui.warn)

        oldentries = set(fnc.entries)
        newentries = set()
        seenfiles = set()

        if only_data:
            # Trust the listing of .i files from the fncache, but not the .d
            # files. This is much faster, because we only need to stat every
            # possible .d file, instead of reading the full changelog.
            for f in fnc:
                if f[:5] == b'data/' and f[-2:] == b'.i':
                    seenfiles.add(f[5:-2])
                    newentries.add(f)
                    dataf = f[:-2] + b'.d'
                    if repo.store._exists(dataf):
                        newentries.add(dataf)
        else:
            progress = ui.makeprogress(
                _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
            )
            for rev in repo:
                progress.update(rev)

                ctx = repo[rev]
                for f in ctx.files():
                    # This is to minimize I/O.
                    if f in seenfiles:
                        continue
                    seenfiles.add(f)

                    i = b'data/%s.i' % f
                    d = b'data/%s.d' % f

                    if repo.store._exists(i):
                        newentries.add(i)
                    if repo.store._exists(d):
                        newentries.add(d)

            progress.complete()

        if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
            # This logic is safe if treemanifest isn't enabled, but also
            # pointless, so we skip it if treemanifest isn't enabled.
            for dir in pathutil.dirs(seenfiles):
                i = b'meta/%s/00manifest.i' % dir
                d = b'meta/%s/00manifest.d' % dir

                if repo.store._exists(i):
                    newentries.add(i)
                if repo.store._exists(d):
                    newentries.add(d)

        addcount = len(newentries - oldentries)
        removecount = len(oldentries - newentries)
        for p in sorted(oldentries - newentries):
            ui.write(_(b'removing %s\n') % p)
        for p in sorted(newentries - oldentries):
            ui.write(_(b'adding %s\n') % p)

        if addcount or removecount:
            ui.write(
                _(b'%d items added, %d removed from fncache\n')
                % (addcount, removecount)
            )
            fnc.entries = newentries
            fnc._dirty = True

            with repo.transaction(b'fncache') as tr:
                fnc.write(tr)
        else:
            ui.write(_(b'fncache already up to date\n'))


def deleteobsmarkers(obsstore, indices):
    """Delete some obsmarkers from obsstore and return how many were deleted

    'indices' is a list of ints which are the indices
    of the markers to be deleted.

    Every invocation of this function completely rewrites the obsstore file,
    skipping the markers we want removed. A new temporary file is created,
    the remaining markers are written to it, and on .close() the file is
    atomically renamed to obsstore, thus guaranteeing consistency."""
    if not indices:
        # we don't want to rewrite the obsstore with the same content
        return

    left = []
    current = obsstore._all
    n = 0
    for i, m in enumerate(current):
        if i in indices:
            n += 1
            continue
        left.append(m)

    newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
    for bytes in obsolete.encodemarkers(left, True, obsstore._version):
        newobsstorefile.write(bytes)
    newobsstorefile.close()
    return n