1# Copyright 2007 Bryan O'Sullivan <bos@serpentine.com>
2# Copyright 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
3#
4# This software may be used and distributed according to the terms of the
5# GNU General Public License version 2 or any later version.
6
7from __future__ import absolute_import, print_function
8
9import posixpath
10
11from mercurial.i18n import _
12from mercurial import (
13    error,
14    pycompat,
15)
16from . import common
17
18SKIPREV = common.SKIPREV
19
20
21def rpairs(path):
22    """Yield tuples with path split at '/', starting with the full path.
23    No leading, trailing or double '/', please.
24    >>> for x in rpairs(b'foo/bar/baz'): print(x)
25    ('foo/bar/baz', '')
26    ('foo/bar', 'baz')
27    ('foo', 'bar/baz')
28    ('.', 'foo/bar/baz')
29    """
30    i = len(path)
31    while i != -1:
32        yield path[:i], path[i + 1 :]
33        i = path.rfind(b'/', 0, i)
34    yield b'.', path
35
36
37def normalize(path):
38    """We use posixpath.normpath to support cross-platform path format.
39    However, it doesn't handle None input. So we wrap it up."""
40    if path is None:
41        return None
42    return posixpath.normpath(path)
43
44
45class filemapper(object):
46    """Map and filter filenames when importing.
47    A name can be mapped to itself, a new name, or None (omit from new
48    repository)."""
49
50    def __init__(self, ui, path=None):
51        self.ui = ui
52        self.include = {}
53        self.exclude = {}
54        self.rename = {}
55        self.targetprefixes = None
56        if path:
57            if self.parse(path):
58                raise error.Abort(_(b'errors in filemap'))
59
60    def parse(self, path):
61        errs = 0
62
63        def check(name, mapping, listname):
64            if not name:
65                self.ui.warn(
66                    _(b'%s:%d: path to %s is missing\n')
67                    % (lex.infile, lex.lineno, listname)
68                )
69                return 1
70            if name in mapping:
71                self.ui.warn(
72                    _(b'%s:%d: %r already in %s list\n')
73                    % (lex.infile, lex.lineno, name, listname)
74                )
75                return 1
76            if name.startswith(b'/') or name.endswith(b'/') or b'//' in name:
77                self.ui.warn(
78                    _(b'%s:%d: superfluous / in %s %r\n')
79                    % (lex.infile, lex.lineno, listname, pycompat.bytestr(name))
80                )
81                return 1
82            return 0
83
84        lex = common.shlexer(
85            filepath=path, wordchars=b'!@#$%^&*()-=+[]{}|;:,./<>?'
86        )
87        cmd = lex.get_token()
88        while cmd:
89            if cmd == b'include':
90                name = normalize(lex.get_token())
91                errs += check(name, self.exclude, b'exclude')
92                self.include[name] = name
93            elif cmd == b'exclude':
94                name = normalize(lex.get_token())
95                errs += check(name, self.include, b'include')
96                errs += check(name, self.rename, b'rename')
97                self.exclude[name] = name
98            elif cmd == b'rename':
99                src = normalize(lex.get_token())
100                dest = normalize(lex.get_token())
101                errs += check(src, self.exclude, b'exclude')
102                self.rename[src] = dest
103            elif cmd == b'source':
104                errs += self.parse(normalize(lex.get_token()))
105            else:
106                self.ui.warn(
107                    _(b'%s:%d: unknown directive %r\n')
108                    % (lex.infile, lex.lineno, pycompat.bytestr(cmd))
109                )
110                errs += 1
111            cmd = lex.get_token()
112        return errs
113
114    def lookup(self, name, mapping):
115        name = normalize(name)
116        for pre, suf in rpairs(name):
117            try:
118                return mapping[pre], pre, suf
119            except KeyError:
120                pass
121        return b'', name, b''
122
123    def istargetfile(self, filename):
124        """Return true if the given target filename is covered as a destination
125        of the filemap. This is useful for identifying what parts of the target
126        repo belong to the source repo and what parts don't."""
127        if self.targetprefixes is None:
128            self.targetprefixes = set()
129            for before, after in pycompat.iteritems(self.rename):
130                self.targetprefixes.add(after)
131
132        # If "." is a target, then all target files are considered from the
133        # source.
134        if not self.targetprefixes or b'.' in self.targetprefixes:
135            return True
136
137        filename = normalize(filename)
138        for pre, suf in rpairs(filename):
139            # This check is imperfect since it doesn't account for the
140            # include/exclude list, but it should work in filemaps that don't
141            # apply include/exclude to the same source directories they are
142            # renaming.
143            if pre in self.targetprefixes:
144                return True
145        return False
146
147    def __call__(self, name):
148        if self.include:
149            inc = self.lookup(name, self.include)[0]
150        else:
151            inc = name
152        if self.exclude:
153            exc = self.lookup(name, self.exclude)[0]
154        else:
155            exc = b''
156        if (not self.include and exc) or (len(inc) <= len(exc)):
157            return None
158        newpre, pre, suf = self.lookup(name, self.rename)
159        if newpre:
160            if newpre == b'.':
161                return suf
162            if suf:
163                if newpre.endswith(b'/'):
164                    return newpre + suf
165                return newpre + b'/' + suf
166            return newpre
167        return name
168
169    def active(self):
170        return bool(self.include or self.exclude or self.rename)
171
172
173# This class does two additional things compared to a regular source:
174#
175# - Filter and rename files.  This is mostly wrapped by the filemapper
176#   class above. We hide the original filename in the revision that is
177#   returned by getchanges to be able to find things later in getfile.
178#
179# - Return only revisions that matter for the files we're interested in.
180#   This involves rewriting the parents of the original revision to
181#   create a graph that is restricted to those revisions.
182#
183#   This set of revisions includes not only revisions that directly
184#   touch files we're interested in, but also merges that merge two
185#   or more interesting revisions.
186
187
188class filemap_source(common.converter_source):
189    def __init__(self, ui, baseconverter, filemap):
190        super(filemap_source, self).__init__(ui, baseconverter.repotype)
191        self.base = baseconverter
192        self.filemapper = filemapper(ui, filemap)
193        self.commits = {}
194        # if a revision rev has parent p in the original revision graph, then
195        # rev will have parent self.parentmap[p] in the restricted graph.
196        self.parentmap = {}
197        # self.wantedancestors[rev] is the set of all ancestors of rev that
198        # are in the restricted graph.
199        self.wantedancestors = {}
200        self.convertedorder = None
201        self._rebuilt = False
202        self.origparents = {}
203        self.children = {}
204        self.seenchildren = {}
205        # experimental config: convert.ignoreancestorcheck
206        self.ignoreancestorcheck = self.ui.configbool(
207            b'convert', b'ignoreancestorcheck'
208        )
209
210    def before(self):
211        self.base.before()
212
213    def after(self):
214        self.base.after()
215
216    def setrevmap(self, revmap):
217        # rebuild our state to make things restartable
218        #
219        # To avoid calling getcommit for every revision that has already
220        # been converted, we rebuild only the parentmap, delaying the
221        # rebuild of wantedancestors until we need it (i.e. until a
222        # merge).
223        #
224        # We assume the order argument lists the revisions in
225        # topological order, so that we can infer which revisions were
226        # wanted by previous runs.
227        self._rebuilt = not revmap
228        seen = {SKIPREV: SKIPREV}
229        dummyset = set()
230        converted = []
231        for rev in revmap.order:
232            mapped = revmap[rev]
233            wanted = mapped not in seen
234            if wanted:
235                seen[mapped] = rev
236                self.parentmap[rev] = rev
237            else:
238                self.parentmap[rev] = seen[mapped]
239            self.wantedancestors[rev] = dummyset
240            arg = seen[mapped]
241            if arg == SKIPREV:
242                arg = None
243            converted.append((rev, wanted, arg))
244        self.convertedorder = converted
245        return self.base.setrevmap(revmap)
246
247    def rebuild(self):
248        if self._rebuilt:
249            return True
250        self._rebuilt = True
251        self.parentmap.clear()
252        self.wantedancestors.clear()
253        self.seenchildren.clear()
254        for rev, wanted, arg in self.convertedorder:
255            if rev not in self.origparents:
256                try:
257                    self.origparents[rev] = self.getcommit(rev).parents
258                except error.RepoLookupError:
259                    self.ui.debug(b"unknown revmap source: %s\n" % rev)
260                    continue
261            if arg is not None:
262                self.children[arg] = self.children.get(arg, 0) + 1
263
264        for rev, wanted, arg in self.convertedorder:
265            try:
266                parents = self.origparents[rev]
267            except KeyError:
268                continue  # unknown revmap source
269            if wanted:
270                self.mark_wanted(rev, parents)
271            else:
272                self.mark_not_wanted(rev, arg)
273            self._discard(arg, *parents)
274
275        return True
276
277    def getheads(self):
278        return self.base.getheads()
279
280    def getcommit(self, rev):
281        # We want to save a reference to the commit objects to be able
282        # to rewrite their parents later on.
283        c = self.commits[rev] = self.base.getcommit(rev)
284        for p in c.parents:
285            self.children[p] = self.children.get(p, 0) + 1
286        return c
287
288    def numcommits(self):
289        return self.base.numcommits()
290
291    def _cachedcommit(self, rev):
292        if rev in self.commits:
293            return self.commits[rev]
294        return self.base.getcommit(rev)
295
296    def _discard(self, *revs):
297        for r in revs:
298            if r is None:
299                continue
300            self.seenchildren[r] = self.seenchildren.get(r, 0) + 1
301            if self.seenchildren[r] == self.children[r]:
302                self.wantedancestors.pop(r, None)
303                self.parentmap.pop(r, None)
304                del self.seenchildren[r]
305                if self._rebuilt:
306                    del self.children[r]
307
308    def wanted(self, rev, i):
309        # Return True if we're directly interested in rev.
310        #
311        # i is an index selecting one of the parents of rev (if rev
312        # has no parents, i is None).  getchangedfiles will give us
313        # the list of files that are different in rev and in the parent
314        # indicated by i.  If we're interested in any of these files,
315        # we're interested in rev.
316        try:
317            files = self.base.getchangedfiles(rev, i)
318        except NotImplementedError:
319            raise error.Abort(_(b"source repository doesn't support --filemap"))
320        for f in files:
321            if self.filemapper(f):
322                return True
323
324        # The include directive is documented to include nothing else (though
325        # valid branch closes are included).
326        if self.filemapper.include:
327            return False
328
329        # Allow empty commits in the source revision through.  The getchanges()
330        # method doesn't even bother calling this if it determines that the
331        # close marker is significant (i.e. all of the branch ancestors weren't
332        # eliminated).  Therefore if there *is* a close marker, getchanges()
333        # doesn't consider it significant, and this revision should be dropped.
334        return not files and b'close' not in self.commits[rev].extra
335
336    def mark_not_wanted(self, rev, p):
337        # Mark rev as not interesting and update data structures.
338
339        if p is None:
340            # A root revision. Use SKIPREV to indicate that it doesn't
341            # map to any revision in the restricted graph.  Put SKIPREV
342            # in the set of wanted ancestors to simplify code elsewhere
343            self.parentmap[rev] = SKIPREV
344            self.wantedancestors[rev] = {SKIPREV}
345            return
346
347        # Reuse the data from our parent.
348        self.parentmap[rev] = self.parentmap[p]
349        self.wantedancestors[rev] = self.wantedancestors[p]
350
351    def mark_wanted(self, rev, parents):
352        # Mark rev ss wanted and update data structures.
353
354        # rev will be in the restricted graph, so children of rev in
355        # the original graph should still have rev as a parent in the
356        # restricted graph.
357        self.parentmap[rev] = rev
358
359        # The set of wanted ancestors of rev is the union of the sets
360        # of wanted ancestors of its parents. Plus rev itself.
361        wrev = set()
362        for p in parents:
363            if p in self.wantedancestors:
364                wrev.update(self.wantedancestors[p])
365            else:
366                self.ui.warn(
367                    _(b'warning: %s parent %s is missing\n') % (rev, p)
368                )
369        wrev.add(rev)
370        self.wantedancestors[rev] = wrev
371
372    def getchanges(self, rev, full):
373        parents = self.commits[rev].parents
374        if len(parents) > 1 and not self.ignoreancestorcheck:
375            self.rebuild()
376
377        # To decide whether we're interested in rev we:
378        #
379        # - calculate what parents rev will have if it turns out we're
380        #   interested in it.  If it's going to have more than 1 parent,
381        #   we're interested in it.
382        #
383        # - otherwise, we'll compare it with the single parent we found.
384        #   If any of the files we're interested in is different in the
385        #   the two revisions, we're interested in rev.
386
387        # A parent p is interesting if its mapped version (self.parentmap[p]):
388        # - is not SKIPREV
389        # - is still not in the list of parents (we don't want duplicates)
390        # - is not an ancestor of the mapped versions of the other parents or
391        #   there is no parent in the same branch than the current revision.
392        mparents = []
393        knownparents = set()
394        branch = self.commits[rev].branch
395        hasbranchparent = False
396        for i, p1 in enumerate(parents):
397            mp1 = self.parentmap[p1]
398            if mp1 == SKIPREV or mp1 in knownparents:
399                continue
400
401            isancestor = not self.ignoreancestorcheck and any(
402                p2
403                for p2 in parents
404                if p1 != p2
405                and mp1 != self.parentmap[p2]
406                and mp1 in self.wantedancestors[p2]
407            )
408            if not isancestor and not hasbranchparent and len(parents) > 1:
409                # This could be expensive, avoid unnecessary calls.
410                if self._cachedcommit(p1).branch == branch:
411                    hasbranchparent = True
412            mparents.append((p1, mp1, i, isancestor))
413            knownparents.add(mp1)
414        # Discard parents ancestors of other parents if there is a
415        # non-ancestor one on the same branch than current revision.
416        if hasbranchparent:
417            mparents = [p for p in mparents if not p[3]]
418        wp = None
419        if mparents:
420            wp = max(p[2] for p in mparents)
421            mparents = [p[1] for p in mparents]
422        elif parents:
423            wp = 0
424
425        self.origparents[rev] = parents
426
427        closed = False
428        if b'close' in self.commits[rev].extra:
429            # A branch closing revision is only useful if one of its
430            # parents belong to the branch being closed
431            pbranches = [self._cachedcommit(p).branch for p in mparents]
432            if branch in pbranches:
433                closed = True
434
435        if len(mparents) < 2 and not closed and not self.wanted(rev, wp):
436            # We don't want this revision.
437            # Update our state and tell the convert process to map this
438            # revision to the same revision its parent as mapped to.
439            p = None
440            if parents:
441                p = parents[wp]
442            self.mark_not_wanted(rev, p)
443            self.convertedorder.append((rev, False, p))
444            self._discard(*parents)
445            return self.parentmap[rev]
446
447        # We want this revision.
448        # Rewrite the parents of the commit object
449        self.commits[rev].parents = mparents
450        self.mark_wanted(rev, parents)
451        self.convertedorder.append((rev, True, None))
452        self._discard(*parents)
453
454        # Get the real changes and do the filtering/mapping. To be
455        # able to get the files later on in getfile, we hide the
456        # original filename in the rev part of the return value.
457        changes, copies, cleanp2 = self.base.getchanges(rev, full)
458        files = {}
459        ncleanp2 = set(cleanp2)
460        for f, r in changes:
461            newf = self.filemapper(f)
462            if newf and (newf != f or newf not in files):
463                files[newf] = (f, r)
464                if newf != f:
465                    ncleanp2.discard(f)
466        files = sorted(files.items())
467
468        ncopies = {}
469        for c in copies:
470            newc = self.filemapper(c)
471            if newc:
472                newsource = self.filemapper(copies[c])
473                if newsource:
474                    ncopies[newc] = newsource
475
476        return files, ncopies, ncleanp2
477
478    def targetfilebelongstosource(self, targetfilename):
479        return self.filemapper.istargetfile(targetfilename)
480
481    def getfile(self, name, rev):
482        realname, realrev = rev
483        return self.base.getfile(realname, realrev)
484
485    def gettags(self):
486        return self.base.gettags()
487
488    def hasnativeorder(self):
489        return self.base.hasnativeorder()
490
491    def lookuprev(self, rev):
492        return self.base.lookuprev(rev)
493
494    def getbookmarks(self):
495        return self.base.getbookmarks()
496
497    def converted(self, rev, sinkrev):
498        self.base.converted(rev, sinkrev)
499