1# fileset.py - file set queries for mercurial
2#
3# Copyright 2010 Olivia Mackall <olivia@selenic.com>
4#
5# This software may be used and distributed according to the terms of the
6# GNU General Public License version 2 or any later version.
7
8from __future__ import absolute_import
9
10import errno
11import re
12
13from .i18n import _
14from .pycompat import getattr
15from . import (
16    error,
17    filesetlang,
18    match as matchmod,
19    mergestate as mergestatemod,
20    pycompat,
21    registrar,
22    scmutil,
23    util,
24)
25from .utils import stringutil
26
# Weight constants re-exported from filesetlang under private names; they
# are passed as ``weight=`` when registering predicates below.
_WEIGHT_CHECK_FILENAME, _WEIGHT_READ_CONTENTS = (
    filesetlang.WEIGHT_CHECK_FILENAME,
    filesetlang.WEIGHT_READ_CONTENTS,
)
_WEIGHT_STATUS, _WEIGHT_STATUS_THOROUGH = (
    filesetlang.WEIGHT_STATUS,
    filesetlang.WEIGHT_STATUS_THOROUGH,
)
32
# Helpers for processing a parsed tree, borrowed from filesetlang.  Only
# getkindpat is bound under a private (underscore-prefixed) name.
getsymbol, getstring, getpattern, getargs = (
    filesetlang.getsymbol,
    filesetlang.getstring,
    filesetlang.getpattern,
    filesetlang.getargs,
)
_getkindpat = filesetlang.getkindpat
39
40
def getmatch(mctx, x):
    """Build a matcher for the parsed tree node ``x``

    The head of ``x`` selects the handler in ``methods``; the remaining
    elements are passed to it as positional arguments after ``mctx``.
    An empty (or None) node raises ParseError.
    """
    if x:
        handler = methods[x[0]]
        return handler(mctx, *x[1:])
    raise error.ParseError(_(b"missing argument"))
45
46
def getmatchwithstatus(mctx, x, hint):
    """Evaluate ``x`` in a context whose status was precomputed

    ``hint`` is a string node holding whitespace-separated status keys; they
    are handed to ``mctx.withstatus()`` before ``x`` is evaluated.
    """
    hintstr = getstring(hint, b'status hint must be a string')
    statuskeys = set(hintstr.split())
    statusctx = mctx.withstatus(statuskeys)
    return getmatch(statusctx, x)
50
51
def stringmatch(mctx, x):
    """Return the matcher ``mctx`` builds for the single pattern ``x``"""
    patterns = [x]
    return mctx.matcher(patterns)
54
55
def kindpatmatch(mctx, x, y):
    """Return a matcher for a ``kind:pattern`` node

    ``x`` and ``y`` are combined into one pattern by filesetlang.getkindpat,
    which is given every kind known to the match module.
    """
    kinds = matchmod.allpatternkinds
    errmsg = _(b"pattern must be a string")
    pat = _getkindpat(x, y, kinds, errmsg)
    return stringmatch(mctx, pat)
63
64
def patternsmatch(mctx, *xs):
    """Return one matcher covering all the pattern nodes in ``xs``"""
    kinds = matchmod.allpatternkinds
    pats = []
    for tree in xs:
        pats.append(getpattern(tree, kinds, _(b"pattern must be a string")))
    return mctx.matcher(pats)
71
72
def andmatch(mctx, x, y):
    """Return the intersection of the matchers for ``x`` and ``y``"""
    left = getmatch(mctx, x)
    # the right operand is evaluated in a context narrowed by the left one
    narrowctx = mctx.narrowed(left)
    right = getmatch(narrowctx, y)
    return matchmod.intersectmatchers(left, right)
77
78
def ormatch(mctx, *xs):
    """Return the union of the matchers for every node in ``xs``"""
    alternatives = []
    for tree in xs:
        alternatives.append(getmatch(mctx, tree))
    return matchmod.unionmatcher(alternatives)
82
83
def notmatch(mctx, x):
    """Return a matcher selecting the files the matcher for ``x`` rejects"""
    inner = getmatch(mctx, x)

    def negated(f):
        return not inner(f)

    return mctx.predicate(negated, predrepr=(b'<not %r>', inner))
87
88
def minusmatch(mctx, x, y):
    """Return a matcher for the files matched by ``x`` but not by ``y``"""
    kept = getmatch(mctx, x)
    # the subtracted operand is evaluated in a context narrowed by ``kept``
    narrowctx = mctx.narrowed(kept)
    dropped = getmatch(narrowctx, y)
    return matchmod.differencematcher(kept, dropped)
93
94
def listmatch(mctx, *xs):
    """Reject a list node: always raises ParseError with a help hint"""
    msg = _(b"can't use a list in this context")
    hint = _(b'see \'hg help "filesets.x or y"\'')
    raise error.ParseError(msg, hint=hint)
100
101
def func(mctx, a, b):
    """Evaluate the predicate named by node ``a`` with argument tree ``b``

    Raises UnknownIdentifier when no predicate of that name is registered;
    the exception is given the names of all registered predicates that have
    a docstring.
    """
    funcname = getsymbol(a)
    if funcname not in symbols:
        # predicates without a docstring are left out of the list
        documented = [
            s
            for (s, fn) in symbols.items()
            if getattr(fn, '__doc__', None) is not None
        ]
        raise error.UnknownIdentifier(funcname, documented)
    return symbols[funcname](mctx, b)
111
112
# Table of fileset predicates (the very same object as filesetlang.symbols).
# Every entry is callable like:
#  fun(mctx, x)
# with:
#  mctx - current matchctx instance
#  x - argument in tree form
symbols = filesetlang.symbols

# decorator applied to each predicate definition below
predicate = registrar.filesetpredicate(filesetlang.symbols)
121
122
@predicate(b'modified()', callstatus=True, weight=_WEIGHT_STATUS)
def modified(mctx, x):
    """File that is modified according to :hg:`status`."""
    # i18n: "modified" is a keyword
    errmsg = _(b"modified takes no arguments")
    getargs(x, 0, 0, errmsg)
    # snapshot the status list once; membership is then a set lookup
    files = frozenset(mctx.status().modified)
    return mctx.predicate(files.__contains__, predrepr=b'modified')
130
131
@predicate(b'added()', callstatus=True, weight=_WEIGHT_STATUS)
def added(mctx, x):
    """File that is added according to :hg:`status`."""
    # i18n: "added" is a keyword
    errmsg = _(b"added takes no arguments")
    getargs(x, 0, 0, errmsg)
    # snapshot the status list once; membership is then a set lookup
    files = frozenset(mctx.status().added)
    return mctx.predicate(files.__contains__, predrepr=b'added')
139
140
@predicate(b'removed()', callstatus=True, weight=_WEIGHT_STATUS)
def removed(mctx, x):
    """File that is removed according to :hg:`status`."""
    # i18n: "removed" is a keyword
    errmsg = _(b"removed takes no arguments")
    getargs(x, 0, 0, errmsg)
    # snapshot the status list once; membership is then a set lookup
    files = frozenset(mctx.status().removed)
    return mctx.predicate(files.__contains__, predrepr=b'removed')
148
149
@predicate(b'deleted()', callstatus=True, weight=_WEIGHT_STATUS)
def deleted(mctx, x):
    """Alias for ``missing()``."""
    # i18n: "deleted" is a keyword
    errmsg = _(b"deleted takes no arguments")
    getargs(x, 0, 0, errmsg)
    # snapshot the status list once; membership is then a set lookup
    files = frozenset(mctx.status().deleted)
    return mctx.predicate(files.__contains__, predrepr=b'deleted')
157
158
@predicate(b'missing()', callstatus=True, weight=_WEIGHT_STATUS)
def missing(mctx, x):
    """File that is missing according to :hg:`status`."""
    # i18n: "missing" is a keyword
    errmsg = _(b"missing takes no arguments")
    getargs(x, 0, 0, errmsg)
    # reads the "deleted" status list and is also represented as 'deleted'
    files = frozenset(mctx.status().deleted)
    return mctx.predicate(files.__contains__, predrepr=b'deleted')
166
167
@predicate(b'unknown()', callstatus=True, weight=_WEIGHT_STATUS_THOROUGH)
def unknown(mctx, x):
    """File that is unknown according to :hg:`status`."""
    # i18n: "unknown" is a keyword
    errmsg = _(b"unknown takes no arguments")
    getargs(x, 0, 0, errmsg)
    # snapshot the status list once; membership is then a set lookup
    files = frozenset(mctx.status().unknown)
    return mctx.predicate(files.__contains__, predrepr=b'unknown')
175
176
@predicate(b'ignored()', callstatus=True, weight=_WEIGHT_STATUS_THOROUGH)
def ignored(mctx, x):
    """File that is ignored according to :hg:`status`."""
    # i18n: "ignored" is a keyword
    errmsg = _(b"ignored takes no arguments")
    getargs(x, 0, 0, errmsg)
    # snapshot the status list once; membership is then a set lookup
    files = frozenset(mctx.status().ignored)
    return mctx.predicate(files.__contains__, predrepr=b'ignored')
184
185
@predicate(b'clean()', callstatus=True, weight=_WEIGHT_STATUS)
def clean(mctx, x):
    """File that is clean according to :hg:`status`."""
    # i18n: "clean" is a keyword
    errmsg = _(b"clean takes no arguments")
    getargs(x, 0, 0, errmsg)
    # snapshot the status list once; membership is then a set lookup
    files = frozenset(mctx.status().clean)
    return mctx.predicate(files.__contains__, predrepr=b'clean')
193
194
@predicate(b'tracked()')
def tracked(mctx, x):
    """File that is under Mercurial control."""
    # i18n: "tracked" is a keyword
    errmsg = _(b"tracked takes no arguments")
    getargs(x, 0, 0, errmsg)
    # a file is tracked when the current context contains it
    inctx = mctx.ctx.__contains__
    return mctx.predicate(inctx, predrepr=b'tracked')
201
202
@predicate(b'binary()', weight=_WEIGHT_READ_CONTENTS)
def binary(mctx, x):
    """File that appears to be binary (contains NUL bytes)."""
    # i18n: "binary" is a keyword
    errmsg = _(b"binary takes no arguments")
    getargs(x, 0, 0, errmsg)

    def binaryp(fctx):
        return fctx.isbinary()

    return mctx.fpredicate(binaryp, predrepr=b'binary', cache=True)
211
212
@predicate(b'exec()')
def exec_(mctx, x):
    """File that is marked as executable."""
    # i18n: "exec" is a keyword
    errmsg = _(b"exec takes no arguments")
    getargs(x, 0, 0, errmsg)
    ctx = mctx.ctx

    def execp(f):
        # only an exact b'x' flag counts
        return ctx.flags(f) == b'x'

    return mctx.predicate(execp, predrepr=b'exec')
220
221
@predicate(b'symlink()')
def symlink(mctx, x):
    """File that is marked as a symlink."""
    # i18n: "symlink" is a keyword
    errmsg = _(b"symlink takes no arguments")
    getargs(x, 0, 0, errmsg)
    ctx = mctx.ctx

    def symlinkp(f):
        # only an exact b'l' flag counts
        return ctx.flags(f) == b'l'

    return mctx.predicate(symlinkp, predrepr=b'symlink')
229
230
@predicate(b'resolved()', weight=_WEIGHT_STATUS)
def resolved(mctx, x):
    """File that is marked resolved according to :hg:`resolve -l`."""
    # i18n: "resolved" is a keyword
    errmsg = _(b"resolved takes no arguments")
    getargs(x, 0, 0, errmsg)
    # merge state is only consulted when the context has no revision number
    if mctx.ctx.rev() is not None:
        return mctx.never()
    ms = mergestatemod.mergestate.read(mctx.ctx.repo())

    def resolvedp(f):
        return f in ms and ms[f] == b'r'

    return mctx.predicate(resolvedp, predrepr=b'resolved')
242
243
@predicate(b'unresolved()', weight=_WEIGHT_STATUS)
def unresolved(mctx, x):
    """File that is marked unresolved according to :hg:`resolve -l`."""
    # i18n: "unresolved" is a keyword
    errmsg = _(b"unresolved takes no arguments")
    getargs(x, 0, 0, errmsg)
    # merge state is only consulted when the context has no revision number
    if mctx.ctx.rev() is not None:
        return mctx.never()
    ms = mergestatemod.mergestate.read(mctx.ctx.repo())

    def unresolvedp(f):
        return f in ms and ms[f] == b'u'

    return mctx.predicate(unresolvedp, predrepr=b'unresolved')
255
256
@predicate(b'hgignore()', weight=_WEIGHT_STATUS)
def hgignore(mctx, x):
    """File that matches the active .hgignore pattern."""
    # i18n: "hgignore" is a keyword
    errmsg = _(b"hgignore takes no arguments")
    getargs(x, 0, 0, errmsg)
    # the dirstate's own ignore object is returned as-is
    dirstate = mctx.ctx.repo().dirstate
    return dirstate._ignore
263
264
@predicate(b'portable()', weight=_WEIGHT_CHECK_FILENAME)
def portable(mctx, x):
    """File that has a portable name. (This doesn't include filenames with case
    collisions.)
    """
    # i18n: "portable" is a keyword
    errmsg = _(b"portable takes no arguments")
    getargs(x, 0, 0, errmsg)

    def portablep(f):
        # a name is portable when checkwinfilename() reports nothing
        return util.checkwinfilename(f) is None

    return mctx.predicate(portablep, predrepr=b'portable')
275
276
@predicate(b'grep(regex)', weight=_WEIGHT_READ_CONTENTS)
def grep(mctx, x):
    """File contains the given regular expression."""
    try:
        # i18n: "grep" is a keyword
        pattern = getstring(x, _(b"grep requires a pattern"))
        regex = re.compile(pattern)
    except re.error as inst:
        # report an uncompilable regex as a parse error of the expression
        detail = stringutil.forcebytestr(inst)
        raise error.ParseError(_(b'invalid match pattern: %s') % detail)

    def grepp(fctx):
        return regex.search(fctx.data())

    return mctx.fpredicate(
        grepp, predrepr=(b'grep(%r)', regex.pattern), cache=True
    )
292
293
def _sizetomax(s):
    """Return the largest byte count denoted by the size string ``s``

    With a unit suffix from util._sizeunits the value is widened by one
    step of its last written digit, minus one byte:
    max(4k) = 5k - 1, max(4.5k) = 4.6k - 1.  Without a suffix the value is
    precise and returned as is.  Raises ParseError on malformed input.
    """
    try:
        s = s.strip().lower()
        for suffix, multiplier in util._sizeunits:
            if not s.endswith(suffix):
                continue
            number = s[: -len(suffix)]
            if b"." in number:
                # one unit of the last decimal place that was written
                decimals = len(number.split(b".")[1])
                step = 1.0 / 10 ** decimals
            else:
                step = 1.0
            return int((float(number) + step) * multiplier) - 1
        # no extension, this is a precise value
        return int(s)
    except ValueError:
        raise error.ParseError(_(b"couldn't parse size: %s") % s)
309
310
def sizematcher(expr):
    """Build a function(size) -> bool from a ``size()`` expression

    Recognized forms, tried in this order: a ``low-high`` range (inclusive
    on both ends), a ``<=``, ``<``, ``>=`` or ``>`` comparison, and a bare
    size, which matches from that size up to ``_sizetomax()`` of it.
    """
    expr = expr.strip()

    if b'-' in expr:  # do we have a range?
        low, high = [util.sizetoint(part) for part in expr.split(b'-', 1)]
        return lambda x: low <= x <= high

    # two-character operators must be tested before their one-character
    # prefixes
    if expr.startswith(b"<="):
        bound = util.sizetoint(expr[2:])
        return lambda x: x <= bound
    if expr.startswith(b"<"):
        bound = util.sizetoint(expr[1:])
        return lambda x: x < bound
    if expr.startswith(b">="):
        bound = util.sizetoint(expr[2:])
        return lambda x: x >= bound
    if expr.startswith(b">"):
        bound = util.sizetoint(expr[1:])
        return lambda x: x > bound

    # bare size, e.g. '1k' covers 1024 to 2047 bytes
    low = util.sizetoint(expr)
    high = _sizetomax(expr)
    return lambda x: low <= x <= high
335
336
@predicate(b'size(expression)', weight=_WEIGHT_STATUS)
def size(mctx, x):
    """File size matches the given expression. Examples:

    - size('1k') - files from 1024 to 2047 bytes
    - size('< 20k') - files less than 20480 bytes
    - size('>= .5MB') - files at least 524288 bytes
    - size('4k - 1MB') - files from 4096 bytes to 1048576 bytes
    """
    # i18n: "size" is a keyword
    expr = getstring(x, _(b"size requires an expression"))
    insize = sizematcher(expr)

    def sizep(fctx):
        return insize(fctx.size())

    return mctx.fpredicate(sizep, predrepr=(b'size(%r)', expr), cache=True)
352
353
@predicate(b'encoding(name)', weight=_WEIGHT_READ_CONTENTS)
def encoding(mctx, x):
    """File can be successfully decoded with the given character
    encoding. May not be useful for encodings other than ASCII and
    UTF-8.
    """

    # i18n: "encoding" is a keyword
    enc = getstring(x, _(b"encoding requires an encoding name"))

    def encp(fctx):
        data = fctx.data()
        try:
            data.decode(pycompat.sysstr(enc))
        except LookupError:
            # the codec name itself is unknown: abort instead of not matching
            raise error.Abort(_(b"unknown encoding '%s'") % enc)
        except UnicodeDecodeError:
            return False
        return True

    return mctx.fpredicate(encp, predrepr=(b'encoding(%r)', enc), cache=True)
375
376
@predicate(b'eol(style)', weight=_WEIGHT_READ_CONTENTS)
def eol(mctx, x):
    """File contains newlines of the given style (dos, unix, mac). Binary
    files are excluded, files with mixed line endings match multiple
    styles.
    """

    # i18n: "eol" is a keyword
    enc = getstring(x, _(b"eol requires a style name"))

    def eolp(fctx):
        if fctx.isbinary():
            return False
        data = fctx.data()
        if enc in (b'dos', b'win'):
            return b'\r\n' in data
        if enc == b'unix':
            # LF not preceded by CR
            return re.search(b'(?<!\r)\n', data) is not None
        if enc == b'mac':
            # CR not followed by LF
            return re.search(b'\r(?!\n)', data) is not None
        # any other style name matches nothing
        return False

    return mctx.fpredicate(eolp, predrepr=(b'eol(%r)', enc), cache=True)
400
401
@predicate(b'copied()')
def copied(mctx, x):
    """File that is recorded as being copied."""
    # i18n: "copied" is a keyword
    errmsg = _(b"copied takes no arguments")
    getargs(x, 0, 0, errmsg)

    def copiedp(fctx):
        parents = fctx.parents()
        if not parents:
            # no parent at all: hand back the (falsy) parents value
            return parents
        # copied when the first parent lives under a different path
        return parents[0].path() != fctx.path()

    return mctx.fpredicate(copiedp, predrepr=b'copied', cache=True)
413
414
@predicate(b'revs(revs, pattern)', weight=_WEIGHT_STATUS)
def revs(mctx, x):
    """Evaluate set in the specified revisions. If the revset match multiple
    revs, this will return file matching pattern in any of the revision.
    """
    # i18n: "revs" is a keyword
    revtree, pattree = getargs(x, 2, 2, _(b"revs takes two arguments"))
    # i18n: "revs" is a keyword
    revspec = getstring(
        revtree, _(b"first argument to revs must be a revision")
    )
    repo = mctx.ctx.repo()

    # one matcher per revision, each evaluated against (p1, revision)
    matchers = []
    for rev in scmutil.revrange(repo, [revspec]):
        ctx = repo[rev]
        revctx = mctx.switch(ctx.p1(), ctx)
        matchers.append(getmatch(revctx, pattree))

    if len(matchers) > 1:
        return matchmod.unionmatcher(matchers)
    if matchers:
        return matchers[0]
    return mctx.never()
437
438
@predicate(b'status(base, rev, pattern)', weight=_WEIGHT_STATUS)
def status(mctx, x):
    """Evaluate predicate using status change between ``base`` and
    ``rev``. Examples:

    - ``status(3, 7, added())`` - matches files added from "3" to "7"
    """
    repo = mctx.ctx.repo()
    # i18n: "status" is a keyword
    basetree, revtree, pattree = getargs(
        x, 3, 3, _(b"status takes three arguments")
    )

    def revarg(tree, errmsg):
        # an empty revision spec is a parse error as well
        spec = getstring(tree, errmsg)
        if not spec:
            raise error.ParseError(errmsg)
        return spec

    # i18n: "status" is a keyword
    baseerr = _(b"first argument to status must be a revision")
    baserevspec = revarg(basetree, baseerr)
    reverr = _(b"second argument to status must be a revision")
    revspec = revarg(revtree, reverr)
    basectx, ctx = scmutil.revpair(repo, [baserevspec, revspec])
    return getmatch(mctx.switch(basectx, ctx), pattree)
461
462
@predicate(b'subrepo([pattern])')
def subrepo(mctx, x):
    """Subrepositories whose paths match the given pattern."""
    # i18n: "subrepo" is a keyword
    getargs(x, 0, 1, _(b"subrepo takes at most one argument"))
    ctx = mctx.ctx
    sstate = ctx.substate
    if not x:
        # no pattern: every path listed in the substate matches
        return mctx.predicate(sstate.__contains__, predrepr=b'subrepo')

    pat = getpattern(
        x,
        matchmod.allpatternkinds,
        # i18n: "subrepo" is a keyword
        _(b"subrepo requires a pattern or no arguments"),
    )
    if matchmod.patkind(pat):
        m = matchmod.match(ctx.repo().root, b'', [pat], ctx=ctx)
    else:
        # pattern without a kind prefix: plain equality, no matcher needed
        def m(s):
            return s == pat

    def subrepop(f):
        return f in sstate and m(f)

    return mctx.predicate(subrepop, predrepr=(b'subrepo(%r)', pat))
490
491
# Dispatch table consulted by getmatch(): the head of a parsed tree node
# selects the function that builds the matcher for that node.
methods = {
    # patterns
    b'string': stringmatch,
    b'symbol': stringmatch,
    b'kindpat': kindpatmatch,
    b'patterns': patternsmatch,
    # operators
    b'and': andmatch,
    b'or': ormatch,
    b'not': notmatch,
    b'minus': minusmatch,
    b'list': listmatch,
    # predicate calls and status hints
    b'func': func,
    b'withstatus': getmatchwithstatus,
}
505
506
class matchctx(object):
    """Context in which a fileset tree is evaluated into a matcher

    It carries the two contexts status is computed between (``basectx`` and
    ``ctx``), the ``cwd`` used when building pattern matchers, and an
    optional ``badfn`` passed to every matcher created here.  Derived
    instances may additionally hold a narrowing matcher and a precomputed
    status.
    """

    def __init__(self, basectx, ctx, cwd, badfn=None):
        self._basectx = basectx
        self.ctx = ctx
        self.cwd = cwd
        self._badfn = badfn
        # only set on instances derived via narrowed()/switch()/withstatus()
        self._match = None
        self._status = None

    def narrowed(self, match):
        """Create matchctx for a sub-tree narrowed by the given matcher"""
        sub = matchctx(self._basectx, self.ctx, self.cwd, self._badfn)
        # the inherited status may be wider than ``match``; it is reused
        # as is since the extra entries do not matter
        sub._status = self._status
        sub._match = match
        return sub

    def switch(self, basectx, ctx):
        """Create matchctx evaluating another ``basectx``/``ctx`` pair

        The narrowing matcher is carried over; a precomputed status is not.
        """
        other = matchctx(basectx, ctx, self.cwd, self._badfn)
        other._match = self._match
        return other

    def withstatus(self, keys):
        """Create matchctx which has precomputed status specified by the keys"""
        derived = matchctx(self._basectx, self.ctx, self.cwd, self._badfn)
        derived._match = self._match
        derived._buildstatus(keys)
        return derived

    def _buildstatus(self, keys):
        """Compute and store the status between ``basectx`` and ``ctx``

        Ignored, clean and unknown files are only requested when the
        corresponding name is present in ``keys``.
        """
        wantignored = b'ignored' in keys
        wantclean = b'clean' in keys
        wantunknown = b'unknown' in keys
        self._status = self._basectx.status(
            self.ctx,
            self._match,
            listignored=wantignored,
            listclean=wantclean,
            listunknown=wantunknown,
        )

    def status(self):
        """Return the precomputed status, or None if there is none"""
        return self._status

    def matcher(self, patterns):
        """Create a matcher for ``patterns`` at the current revision"""
        return self.ctx.match(patterns, badfn=self._badfn, cwd=self.cwd)

    def predicate(self, predfn, predrepr=None, cache=False):
        """Create a matcher to select files by predfn(filename)"""
        fn = util.cachefunc(predfn) if cache else predfn
        return matchmod.predicatematcher(
            fn, predrepr=predrepr, badfn=self._badfn
        )

    def fpredicate(self, predfn, predrepr=None, cache=False):
        """Create a matcher to select files by predfn(fctx) at the current
        revision

        Missing files are ignored.
        """
        ctx = self.ctx
        # a context without a revision number gets the extra filesystem
        # safety checks below
        norev = ctx.rev() is None
        # open()-ing a directory fails with EACCES on Windows
        ignorable = (
            errno.ENOENT,
            errno.EACCES,
            errno.ENOTDIR,
            errno.EISDIR,
        )

        def fctxpredfn(f):
            try:
                fctx = ctx[f]
            except error.LookupError:
                return False
            if not norev:
                return predfn(fctx)
            try:
                fctx.audit()
            except error.Abort:
                return False
            try:
                return predfn(fctx)
            except (IOError, OSError) as inst:
                if inst.errno not in ignorable:
                    raise
                return False

        return self.predicate(fctxpredfn, predrepr=predrepr, cache=cache)

    def never(self):
        """Create a matcher to select nothing"""
        return matchmod.never(badfn=self._badfn)
604
605
def match(ctx, cwd, expr, badfn=None):
    """Create a matcher for a single fileset expression

    ``expr`` is parsed, analyzed and optimized by filesetlang, then
    evaluated in a fresh matchctx spanning ``ctx.p1()`` to ``ctx``.
    """
    parsed = filesetlang.parse(expr)
    analyzed = filesetlang.analyze(parsed)
    optimized = filesetlang.optimize(analyzed)
    return getmatch(matchctx(ctx.p1(), ctx, cwd, badfn=badfn), optimized)
613
614
def loadpredicate(ui, extname, registrarobj):
    """Load fileset predicates from specified registrarobj

    Every entry of ``registrarobj._table`` is copied into ``symbols``,
    replacing any predicate already registered under the same name.
    ``ui`` and ``extname`` are not used here.
    """
    table = registrarobj._table
    for name, predfn in pycompat.iteritems(table):
        symbols[name] = predfn
619
620
# Exposed for hggettext, which extracts the docstrings of these functions
# (all the predicates registered in ``symbols``).
i18nfunctions = symbols.values()
623