1# filesetlang.py - parser, tokenizer and utility for file set language
2#
3# Copyright 2010 Olivia Mackall <olivia@selenic.com>
4#
5# This software may be used and distributed according to the terms of the
6# GNU General Public License version 2 or any later version.
7
8from __future__ import absolute_import
9
10from .i18n import _
11from .pycompat import getattr
12from . import (
13    error,
14    parser,
15    pycompat,
16)
17
18# common weight constants for static optimization
19# (see registrar.filesetpredicate for details)
20WEIGHT_CHECK_FILENAME = 0.5
21WEIGHT_READ_CONTENTS = 30
22WEIGHT_STATUS = 10
23WEIGHT_STATUS_THOROUGH = 50
24
25elements = {
26    # token-type: binding-strength, primary, prefix, infix, suffix
27    b"(": (20, None, (b"group", 1, b")"), (b"func", 1, b")"), None),
28    b":": (15, None, None, (b"kindpat", 15), None),
29    b"-": (5, None, (b"negate", 19), (b"minus", 5), None),
30    b"not": (10, None, (b"not", 10), None, None),
31    b"!": (10, None, (b"not", 10), None, None),
32    b"and": (5, None, None, (b"and", 5), None),
33    b"&": (5, None, None, (b"and", 5), None),
34    b"or": (4, None, None, (b"or", 4), None),
35    b"|": (4, None, None, (b"or", 4), None),
36    b"+": (4, None, None, (b"or", 4), None),
37    b",": (2, None, None, (b"list", 2), None),
38    b")": (0, None, None, None, None),
39    b"symbol": (0, b"symbol", None, None, None),
40    b"string": (0, b"string", None, None, None),
41    b"end": (0, None, None, None, None),
42}
43
44keywords = {b'and', b'or', b'not'}
45
46symbols = {}
47
48globchars = b".*{}[]?/\\_"
49
50
51def tokenize(program):
52    pos, l = 0, len(program)
53    program = pycompat.bytestr(program)
54    while pos < l:
55        c = program[pos]
56        if c.isspace():  # skip inter-token whitespace
57            pass
58        elif c in b"(),-:|&+!":  # handle simple operators
59            yield (c, None, pos)
60        elif (
61            c in b'"\''
62            or c == b'r'
63            and program[pos : pos + 2] in (b"r'", b'r"')
64        ):  # handle quoted strings
65            if c == b'r':
66                pos += 1
67                c = program[pos]
68                decode = lambda x: x
69            else:
70                decode = parser.unescapestr
71            pos += 1
72            s = pos
73            while pos < l:  # find closing quote
74                d = program[pos]
75                if d == b'\\':  # skip over escaped characters
76                    pos += 2
77                    continue
78                if d == c:
79                    yield (b'string', decode(program[s:pos]), s)
80                    break
81                pos += 1
82            else:
83                raise error.ParseError(_(b"unterminated string"), s)
84        elif c.isalnum() or c in globchars or ord(c) > 127:
85            # gather up a symbol/keyword
86            s = pos
87            pos += 1
88            while pos < l:  # find end of symbol
89                d = program[pos]
90                if not (d.isalnum() or d in globchars or ord(d) > 127):
91                    break
92                pos += 1
93            sym = program[s:pos]
94            if sym in keywords:  # operator keywords
95                yield (sym, None, s)
96            else:
97                yield (b'symbol', sym, s)
98            pos -= 1
99        else:
100            raise error.ParseError(_(b"syntax error"), pos)
101        pos += 1
102    yield (b'end', None, pos)
103
104
105def parse(expr):
106    p = parser.parser(elements)
107    tree, pos = p.parse(tokenize(expr))
108    if pos != len(expr):
109        raise error.ParseError(_(b"invalid token"), pos)
110    return parser.simplifyinfixops(tree, {b'list', b'or'})
111
112
113def getsymbol(x):
114    if x and x[0] == b'symbol':
115        return x[1]
116    raise error.ParseError(_(b'not a symbol'))
117
118
119def getstring(x, err):
120    if x and (x[0] == b'string' or x[0] == b'symbol'):
121        return x[1]
122    raise error.ParseError(err)
123
124
125def getkindpat(x, y, allkinds, err):
126    kind = getsymbol(x)
127    pat = getstring(y, err)
128    if kind not in allkinds:
129        raise error.ParseError(_(b"invalid pattern kind: %s") % kind)
130    return b'%s:%s' % (kind, pat)
131
132
133def getpattern(x, allkinds, err):
134    if x and x[0] == b'kindpat':
135        return getkindpat(x[1], x[2], allkinds, err)
136    return getstring(x, err)
137
138
139def getlist(x):
140    if not x:
141        return []
142    if x[0] == b'list':
143        return list(x[1:])
144    return [x]
145
146
147def getargs(x, min, max, err):
148    l = getlist(x)
149    if len(l) < min or len(l) > max:
150        raise error.ParseError(err)
151    return l
152
153
154def _analyze(x):
155    if x is None:
156        return x
157
158    op = x[0]
159    if op in {b'string', b'symbol'}:
160        return x
161    if op == b'kindpat':
162        getsymbol(x[1])  # kind must be a symbol
163        t = _analyze(x[2])
164        return (op, x[1], t)
165    if op == b'group':
166        return _analyze(x[1])
167    if op == b'negate':
168        raise error.ParseError(_(b"can't use negate operator in this context"))
169    if op == b'not':
170        t = _analyze(x[1])
171        return (op, t)
172    if op == b'and':
173        ta = _analyze(x[1])
174        tb = _analyze(x[2])
175        return (op, ta, tb)
176    if op == b'minus':
177        return _analyze((b'and', x[1], (b'not', x[2])))
178    if op in {b'list', b'or'}:
179        ts = tuple(_analyze(y) for y in x[1:])
180        return (op,) + ts
181    if op == b'func':
182        getsymbol(x[1])  # function name must be a symbol
183        ta = _analyze(x[2])
184        return (op, x[1], ta)
185    raise error.ProgrammingError(b'invalid operator %r' % op)
186
187
188def _insertstatushints(x):
189    """Insert hint nodes where status should be calculated (first path)
190
191    This works in bottom-up way, summing up status names and inserting hint
192    nodes at 'and' and 'or' as needed. Thus redundant hint nodes may be left.
193
194    Returns (status-names, new-tree) at the given subtree, where status-names
195    is a sum of status names referenced in the given subtree.
196    """
197    if x is None:
198        return (), x
199
200    op = x[0]
201    if op in {b'string', b'symbol', b'kindpat'}:
202        return (), x
203    if op == b'not':
204        h, t = _insertstatushints(x[1])
205        return h, (op, t)
206    if op == b'and':
207        ha, ta = _insertstatushints(x[1])
208        hb, tb = _insertstatushints(x[2])
209        hr = ha + hb
210        if ha and hb:
211            return hr, (b'withstatus', (op, ta, tb), (b'string', b' '.join(hr)))
212        return hr, (op, ta, tb)
213    if op == b'or':
214        hs, ts = zip(*(_insertstatushints(y) for y in x[1:]))
215        hr = sum(hs, ())
216        if sum(bool(h) for h in hs) > 1:
217            return hr, (b'withstatus', (op,) + ts, (b'string', b' '.join(hr)))
218        return hr, (op,) + ts
219    if op == b'list':
220        hs, ts = zip(*(_insertstatushints(y) for y in x[1:]))
221        return sum(hs, ()), (op,) + ts
222    if op == b'func':
223        f = getsymbol(x[1])
224        # don't propagate 'ha' crossing a function boundary
225        ha, ta = _insertstatushints(x[2])
226        if getattr(symbols.get(f), '_callstatus', False):
227            return (f,), (b'withstatus', (op, x[1], ta), (b'string', f))
228        return (), (op, x[1], ta)
229    raise error.ProgrammingError(b'invalid operator %r' % op)
230
231
232def _mergestatushints(x, instatus):
233    """Remove redundant status hint nodes (second path)
234
235    This is the top-down path to eliminate inner hint nodes.
236    """
237    if x is None:
238        return x
239
240    op = x[0]
241    if op == b'withstatus':
242        if instatus:
243            # drop redundant hint node
244            return _mergestatushints(x[1], instatus)
245        t = _mergestatushints(x[1], instatus=True)
246        return (op, t, x[2])
247    if op in {b'string', b'symbol', b'kindpat'}:
248        return x
249    if op == b'not':
250        t = _mergestatushints(x[1], instatus)
251        return (op, t)
252    if op == b'and':
253        ta = _mergestatushints(x[1], instatus)
254        tb = _mergestatushints(x[2], instatus)
255        return (op, ta, tb)
256    if op in {b'list', b'or'}:
257        ts = tuple(_mergestatushints(y, instatus) for y in x[1:])
258        return (op,) + ts
259    if op == b'func':
260        # don't propagate 'instatus' crossing a function boundary
261        ta = _mergestatushints(x[2], instatus=False)
262        return (op, x[1], ta)
263    raise error.ProgrammingError(b'invalid operator %r' % op)
264
265
266def analyze(x):
267    """Transform raw parsed tree to evaluatable tree which can be fed to
268    optimize() or getmatch()
269
270    All pseudo operations should be mapped to real operations or functions
271    defined in methods or symbols table respectively.
272    """
273    t = _analyze(x)
274    _h, t = _insertstatushints(t)
275    return _mergestatushints(t, instatus=False)
276
277
278def _optimizeandops(op, ta, tb):
279    if tb is not None and tb[0] == b'not':
280        return (b'minus', ta, tb[1])
281    return (op, ta, tb)
282
283
284def _optimizeunion(xs):
285    # collect string patterns so they can be compiled into a single regexp
286    ws, ts, ss = [], [], []
287    for x in xs:
288        w, t = _optimize(x)
289        if t is not None and t[0] in {b'string', b'symbol', b'kindpat'}:
290            ss.append(t)
291            continue
292        ws.append(w)
293        ts.append(t)
294    if ss:
295        ws.append(WEIGHT_CHECK_FILENAME)
296        ts.append((b'patterns',) + tuple(ss))
297    return ws, ts
298
299
300def _optimize(x):
301    if x is None:
302        return 0, x
303
304    op = x[0]
305    if op == b'withstatus':
306        w, t = _optimize(x[1])
307        return w, (op, t, x[2])
308    if op in {b'string', b'symbol'}:
309        return WEIGHT_CHECK_FILENAME, x
310    if op == b'kindpat':
311        w, t = _optimize(x[2])
312        return w, (op, x[1], t)
313    if op == b'not':
314        w, t = _optimize(x[1])
315        return w, (op, t)
316    if op == b'and':
317        wa, ta = _optimize(x[1])
318        wb, tb = _optimize(x[2])
319        if wa <= wb:
320            return wa, _optimizeandops(op, ta, tb)
321        else:
322            return wb, _optimizeandops(op, tb, ta)
323    if op == b'or':
324        ws, ts = _optimizeunion(x[1:])
325        if len(ts) == 1:
326            return ws[0], ts[0]  # 'or' operation is fully optimized out
327        ts = tuple(
328            it[1] for it in sorted(enumerate(ts), key=lambda it: ws[it[0]])
329        )
330        return max(ws), (op,) + ts
331    if op == b'list':
332        ws, ts = zip(*(_optimize(y) for y in x[1:]))
333        return sum(ws), (op,) + ts
334    if op == b'func':
335        f = getsymbol(x[1])
336        w = getattr(symbols.get(f), '_weight', 1)
337        wa, ta = _optimize(x[2])
338        return w + wa, (op, x[1], ta)
339    raise error.ProgrammingError(b'invalid operator %r' % op)
340
341
342def optimize(x):
343    """Reorder/rewrite evaluatable tree for optimization
344
345    All pseudo operations should be transformed beforehand.
346    """
347    _w, t = _optimize(x)
348    return t
349
350
351def prettyformat(tree):
352    return parser.prettyformat(tree, (b'string', b'symbol'))
353