1# filesetlang.py - parser, tokenizer and utility for file set language 2# 3# Copyright 2010 Olivia Mackall <olivia@selenic.com> 4# 5# This software may be used and distributed according to the terms of the 6# GNU General Public License version 2 or any later version. 7 8from __future__ import absolute_import 9 10from .i18n import _ 11from .pycompat import getattr 12from . import ( 13 error, 14 parser, 15 pycompat, 16) 17 18# common weight constants for static optimization 19# (see registrar.filesetpredicate for details) 20WEIGHT_CHECK_FILENAME = 0.5 21WEIGHT_READ_CONTENTS = 30 22WEIGHT_STATUS = 10 23WEIGHT_STATUS_THOROUGH = 50 24 25elements = { 26 # token-type: binding-strength, primary, prefix, infix, suffix 27 b"(": (20, None, (b"group", 1, b")"), (b"func", 1, b")"), None), 28 b":": (15, None, None, (b"kindpat", 15), None), 29 b"-": (5, None, (b"negate", 19), (b"minus", 5), None), 30 b"not": (10, None, (b"not", 10), None, None), 31 b"!": (10, None, (b"not", 10), None, None), 32 b"and": (5, None, None, (b"and", 5), None), 33 b"&": (5, None, None, (b"and", 5), None), 34 b"or": (4, None, None, (b"or", 4), None), 35 b"|": (4, None, None, (b"or", 4), None), 36 b"+": (4, None, None, (b"or", 4), None), 37 b",": (2, None, None, (b"list", 2), None), 38 b")": (0, None, None, None, None), 39 b"symbol": (0, b"symbol", None, None, None), 40 b"string": (0, b"string", None, None, None), 41 b"end": (0, None, None, None, None), 42} 43 44keywords = {b'and', b'or', b'not'} 45 46symbols = {} 47 48globchars = b".*{}[]?/\\_" 49 50 51def tokenize(program): 52 pos, l = 0, len(program) 53 program = pycompat.bytestr(program) 54 while pos < l: 55 c = program[pos] 56 if c.isspace(): # skip inter-token whitespace 57 pass 58 elif c in b"(),-:|&+!": # handle simple operators 59 yield (c, None, pos) 60 elif ( 61 c in b'"\'' 62 or c == b'r' 63 and program[pos : pos + 2] in (b"r'", b'r"') 64 ): # handle quoted strings 65 if c == b'r': 66 pos += 1 67 c = program[pos] 68 decode = lambda x: x 69 else: 70 decode = parser.unescapestr 71 pos += 1 72 s = pos 73 while pos < l: # find closing quote 74 d = program[pos] 75 if d == b'\\': # skip over escaped characters 76 pos += 2 77 continue 78 if d == c: 79 yield (b'string', decode(program[s:pos]), s) 80 break 81 pos += 1 82 else: 83 raise error.ParseError(_(b"unterminated string"), s) 84 elif c.isalnum() or c in globchars or ord(c) > 127: 85 # gather up a symbol/keyword 86 s = pos 87 pos += 1 88 while pos < l: # find end of symbol 89 d = program[pos] 90 if not (d.isalnum() or d in globchars or ord(d) > 127): 91 break 92 pos += 1 93 sym = program[s:pos] 94 if sym in keywords: # operator keywords 95 yield (sym, None, s) 96 else: 97 yield (b'symbol', sym, s) 98 pos -= 1 99 else: 100 raise error.ParseError(_(b"syntax error"), pos) 101 pos += 1 102 yield (b'end', None, pos) 103 104 105def parse(expr): 106 p = parser.parser(elements) 107 tree, pos = p.parse(tokenize(expr)) 108 if pos != len(expr): 109 raise error.ParseError(_(b"invalid token"), pos) 110 return parser.simplifyinfixops(tree, {b'list', b'or'}) 111 112 113def getsymbol(x): 114 if x and x[0] == b'symbol': 115 return x[1] 116 raise error.ParseError(_(b'not a symbol')) 117 118 119def getstring(x, err): 120 if x and (x[0] == b'string' or x[0] == b'symbol'): 121 return x[1] 122 raise error.ParseError(err) 123 124 125def getkindpat(x, y, allkinds, err): 126 kind = getsymbol(x) 127 pat = getstring(y, err) 128 if kind not in allkinds: 129 raise error.ParseError(_(b"invalid pattern kind: %s") % kind) 130 return b'%s:%s' % (kind, pat) 131 132 133def getpattern(x, allkinds, err): 134 if x and x[0] == b'kindpat': 135 return getkindpat(x[1], x[2], allkinds, err) 136 return getstring(x, err) 137 138 139def getlist(x): 140 if not x: 141 return [] 142 if x[0] == b'list': 143 return list(x[1:]) 144 return [x] 145 146 147def getargs(x, min, max, err): 148 l = getlist(x) 149 if len(l) < min or len(l) > max: 150 raise error.ParseError(err) 151 return l 152 153 154def _analyze(x): 155 if x is None: 156 return x 157 158 op = x[0] 159 if op in {b'string', b'symbol'}: 160 return x 161 if op == b'kindpat': 162 getsymbol(x[1]) # kind must be a symbol 163 t = _analyze(x[2]) 164 return (op, x[1], t) 165 if op == b'group': 166 return _analyze(x[1]) 167 if op == b'negate': 168 raise error.ParseError(_(b"can't use negate operator in this context")) 169 if op == b'not': 170 t = _analyze(x[1]) 171 return (op, t) 172 if op == b'and': 173 ta = _analyze(x[1]) 174 tb = _analyze(x[2]) 175 return (op, ta, tb) 176 if op == b'minus': 177 return _analyze((b'and', x[1], (b'not', x[2]))) 178 if op in {b'list', b'or'}: 179 ts = tuple(_analyze(y) for y in x[1:]) 180 return (op,) + ts 181 if op == b'func': 182 getsymbol(x[1]) # function name must be a symbol 183 ta = _analyze(x[2]) 184 return (op, x[1], ta) 185 raise error.ProgrammingError(b'invalid operator %r' % op) 186 187 188def _insertstatushints(x): 189 """Insert hint nodes where status should be calculated (first path) 190 191 This works in bottom-up way, summing up status names and inserting hint 192 nodes at 'and' and 'or' as needed. Thus redundant hint nodes may be left. 193 194 Returns (status-names, new-tree) at the given subtree, where status-names 195 is a sum of status names referenced in the given subtree. 196 """ 197 if x is None: 198 return (), x 199 200 op = x[0] 201 if op in {b'string', b'symbol', b'kindpat'}: 202 return (), x 203 if op == b'not': 204 h, t = _insertstatushints(x[1]) 205 return h, (op, t) 206 if op == b'and': 207 ha, ta = _insertstatushints(x[1]) 208 hb, tb = _insertstatushints(x[2]) 209 hr = ha + hb 210 if ha and hb: 211 return hr, (b'withstatus', (op, ta, tb), (b'string', b' '.join(hr))) 212 return hr, (op, ta, tb) 213 if op == b'or': 214 hs, ts = zip(*(_insertstatushints(y) for y in x[1:])) 215 hr = sum(hs, ()) 216 if sum(bool(h) for h in hs) > 1: 217 return hr, (b'withstatus', (op,) + ts, (b'string', b' '.join(hr))) 218 return hr, (op,) + ts 219 if op == b'list': 220 hs, ts = zip(*(_insertstatushints(y) for y in x[1:])) 221 return sum(hs, ()), (op,) + ts 222 if op == b'func': 223 f = getsymbol(x[1]) 224 # don't propagate 'ha' crossing a function boundary 225 ha, ta = _insertstatushints(x[2]) 226 if getattr(symbols.get(f), '_callstatus', False): 227 return (f,), (b'withstatus', (op, x[1], ta), (b'string', f)) 228 return (), (op, x[1], ta) 229 raise error.ProgrammingError(b'invalid operator %r' % op) 230 231 232def _mergestatushints(x, instatus): 233 """Remove redundant status hint nodes (second path) 234 235 This is the top-down path to eliminate inner hint nodes. 236 """ 237 if x is None: 238 return x 239 240 op = x[0] 241 if op == b'withstatus': 242 if instatus: 243 # drop redundant hint node 244 return _mergestatushints(x[1], instatus) 245 t = _mergestatushints(x[1], instatus=True) 246 return (op, t, x[2]) 247 if op in {b'string', b'symbol', b'kindpat'}: 248 return x 249 if op == b'not': 250 t = _mergestatushints(x[1], instatus) 251 return (op, t) 252 if op == b'and': 253 ta = _mergestatushints(x[1], instatus) 254 tb = _mergestatushints(x[2], instatus) 255 return (op, ta, tb) 256 if op in {b'list', b'or'}: 257 ts = tuple(_mergestatushints(y, instatus) for y in x[1:]) 258 return (op,) + ts 259 if op == b'func': 260 # don't propagate 'instatus' crossing a function boundary 261 ta = _mergestatushints(x[2], instatus=False) 262 return (op, x[1], ta) 263 raise error.ProgrammingError(b'invalid operator %r' % op) 264 265 266def analyze(x): 267 """Transform raw parsed tree to evaluatable tree which can be fed to 268 optimize() or getmatch() 269 270 All pseudo operations should be mapped to real operations or functions 271 defined in methods or symbols table respectively. 272 """ 273 t = _analyze(x) 274 _h, t = _insertstatushints(t) 275 return _mergestatushints(t, instatus=False) 276 277 278def _optimizeandops(op, ta, tb): 279 if tb is not None and tb[0] == b'not': 280 return (b'minus', ta, tb[1]) 281 return (op, ta, tb) 282 283 284def _optimizeunion(xs): 285 # collect string patterns so they can be compiled into a single regexp 286 ws, ts, ss = [], [], [] 287 for x in xs: 288 w, t = _optimize(x) 289 if t is not None and t[0] in {b'string', b'symbol', b'kindpat'}: 290 ss.append(t) 291 continue 292 ws.append(w) 293 ts.append(t) 294 if ss: 295 ws.append(WEIGHT_CHECK_FILENAME) 296 ts.append((b'patterns',) + tuple(ss)) 297 return ws, ts 298 299 300def _optimize(x): 301 if x is None: 302 return 0, x 303 304 op = x[0] 305 if op == b'withstatus': 306 w, t = _optimize(x[1]) 307 return w, (op, t, x[2]) 308 if op in {b'string', b'symbol'}: 309 return WEIGHT_CHECK_FILENAME, x 310 if op == b'kindpat': 311 w, t = _optimize(x[2]) 312 return w, (op, x[1], t) 313 if op == b'not': 314 w, t = _optimize(x[1]) 315 return w, (op, t) 316 if op == b'and': 317 wa, ta = _optimize(x[1]) 318 wb, tb = _optimize(x[2]) 319 if wa <= wb: 320 return wa, _optimizeandops(op, ta, tb) 321 else: 322 return wb, _optimizeandops(op, tb, ta) 323 if op == b'or': 324 ws, ts = _optimizeunion(x[1:]) 325 if len(ts) == 1: 326 return ws[0], ts[0] # 'or' operation is fully optimized out 327 ts = tuple( 328 it[1] for it in sorted(enumerate(ts), key=lambda it: ws[it[0]]) 329 ) 330 return max(ws), (op,) + ts 331 if op == b'list': 332 ws, ts = zip(*(_optimize(y) for y in x[1:])) 333 return sum(ws), (op,) + ts 334 if op == b'func': 335 f = getsymbol(x[1]) 336 w = getattr(symbols.get(f), '_weight', 1) 337 wa, ta = _optimize(x[2]) 338 return w + wa, (op, x[1], ta) 339 raise error.ProgrammingError(b'invalid operator %r' % op) 340 341 342def optimize(x): 343 """Reorder/rewrite evaluatable tree for optimization 344 345 All pseudo operations should be transformed beforehand. 346 """ 347 _w, t = _optimize(x) 348 return t 349 350 351def prettyformat(tree): 352 return parser.prettyformat(tree, (b'string', b'symbol')) 353