1# Copyright (C) 2005-2009, Parrot Foundation. 2 3.namespace [ "PGE";"P5Regex" ] 4 5.sub "compile_p5regex" 6 .param pmc source 7 .param pmc adverbs :slurpy :named 8 9 $I0 = exists adverbs['grammar'] 10 if $I0 goto have_grammar 11 adverbs['grammar'] = 'PGE::Grammar' 12 have_grammar: 13 14 .local string target 15 target = adverbs['target'] 16 target = downcase target 17 18 ## If we're passed the results of a previous parse, use it. 19 .local pmc match 20 $I0 = isa source, ['PGE';'Match'] 21 if $I0 == 0 goto parse 22 $P0 = source['expr'] 23 if null $P0 goto parse 24 $I0 = isa $P0, ['PGE';'Exp'] 25 if $I0 == 0 goto parse 26 match = source 27 goto analyze 28 29 parse: 30 $P0 = get_global "p5regex" 31 match = $P0(source, adverbs :flat :named) 32 if target != 'parse' goto check 33 .return (match) 34 35 check: 36 unless match goto check_1 37 $S0 = source 38 $S1 = match 39 if $S0 == $S1 goto analyze 40 check_1: 41 null $P0 42 .return ($P0) 43 44 analyze: 45 .local pmc exp, pad 46 exp = match['expr'] 47 pad = new 'Hash' 48 pad['subpats'] = 0 49 exp = exp.'p5analyze'(pad) 50 .tailcall exp.'compile'(adverbs :flat :named) 51.end 52 53 54.sub "p5regex" 55 .param pmc mob 56 .param pmc adverbs :slurpy :named 57 58 .local string stop, tighter 59 .local pmc stopstack, optable 60 61 stopstack = get_hll_global ['PGE';'P5Regex'], '@!stopstack' 62 optable = get_hll_global ["PGE";"P5Regex"], "$optable" 63 64 stop = adverbs['stop'] 65 tighter = adverbs['tighter'] 66 push stopstack, stop 67 $P0 = optable."parse"(mob, 'stop'=>stop, 'tighter'=>tighter) 68 $S0 = pop stopstack 69 70 .return ($P0) 71.end 72 73 74.include "cclass.pasm" 75 76.const int PGE_INF = 2147483647 77 78.sub "__onload" :load 79 .local pmc optable 80 81 optable = new ['PGE';'OPTable'] 82 set_hll_global ["PGE";"P5Regex"], "$optable", optable 83 84 $P0 = get_hll_global ["PGE";"P5Regex"], "parse_lit" 85 optable.'newtok'('term:', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0) 86 87 optable.'newtok'('term:\b', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 88 optable.'newtok'('term:\B', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 89 optable.'newtok'('term:^', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 90 optable.'newtok'('term:$', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 91 92 optable.'newtok'('term:\d', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 93 optable.'newtok'('term:\D', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 94 optable.'newtok'('term:\s', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 95 optable.'newtok'('term:\S', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 96 optable.'newtok'('term:\w', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 97 optable.'newtok'('term:\W', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 98 99 optable.'newtok'('circumfix:( )', 'equiv'=>'term:', 'nows'=>1, 'nullterm'=>1, 'match'=>'PGE::Exp::CGroup') 100 optable.'newtok'('circumfix:(?: )', 'equiv'=>'term:', 'nows'=>1, 'nullterm'=>1, 'match'=>'PGE::Exp::Group') 101 102 $P0 = get_hll_global ['PGE';'P5Regex'], 'parse_enumclass' 103 optable.'newtok'('term:[', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0) 104 optable.'newtok'('term:.', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0) 105 106 $P0 = get_hll_global ['PGE';'P5Regex'], 'parse_quant' 107 optable.'newtok'('postfix:*', 'looser'=>'term:', 'left'=>1, 'nows'=>1, 'parsed'=>$P0) 108 optable.'newtok'('postfix:+', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0) 109 optable.'newtok'('postfix:?', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0) 110 optable.'newtok'('postfix:{', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0) 111 112 optable.'newtok'('infix:', 'looser'=>'postfix:*', 'right'=>1, 'nows'=>1, 'match'=>'PGE::Exp::Concat') 113 optable.'newtok'('infix:|', 'looser'=>'infix:', 'left'=>1, 'nows'=>1, 'match'=>'PGE::Exp::Alt') 114 115 optable.'newtok'('close:}', 'looser'=>'infix:|', 'nows'=>1) # XXX: hack 116 117 # Create a stack for holding stop tokens 118 $P0 = new 'ResizablePMCArray' 119 set_hll_global ['PGE';'P5Regex'], '@!stopstack', $P0 120 121 $P0 = get_hll_global ["PGE";"P5Regex"], "compile_p5regex" 122 compreg "PGE::P5Regex", $P0 123.end 124 125 126.sub 'parse_error' 127 .param pmc mob 128 .param int pos 129 .param string message 130 $P0 = getattribute mob, '$.pos' 131 $P0 = pos 132 $P0 = new 'Exception' 133 $S0 = 'p5regex parse error: ' 134 $S0 .= message 135 $S0 .= ' at offset ' 136 $S1 = pos 137 $S0 .= $S1 138 $S0 .= ", found '" 139 $P1 = getattribute mob, '$.target' 140 $S1 = $P1 141 $S1 = substr $S1, pos, 1 142 $S0 .= $S1 143 $S0 .= "'" 144 $P0 = $S0 145 throw $P0 146 .return () 147.end 148 149 150.sub "parse_lit" 151 .param pmc mob 152 .param pmc adverbs :slurpy :named 153 .local string target 154 .local int pos, lastpos 155 .local int litstart, litlen 156 .local string initchar 157 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal') 158 lastpos = length target 159 initchar = substr target, pos, 1 160 unless initchar == '*' goto initchar_ok 161 parse_error(mob, pos, "Quantifier follows nothing") 162 163 initchar_ok: 164 if initchar == ')' goto end 165 inc pos 166 if initchar != "\\" goto term_literal 167 term_backslash: 168 initchar = substr target, pos, 1 169 inc pos 170 if pos <= lastpos goto term_backslash_ok 171 parse_error(mob, pos, "Search pattern not terminated") 172 term_backslash_ok: 173 $I0 = index "nrteab", initchar 174 if $I0 < 0 goto term_literal 175 initchar = substr "\n\r\t\e\a\b", $I0, 1 176 term_literal: 177 litstart = pos 178 litlen = 0 179 .local string stop 180 .local int stoplen 181 $P0 = get_hll_global ['PGE';'P5Regex'], '@!stopstack' 182 stop = $P0[-1] 183 stoplen = length stop 184 term_literal_loop: 185 if pos >= lastpos goto term_literal_end 186 if stoplen == 0 goto not_stop 187 $S0 = substr target, pos, stoplen 188 if $S0 == stop goto term_literal_end 189 not_stop: 190 $S0 = substr target, pos, 1 191 $I0 = index "[](){}*?+\\|^$.", $S0 192 # if not in circumfix:( ) throw error on end paren 193 if $I0 >= 0 goto term_literal_end 194 inc pos 195 inc litlen 196 goto term_literal_loop 197 term_literal_end: 198 if litlen < 1 goto term_literal_one 199 dec pos 200 term_literal_one: 201 $I0 = pos - litstart 202 $S0 = substr target, litstart, $I0 203 $S0 = concat initchar, $S0 204 mob.'!make'($S0) 205 goto end 206 end: 207 mob.'to'(pos) 208 .return (mob) 209.end 210 211.sub "parse_quant" 212 .param pmc mob 213 .param pmc adverbs :slurpy :named 214 .local string target 215 .local int min, max, backtrack 216 .local int pos, lastpos 217 .local string key 218 key = mob['KEY'] 219 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Quant') 220 lastpos = length target 221 min = 0 222 max = PGE_INF 223 backtrack = 0 224 if key == '{' goto quant_range 225 if key != '+' goto quant_max 226 min = 1 227 quant_max: 228 if key != "?" goto quant_lazy 229 max = 1 230 goto quant_lazy 231 quant_range: 232 $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos 233 if $I1 <= pos goto quant_range_max 234 $S0 = substr target, pos 235 min = $S0 236 max = $S0 237 pos = $I1 238 quant_range_max: 239 $S0 = substr target, pos, 1 240 if $S0 != "," goto quant_range_end 241 inc pos 242 max = PGE_INF 243 $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos 244 if $I1 <= pos goto quant_range_end 245 $S0 = substr target, pos 246 max = $S0 247 pos = $I1 248 quant_range_end: 249 $S0 = substr target, pos, 1 250 if $S0 != "}" goto err_range 251 inc pos 252 quant_lazy: 253 $S0 = substr target, pos, 1 254 if $S0 != "?" goto end 255 backtrack = PGE_BACKTRACK_EAGER 256 inc pos 257 end: 258 mob["min"] = min 259 mob["max"] = max 260 mob["backtrack"] = backtrack 261 mob.'to'(pos) 262 .return (mob) 263 err_range: 264 parse_error(mob, pos, "Error in quantified range") 265.end 266 267 268.sub parse_group 269 .param pmc mob 270 .param pmc adverbs :slurpy :named 271 .local string target 272 .local int pos, lastpos 273 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::CGroup') 274 inc pos 275 $S0 = substr target, pos, 2 276 if $S0 == "?:" goto nocapture 277 goto end 278 nocapture: 279 pos += 2 280 end: 281 mob.'to'(pos) 282 .return (mob) 283.end 284 285.sub "parse_enumclass" 286 .param pmc mob 287 .param pmc adverbs :slurpy :named 288 .local string target 289 .local int pos, lastpos 290 .local int isrange 291 .local string charlist 292 .local string key 293 key = mob['KEY'] 294 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList') 295 if key == '.' goto dot 296 lastpos = length target 297 charlist = "" 298 mob["isnegated"] = 0 299 isrange = 0 300 $S0 = substr target, pos, 1 301 if $S0 != "^" goto scan_first 302 mob["isnegated"] = 1 303 inc pos 304 scan_first: 305 if pos >= lastpos goto err_close 306 $S0 = substr target, pos, 1 307 inc pos 308 if $S0 == "\\" goto backslash 309 goto addchar 310 scan: 311 if pos >= lastpos goto err_close 312 $S0 = substr target, pos, 1 313 inc pos 314 if $S0 == "]" goto endclass 315 if $S0 == "-" goto hyphenrange 316 if $S0 != "\\" goto addchar 317 backslash: 318 $S0 = substr target, pos, 1 319 inc pos 320 $I0 = index "nrtfae0b", $S0 321 if $I0 == -1 goto addchar 322 $S0 = substr "\n\r\t\f\a\e\0\b", $I0, 1 323 addchar: 324 if isrange goto addrange 325 charlist .= $S0 326 goto scan 327 addrange: 328 isrange = 0 329 $I2 = ord charlist, -1 330 $I0 = ord $S0 331 if $I0 < $I2 goto err_range 332 addrange_1: 333 inc $I2 334 if $I2 > $I0 goto scan 335 $S1 = chr $I2 336 charlist .= $S1 337 goto addrange_1 338 hyphenrange: 339 if isrange goto addrange 340 isrange = 1 341 goto scan 342 endclass: 343 if isrange == 0 goto end 344 charlist .= "-" 345 goto end 346 dot: 347 charlist = "\n" 348 mob["isnegated"] = 1 349 end: 350 mob.'to'(pos) 351 mob.'!make'(charlist) 352 .return (mob) 353 354 err_close: 355 parse_error(mob, pos, "Unmatched [") 356 err_range: 357 $S0 = 'Invalid [] range "' 358 $S1 = chr $I2 359 $S0 .= $S1 360 $S0 .= '-' 361 $S1 = chr $I0 362 $S0 .= $S1 363 $S0 .= '"' 364 parse_error(mob, pos, $S0) 365.end 366 367 368.namespace [ "PGE";"Exp" ] 369 370.sub "p5analyze" :method 371 .param pmc pad 372 .local pmc exp 373 $I0 = 0 374 loop: 375 $I1 = defined self[$I0] 376 if $I1 == 0 goto end 377 $P0 = self[$I0] 378 $P0 = $P0."p5analyze"(pad) 379 self[$I0] = $P0 380 inc $I0 381 goto loop 382 end: 383 .return (self) 384.end 385 386.namespace [ "PGE";"Exp";"CGroup" ] 387 388.sub "p5analyze" :method 389 .param pmc pad 390 .local pmc exp 391 392 self["iscapture"] = 0 393 if self != "(" goto end 394 self["iscapture"] = 1 395 self["isscope"] = 0 396 self["isarray"] = 0 397 $I0 = pad["subpats"] 398 self["cname"] = $I0 399 inc $I0 400 pad["subpats"] = $I0 401 end: 402 exp = self[0] 403 exp = exp."p5analyze"(pad) 404 self[0] = exp 405 .return (self) 406.end 407 408 409 410# Local Variables: 411# mode: pir 412# fill-column: 100 413# End: 414# vim: expandtab shiftwidth=4 ft=pir: 415