1# Copyright (C) 2006-2009, Parrot Foundation. 2 3=head1 DESCRIPTION 4 5Perl6Regex - compiler and parser for Perl 6 regex 6 7=over 4 8 9=item C<compile_perl6regex(PMC source, PMC adverbs :slurpy :named)> 10 11Return the result of compiling C<source> according to Perl 6 12regex syntax and the associated C<adverbs>. Normally this 13function is obtained using C<compreg 'PGE::Perl6Regex'> instead 14of calling it directly. 15 16Returns the compiled regular expression. If a C<target> 17named parameter is supplied, then it will return the parse tree 18(target='parse'), the expression tree (target='exp'), 19or the resulting PIR code (target='PIR'). 20 21=cut 22 23.namespace [ 'PGE';'Perl6Regex' ] 24 25.sub 'compile_perl6regex' 26 .param pmc source 27 .param pmc args :slurpy 28 .param pmc adverbs :slurpy :named 29 30 unless null adverbs goto set_adverbs 31 adverbs = new 'Hash' 32 33 set_adverbs: 34 $I0 = exists adverbs['grammar'] 35 if $I0 goto with_grammar 36 unless args goto adverb_grammar_1 37 $S0 = shift args 38 adverbs['grammar'] = $S0 39 goto with_grammar 40 adverb_grammar_1: 41 adverbs['grammar'] = 'PGE::Grammar' 42 with_grammar: 43 $I0 = exists adverbs['name'] 44 if $I0 goto with_name 45 unless args goto with_name 46 $S0 = shift args 47 adverbs['name'] = $S0 48 with_name: 49 $I0 = exists adverbs['lang'] 50 if $I0 goto with_lang 51 adverbs['lang'] = 'PIR' 52 with_lang: 53 $I0 = exists adverbs['ignorecase'] 54 if $I0 goto with_ignorecase 55 $I0 = adverbs['i'] 56 adverbs['ignorecase'] = $I0 57 with_ignorecase: 58 $I0 = exists adverbs['sigspace'] 59 if $I0 goto with_sigspace 60 $I0 = exists adverbs['s'] 61 if $I0 goto with_s 62 $I0 = exists adverbs['words'] 63 if $I0 goto with_words 64 $I0 = adverbs['w'] 65 adverbs['sigspace'] = $I0 66 goto with_sigspace 67 with_s: 68 $I0 = adverbs['s'] 69 adverbs['sigspace'] = $I0 70 goto with_sigspace 71 with_words: 72 $I0 = adverbs['words'] 73 adverbs['sigspace'] = $I0 74 with_sigspace: 75 76 .local string target 77 target = adverbs['target'] 78 target = downcase target 79 80 ## If we're passed the results of a previous parse, use it. 81 .local pmc match, exp 82 $I0 = isa source, ['PGE';'Match'] 83 if $I0 == 0 goto parse 84 $P0 = source['expr'] 85 if null $P0 goto parse 86 $I0 = isa $P0, ['PGE';'Exp'] 87 if $I0 == 0 goto parse 88 match = source 89 goto analyze 90 91 parse: 92 ## Let's parse the source as a regex 93 $P0 = get_global 'regex' 94 match = $P0(source, adverbs :flat :named) 95 if source == '' goto err_null 96 if target != 'parse' goto check 97 .return (match) 98 99 check: 100 unless match goto check_1 101 $S0 = source 102 $S1 = match 103 if $S0 == $S1 goto analyze 104 check_1: 105 null $P0 106 .return ($P0) 107 108 analyze: 109 .local pmc pad 110 exp = match['expr'] 111 pad = clone adverbs 112 $P0 = new 'Hash' 113 pad['lexscope'] = $P0 114 exp = exp.'perl6exp'(pad) 115 if null exp goto err_null 116 .tailcall exp.'compile'(adverbs :flat :named) 117 118 err_null: 119 $I0 = match.'from'() 120 'parse_error'(match, $I0, 'Null pattern illegal') 121.end 122 123 124=item C<regex(PMC mob, PMC adverbs :slurpy :named)> 125 126Parses a regex according to Perl 6 regex syntax, and returns 127the corresponding parse tree. 128 129=cut 130 131.sub 'regex' 132 .param pmc mob 133 .param pmc adverbs :slurpy :named 134 135 .local string stop, tighter 136 .local pmc stopstack, optable, match 137 138 stopstack = get_global '@!stopstack' 139 optable = get_global '$optable' 140 141 stop = adverbs['stop'] 142 tighter = adverbs['tighter'] 143 push stopstack, stop 144 match = optable.'parse'(mob, 'stop'=>stop, 'tighter'=>tighter) 145 $S0 = pop stopstack 146 147 .return (match) 148.end 149 150=item C<p6escapes> 151 152Parse and calculate various Perl 6 string escapes, such as \n, \r, 153\x, \o, and \c. For the latter escapes, also handle the bracketed 154forms and other special forms. 155 156Note that this function is used directly by PCT::Grammar and Rakudo, 157and someday may be refactored to a different location. 158 159=cut 160 161.sub 'trim' 162 .param string s 163 .local int rpos, lpos 164 rpos = length s 165 lpos = find_not_cclass .CCLASS_WHITESPACE, s, 0, rpos 166 rtrim_loop: 167 unless rpos > lpos goto rtrim_done 168 dec rpos 169 $I0 = is_cclass .CCLASS_WHITESPACE, s, rpos 170 if $I0 goto rtrim_loop 171 rtrim_done: 172 inc rpos 173 $I0 = rpos - lpos 174 $S0 = substr s, lpos, $I0 175 .return ($S0) 176.end 177 178 179.sub 'p6escapes' 180 .param pmc mob 181 .param pmc adverbs :slurpy :named 182 .local string target, backchar, literal 183 .local int pos, lastpos 184 $P0 = get_hll_global ['PGE'], '$!MATCH' 185 (mob, pos, target) = $P0.'new'(mob, adverbs :flat :named) 186 lastpos = length target 187 if pos >= lastpos goto fail 188 $S0 = substr target, pos, 1 189 inc pos 190 if $S0 != "\\" goto fail 191 if pos >= lastpos goto fail 192 backchar = substr target, pos, 1 193 inc pos 194 backchar = downcase backchar 195 $I0 = index "\\0abefnrtxco", backchar 196 if $I0 < 0 goto fail 197 if $I0 >= 9 goto scan_xco 198 literal = substr "\\\0\a\b\e\f\n\r\t", $I0, 1 199 goto succeed 200 scan_xco: 201 ## Handle \x, \c, and \o escapes. Start by converting 202 ## backchar into the appropriate radix, then loop through 203 ## the characters that follow to compute the decimal value 204 ## of codepoints, and concatenate the codepoints into a 205 ## literal. 206 .local int base, decnum, isbracketed 207 base = index ' o c x', backchar 208 literal = '' 209 $S0 = substr target, pos, 1 210 isbracketed = iseq $S0, '[' 211 pos += isbracketed 212 ## Handle the case of \cC (control escape). 213 if base != 10 goto scan_xco_char 214 if isbracketed goto scan_xco_char 215 $I0 = is_cclass .CCLASS_NUMERIC, $S0, 0 216 if $I0 goto scan_xco_char 217 ## xor the 64 bit 218 $I0 = ord $S0 219 bxor $I0, 64 220 literal = chr $I0 221 inc pos 222 goto succeed 223 scan_xco_char: 224 decnum = 0 225 # inside brackets, skip leading ws 226 unless isbracketed goto scan_xco_char_ws 227 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 228 scan_xco_char_ws: 229 if base != 10 goto scan_xco_char_digits 230 unless isbracketed goto scan_xco_char_digits 231 $I0 = is_cclass .CCLASS_NUMERIC, target, pos 232 if $I0 goto scan_xco_char_digits 233 ## look up character by name 234 .local int namepos 235 namepos = index target, ']', pos 236 if namepos < 0 goto err_missing_bracket 237 $I0 = index target, ',', pos 238 if $I0 < 0 goto have_namepos 239 if namepos < $I0 goto have_namepos 240 namepos = $I0 241 have_namepos: 242 $I0 = namepos - pos 243 $S0 = substr target, pos, $I0 244 $S0 = 'trim'($S0) 245 decnum = find_codepoint $S0 246 if decnum < 0 goto err_unicode_name 247 pos = namepos 248 goto scan_xco_char_end 249 scan_xco_char_digits: 250 $S0 = substr target, pos, 1 251 $I0 = index "0123456789abcdef0123456789ABCDEF", $S0 252 if $I0 < 0 goto scan_xco_char_end 253 $I0 %= 16 254 if $I0 >= base goto scan_xco_char_end 255 decnum *= base 256 decnum += $I0 257 inc pos 258 goto scan_xco_char_digits 259 scan_xco_char_end: 260 $S1 = chr decnum 261 literal = concat literal, $S1 262 unless isbracketed goto scan_xco_end 263 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 264 $S0 = substr target, pos, 1 265 if $S0 == ']' goto scan_xco_end 266 if $S0 == '' goto err_missing_bracket 267 if $S0 != ',' goto err_digit 268 inc pos 269 goto scan_xco_char 270 scan_xco_end: 271 pos += isbracketed 272 succeed: 273 mob.'!make'(literal) 274 mob.'to'(pos) 275 fail: 276 .return (mob) 277 278 err_unicode_name: 279 $S0 = concat "Unrecognized character name ", $S0 280 'parse_error'(mob, pos, $S0) 281 err_missing_bracket: 282 'parse_error'(mob, pos, "Missing close bracket for \\x[...], \\o[...], or \\c[...]") 283 err_digit: 284 'parse_error'(mob, pos, "Invalid digit in \\x[...], \\o[...], or \\c[...]") 285.end 286 287 288=item C<onload()> 289 290Initializes the Perl6Regex parser and other data structures 291needed for compiling regexes. 292 293=cut 294 295.include 'cclass.pasm' 296 297.namespace [ 'PGE';'Perl6Regex' ] 298 299.sub '__onload' :load 300 .local pmc p6meta 301 p6meta = new 'P6metaclass' 302 p6meta.'new_class'('PGE::Exp::WS', 'parent'=>'PGE::Exp::Subrule') 303 p6meta.'new_class'('PGE::Exp::Alias', 'parent'=>'PGE::Exp') 304 305 .local pmc optable 306 optable = new ['PGE';'OPTable'] 307 set_global '$optable', optable 308 309 $P0 = get_global 'parse_term' 310 optable.'newtok'('term:', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0) 311 312 $P0 = get_global 'parse_term_ws' 313 optable.'newtok'('term:#', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 314 315 $P0 = get_global 'parse_term_backslash' 316 optable.'newtok'("term:\\", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 317 318 optable.'newtok'('term:^', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 319 optable.'newtok'('term:^^', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 320 optable.'newtok'('term:$$', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 321 optable.'newtok'('term:\b', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 322 optable.'newtok'('term:\B', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 323 optable.'newtok'('term:<<', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 324 optable.'newtok'('term:>>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 325 optable.'newtok'('term:<?>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 326 optable.'newtok'('term:<!>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 327 optable.'newtok'(unicode:"term:\xab", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 328 optable.'newtok'(unicode:"term:\xbb", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor') 329 330 optable.'newtok'('term:.', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 331 optable.'newtok'('term:\d', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 332 optable.'newtok'('term:\D', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 333 optable.'newtok'('term:\s', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 334 optable.'newtok'('term:\S', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 335 optable.'newtok'('term:\w', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 336 optable.'newtok'('term:\W', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 337 optable.'newtok'('term:\N', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut') 338 optable.'newtok'('term:\n', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Newline') 339 340 $P0 = get_global 'parse_dollar' 341 optable.'newtok'('term:$', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 342 343 $P0 = get_global 'parse_subrule' 344 optable.'newtok'('term:<', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 345 optable.'newtok'('term:<?', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 346 optable.'newtok'('term:<!', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 347 optable.'newtok'('term:<.', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 348 349 $P0 = get_global 'parse_enumcharclass' 350 optable.'newtok'('term:<[', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 351 optable.'newtok'('term:<+', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 352 optable.'newtok'('term:<-', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 353 optable.'newtok'('term:<![', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 354 355 $P0 = get_global 'parse_quoted_literal' 356 optable.'newtok'("term:'", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 357 358 $P0 = get_global 'parse_goal' 359 optable.'newtok'('term:~', 'equiv'=>'term:', 'parsed'=>$P0) 360 361 optable.'newtok'('term:::', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut') 362 optable.'newtok'('term::::', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut') 363 optable.'newtok'('term:<cut>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut') 364 optable.'newtok'('term:<commit>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut') 365 366 $P0 = get_global 'parse_closure' 367 optable.'newtok'("term:{{", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 368 optable.'newtok'("term:<?{{", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 369 optable.'newtok'("term:<!{{", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 370 371 $P0 = get_global 'parse_action' 372 optable.'newtok'("term:{*}", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0) 373 374 375 optable.'newtok'('circumfix:[ ]', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Group') 376 optable.'newtok'('circumfix:( )', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CGroup') 377 378 $P0 = get_global 'parse_quant' 379 optable.'newtok'('postfix:*', 'looser'=>'term:', 'parsed'=>$P0) 380 optable.'newtok'('postfix:+', 'equiv'=>'postfix:*', 'parsed'=>$P0) 381 optable.'newtok'('postfix:?', 'equiv'=>'postfix:*', 'parsed'=>$P0) 382 optable.'newtok'('postfix::', 'equiv'=>'postfix:*', 'parsed'=>$P0) 383 optable.'newtok'('postfix:**', 'equiv'=>'postfix:*', 'parsed'=>$P0) 384 $P0 = get_global 'parse_quant_error' 385 optable.'newtok'('term:*', 'equiv'=>'term:', 'parsed'=>$P0) 386 optable.'newtok'('term:+', 'equiv'=>'term:', 'parsed'=>$P0) 387 optable.'newtok'('term:?', 'equiv'=>'term:', 'parsed'=>$P0) 388 389 optable.'newtok'('infix:', 'looser'=>'postfix:*', 'assoc'=>'list', 'nows'=>1, 'match'=>'PGE::Exp::Concat') 390 optable.'newtok'('infix:&', 'looser'=>'infix:', 'nows'=>1, 'match'=>'PGE::Exp::Conj') 391 optable.'newtok'('infix:|', 'looser'=>'infix:&', 'nows'=>1, 'match'=>'PGE::Exp::Alt') 392 optable.'newtok'('prefix:|', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt') 393 optable.'newtok'('infix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt') 394 optable.'newtok'('prefix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt') 395 396 optable.'newtok'('infix::=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias') 397 optable.'newtok'('infix:=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias') 398 399 $P0 = get_global 'parse_modifier' 400 optable.'newtok'('prefix::', 'looser'=>'infix:|', 'parsed'=>$P0) 401 402 optable.'newtok'('close:}', 'precedence'=>'<', 'nows'=>1) 403 404 .local pmc esclist 405 esclist = new 'Hash' 406 set_global '%esclist', esclist 407 esclist['e'] = "\e" 408 esclist['f'] = "\f" 409 esclist['r'] = "\r" 410 esclist['t'] = "\t" 411 esclist['v'] = unicode:"\x0a\x0b\x0c\x0d\x85\u2028\u2029" 412 esclist['h'] = unicode:"\x09\x20\xa0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000" 413 esclist['n'] = unicode:"\x0a\x0d\x0c\x85\u2028\u2029" 414 # See http://www.unicode.org/Public/UNIDATA/PropList.txt for above 415 416 # Create and store closure preprocessors in %closure_pp 417 $P0 = new 'Hash' 418 set_hll_global ['PGE';'Perl6Regex'], '%closure_pp', $P0 419 $P1 = get_hll_global ['PGE';'Perl6Regex'], 'PIR_closure' 420 $P0["PIR"] = $P1 421 422 # Create an array for holding stop tokens 423 $P0 = new 'ResizablePMCArray' 424 set_hll_global ['PGE';'Perl6Regex'], '@!stopstack', $P0 425 426 $P0 = get_global 'compile_perl6regex' 427 compreg 'PGE::Perl6Regex', $P0 428 .return () 429.end 430 431 432=item C<parse_term(PMC mob [, PMC adverbs :slurpy :named])> 433 434Parses literal strings and whitespace. 435Return a failed match if the stoptoken is found. 436 437=cut 438 439.sub 'parse_term' 440 .param pmc mob 441 .param pmc adverbs :slurpy :named 442 443 .local string target 444 .local int pos, lastpos 445 $P0 = getattribute mob, '$.target' 446 target = $P0 447 $P0 = getattribute mob, '$.pos' 448 pos = $P0 449 lastpos = length target 450 451 .local string stop 452 $P0 = get_hll_global ['PGE';'Perl6Regex'], '@!stopstack' 453 stop = $P0[-1] 454 455 $I0 = is_cclass .CCLASS_WHITESPACE, target, pos 456 if $I0 goto term_ws 457 $I0 = length stop 458 if $I0 == 0 goto not_stop 459 $S0 = substr target, pos, $I0 460 if $S0 == stop goto end_noterm 461 not_stop: 462 ## find length of word character sequence 463 .local int litlen 464 $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos 465 litlen = $I0 - pos 466 467 ## if we didn't find any, return no term 468 if litlen == 0 goto end_noterm 469 470 ## for multi-char unquoted literals, leave the last character 471 ## in case it's quantified (it gets processed as a subsequent term) 472 if litlen < 2 goto term_literal 473 dec litlen 474 term_literal: 475 $S0 = substr target, pos, litlen 476 pos += litlen 477 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal') 478 mob.'!make'($S0) 479 mob.'to'(pos) 480 .return (mob) 481 482 term_ws: 483 .tailcall 'parse_term_ws'(mob) 484 485 end_noterm: 486 $S0 = substr target, pos, 1 487 if $S0 == ':' goto err_cut 488 (mob) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal') 489 .return (mob) 490 err_cut: 491 'parse_error'(mob, pos, 'Quantifier follows nothing in regex') 492 .return (mob) 493.end 494 495 496=item C<parse_term_backslash(mob [, adverbs :slurpy :named])> 497 498Parses terms beginning with backslash. 499 500=cut 501 502.sub 'parse_term_backslash' 503 .param pmc mob 504 .param pmc adverbs :slurpy :named 505 506 .local string target 507 .local int pos, lastpos, isnegated 508 $P0 = getattribute mob, '$.target' 509 target = $P0 510 $P0 = getattribute mob, '$.pos' 511 pos = $P0 512 lastpos = length target 513 isnegated = 0 514 515 .local string backchar, charlist 516 ## get whatever follows the backslash 517 backchar = substr target, pos, 1 518 charlist = backchar 519 inc pos 520 521 ## if it's not a word character, it's a quoted metachar 522 $I0 = is_cclass .CCLASS_WORD, backchar, 0 523 unless $I0 goto term_literal 524 525 ## if it's a word character, it may be negated 526 isnegated = is_cclass .CCLASS_UPPERCASE, backchar, 0 527 ## $S0 = downcase charlist 528 $I0 = ord backchar 529 $S0 = chr $I0 530 backchar = downcase $S0 531 532 ## if it's \x, \c, or \o, parse as string escape 533 $I0 = index 'xco', backchar 534 if $I0 < 0 goto meta_esclist 535 meta_xco: 536 $I0 = pos - 2 537 $P0 = 'p6escapes'(mob, 'pos' => $I0) 538 unless $P0 goto err_xcoparse 539 pos = $P0.'to'() 540 charlist = $P0.'ast'() 541 unless isnegated goto term_literal 542 $I0 = length charlist 543 if $I0 > 1 goto err_negated_brackets 544 goto term_charlist 545 546 meta_esclist: 547 $P0 = get_global '%esclist' 548 $I0 = exists $P0[backchar] 549 unless $I0 goto err_reserved_metachar 550 charlist = $P0[backchar] 551 if isnegated goto term_charlist 552 $I0 = length charlist 553 if $I0 > 1 goto term_charlist 554 555 term_literal: 556 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal') 557 mob.'!make'(charlist) 558 mob.'to'(pos) 559 .return (mob) 560 561 term_charlist: 562 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList') 563 mob.'!make'(charlist) 564 mob['isnegated'] = isnegated 565 mob.'to'(pos) 566 .return (mob) 567 568 err_xcoparse: 569 parse_error(mob, pos, 'Unable to parse \x, \c, or \o value') 570 err_negated_brackets: 571 pos = mob.'from'() 572 parse_error(mob, pos, 'Cannot use comma in \\X[...] or \\O[...]') 573 err_reserved_metachar: 574 parse_error(mob, pos, 'Alphanumeric metacharacters are reserved') 575.end 576 577 578=item C<parse_term_ws(PMC mob)> 579 580Parses a whitespace term. 581 582=cut 583 584.sub 'parse_term_ws' 585 .param pmc mob 586 .param pmc adverbs :slurpy :named 587 588 .local string target 589 .local int pos, lastpos 590 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS') 591 lastpos = length target 592 593 term_ws_loop: 594 ## scan for the next non-whitespace character 595 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 596 $S0 = substr target, pos, 1 597 if $S0 != '#' goto end 598 ## we have a #-comment, determine its closing delimiter 599 inc pos 600 .local string closedelim 601 closedelim = "\n" 602 $S0 = substr target, pos, 1 603 $I0 = index '<[{(', $S0 604 if $I0 < 0 goto term_ws_loop_1 605 closedelim = substr '>]})', $I0, 1 606 term_ws_loop_1: 607 $I0 = index target, closedelim, pos 608 pos = $I0 + 1 609 if pos > 0 goto term_ws_loop 610 pos = lastpos 611 end: 612 mob.'to'(pos) 613 .return (mob) 614.end 615 616 617=item C<parse_quant(PMC mob)> 618 619Parses a quantifier, such as *, +, ?, :, and all of their wondrous 620combinations. 621 622=cut 623 624.sub 'parse_quant' 625 .param pmc mob 626 .param pmc adverbs :slurpy :named 627 628 .local string target 629 .local pmc key 630 .local int pos, lastpos 631 key = mob['KEY'] 632 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Quant') 633 lastpos = length target 634 635 .local int min, max, suffixpos, sepws 636 .local string suffix 637 min = 1 638 max = 1 639 sepws = is_cclass .CCLASS_WHITESPACE, target, pos 640 suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 641 642 if key == '**' goto quant_suffix 643 if key == ':' goto quant_cut 644 if key == '+' goto quant_max 645 ## quantifier is '?' or '*' 646 min = 0 647 quant_max: 648 if key == '?' goto quant_suffix 649 ## quantifier is '+' or '*' 650 max = PGE_INF 651 goto quant_suffix 652 653 quant_cut: 654 # The postfix:<:> operator may bring us here when it's really a 655 # term:<::> term. So, we check for that here and fail this match 656 # if we really have a cut term. 657 if key != ':' goto quant_suffix 658 $S0 = substr target, pos, 1 659 if $S0 == ':' goto end 660 mob['backtrack'] = PGE_BACKTRACK_NONE 661 662 quant_suffix: 663 suffix = substr target, suffixpos, 2 664 if suffix == ':?' goto quant_eager 665 if suffix == ':!' goto quant_greedy 666 quant_suffix_1: 667 suffix = substr target, suffixpos, 1 668 if suffix == '?' goto quant_eager 669 if suffix == '!' goto quant_greedy 670 if suffix != ':' goto quant 671 quant_none: 672 mob['backtrack'] = PGE_BACKTRACK_NONE 673 goto quant_skip_suffix 674 quant_eager: 675 mob['backtrack'] = PGE_BACKTRACK_EAGER 676 goto quant_skip_suffix 677 quant_greedy: 678 mob['backtrack'] = PGE_BACKTRACK_GREEDY 679 quant_skip_suffix: 680 $I0 = length suffix 681 pos = suffixpos + $I0 682 683 quant: 684 if key != '**' goto quant_set 685 quant_closure: 686 $I0 = is_cclass .CCLASS_WHITESPACE, target, pos 687 sepws |= $I0 688 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 689 .local int isconst 690 isconst = is_cclass .CCLASS_NUMERIC, target, pos 691 if isconst goto brace_skip 692 $S0 = substr target, pos, 1 693 if $S0 != "{" goto parse_repetition_controller 694 inc pos 695 brace_skip: 696 $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos 697 if $I1 <= pos goto err_closure 698 $S0 = substr target, pos 699 min = $S0 700 max = $S0 701 pos = $I1 702 $S0 = substr target, pos, 2 703 if $S0 != '..' goto quant_closure_end 704 pos += 2 705 max = PGE_INF 706 $S0 = substr target, pos, 1 707 if $S0 != '*' goto quant_range_end 708 inc pos 709 goto quant_closure_end 710 quant_range_end: 711 $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos 712 if $I1 <= pos goto err_closure 713 $S0 = substr target, pos 714 max = $S0 715 pos = $I1 716 quant_closure_end: 717 if isconst goto brace_skip2 718 $S0 = substr target, pos, 1 719 if $S0 != "}" goto err_closure 720 inc pos 721 brace_skip2: 722 suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 723 724 quant_set: 725 mob['min'] = min 726 mob['max'] = max 727 mob.'to'(pos) 728 end: 729 .return (mob) 730 731 parse_repetition_controller: 732 .local pmc regex, repetition_controller 733 mob.'to'(pos) 734 regex = get_global 'regex' 735 #parse everything down to concatenation precedence 736 repetition_controller = regex(mob, 'tighter'=>'infix:') 737 unless repetition_controller goto err_repetition_controller 738 739 #update pos to after the matched 740 pos = repetition_controller.'to'() 741 repetition_controller = repetition_controller['expr'] 742 743 # if there's surrounding ws, then add WS nodes 744 unless sepws goto sepws_done 745 $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat') 746 $P0.'to'(pos) 747 $P1 = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS') 748 $P1.'to'(pos) 749 push $P0, $P1 750 push $P0, repetition_controller 751 $P1 = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS') 752 $P1.'to'(pos) 753 push $P0, $P1 754 repetition_controller = $P0 755 sepws_done: 756 757 #save the matched in the mob as sep 758 mob['sep'] = repetition_controller 759 760 #force the match to be 1..Inf 761 mob['min'] = 1 762 mob['max'] = PGE_INF 763 764 #move position to after the matched 765 mob.'to'(pos) 766 .return (mob) 767 768 err_repetition_controller: 769 'parse_error'(mob, pos, "Error in repetition controller") 770 err_closure: 771 'parse_error'(mob, pos, "Error in closure quantifier") 772.end 773 774 775=item C<parse_quant_error(mob)> 776 777Throw an exception for quantifiers in term position. 778 779=cut 780 781.sub 'parse_quant_error' 782 .param pmc mob 783 .param pmc adverbs :slurpy :named 784 785 .local int pos 786 pos = mob.'to'() 787 'parse_error'(mob, pos, "Quantifier follows nothing in regex") 788.end 789 790 791=item C<parse_dollar(PMC mob)> 792 793Parse things that begin with a dollar sign, such as scalars, 794anchors, and match subscripts. 795 796=cut 797 798.sub "parse_dollar" 799 .param pmc mob 800 .param pmc adverbs :slurpy :named 801 802 .local string target 803 .local int pos, lastpos 804 .local string cname 805 $P0 = getattribute mob, '$.target' 806 target = $P0 807 $P0 = getattribute mob, '$.pos' 808 pos = $P0 809 lastpos = length target 810 $S0 = substr target, pos, 1 811 if $S0 == '<' goto name 812 $I0 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos 813 if $I0 > pos goto numeric 814 $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos 815 if $I0 > pos goto scalar 816 817 eos_anchor: 818 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Anchor') 819 mob.'to'(pos) 820 .return (mob) 821 822 scalar: 823 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar') 824 dec pos 825 $I1 = $I0 - pos 826 cname = substr target, pos, $I1 827 cname = concat '"', cname 828 cname = concat cname, '"' 829 mob["cname"] = cname 830 mob.'to'($I0) 831 .return (mob) 832 833 numeric: 834 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar') 835 $I1 = $I0 - pos 836 cname = substr target, pos, $I1 837 mob["cname"] = cname 838 mob.'to'($I0) 839 .return (mob) 840 841 name: 842 inc pos 843 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar') 844 $I0 = index target, ">", pos 845 if $I0 < pos goto err_close 846 name_1: 847 $I1 = $I0 - pos 848 cname = substr target, pos, $I1 849 cname = escape cname 850 cname = concat '"', cname 851 cname = concat cname, '"' 852 mob["cname"] = cname 853 pos = $I0 + 1 854 mob.'to'(pos) 855 .return (mob) 856 857 err_close: 858 parse_error(mob, pos, "Missing close '>' in scalar") 859 .return (mob) 860.end 861 862 863=item C<parse_subname(STR target, INT pos)> 864 865Scan C<target> starting at C<pos> looking for a subrule name 866(following Perl 6's identifier syntax). Returns any subrule 867name found, and the ending position of the name. 868 869=cut 870 871 872.sub 'parse_subname' 873 .param string target 874 .param int pos 875 .local int startpos, targetlen 876 877 targetlen = length target 878 startpos = pos 879 $I0 = pos 880 loop: 881 $I1 = find_not_cclass .CCLASS_WORD, target, $I0, targetlen 882 if $I1 == $I0 goto end 883 pos = $I1 884 $S0 = substr target, pos, 2 885 if $S0 != '::' goto end 886 $I0 = pos + 2 887 goto loop 888 end: 889 $I0 = pos - startpos 890 $S0 = substr target, startpos, $I0 891 .return ($S0, pos) 892.end 893 894 895=item C<parse_subrule(PMC mob)> 896 897Parses a subrule token. 898 899=cut 900 901.sub 'parse_subrule' 902 .param pmc mob 903 .param pmc adverbs :slurpy :named 904 905 .local string target 906 .local pmc mobsave 907 .local int pos, lastpos 908 .local string key 909 key = mob['KEY'] 910 mobsave = mob 911 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule') 912 lastpos = length target 913 914 ## default to non-capturing rule 915 .local int iscapture 916 iscapture = 0 917 918 ## see what type of subrule this is 919 if key == '<.' goto scan_subname 920 if key == '<?' goto zerowidth 921 if key == '<!' goto negated 922 923 ## capturing subrule, get its name/alias 924 iscapture = 1 925 .local string subname, cname 926 (subname, pos) = 'parse_subname'(target, pos) 927 cname = subname 928 $S0 = substr target, pos, 1 929 unless $S0 == '=' goto subrule_arg 930 ## aliased subrule, skip the '=' and get the real name 931 inc pos 932 goto scan_subname 933 934 negated: 935 mob['isnegated'] = 1 936 zerowidth: 937 mob['iszerowidth'] = 1 938 939 scan_subname: 940 (subname, pos) = 'parse_subname'(target, pos) 941 942 subrule_arg: 943 mob['subname'] = subname 944 $S0 = substr target, pos, 1 945 if $S0 == ':' goto subrule_text_arg 946 if $S0 != ' ' goto subrule_end 947 subrule_pattern_arg: 948 inc pos 949 mob.'to'(pos) 950 .local pmc regex 951 regex = get_global 'regex' 952 $P1 = regex(mob, 'stop'=>'>') 953 unless $P1 goto end 954 $S0 = $P1 955 mob['arg'] = $S0 956 pos = $P1.'to'() 957 mob.'to'(-1) 958 goto subrule_end 959 subrule_text_arg: 960 $I0 = pos + 1 961 pos = find_not_cclass .CCLASS_WHITESPACE, target, $I0, lastpos 962 if pos == $I0 goto end 963 if pos >= lastpos goto end 964 .local string textarg, closedelim 965 textarg = '' 966 closedelim = '>' 967 $S0 = substr target, pos, 1 968 if $S0 == '"' goto subrule_text_quote 969 if $S0 != "'" goto subrule_text_loop 970 subrule_text_quote: 971 closedelim = $S0 972 inc pos 973 subrule_text_loop: 974 if pos >= lastpos goto end 975 $S0 = substr target, pos, 1 976 if $S0 == closedelim goto subrule_text_end 977 if $S0 != "\\" goto subrule_text_add 978 inc pos 979 $S0 = substr target, pos, 1 980 if $S0 == closedelim goto subrule_text_add 981 if $S0 == "\\" goto subrule_text_add 982 textarg .= "\\" 983 subrule_text_add: 984 textarg .= $S0 985 inc pos 986 goto subrule_text_loop 987 subrule_text_end: 988 mob['arg'] = textarg 989 if closedelim == '>' goto subrule_end 990 inc pos 991 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 992 subrule_end: 993 $S0 = substr target, pos, 1 994 if $S0 != '>' goto end 995 inc pos 996 mob.'to'(pos) 997 mob['iscapture'] = iscapture 998 unless iscapture goto end 999 $S0 = escape cname 1000 $S0 = concat '"', $S0 1001 $S0 = concat $S0, '"' 1002 mob['cname'] = $S0 1003 end: 1004 .return (mob) 1005.end 1006 1007 1008=item C<parse_enumcharclass(PMC mob)> 1009 1010Extract an enumerated character list. 1011 1012=cut 1013 1014.sub 'parse_enumcharclass' 1015 .param pmc mob 1016 .param pmc adverbs :slurpy :named 1017 1018 .local string target 1019 .local pmc term 1020 .local string op 1021 .local int pos, lastpos 1022 1023 $P0 = getattribute mob, '$.target' 1024 target = $P0 1025 pos = mob.'to'() 1026 lastpos = length target 1027 op = mob['KEY'] 1028 1029 ## handle the case of <[, <+[, <-[, and <![ as the token 1030 ## by converting to <, <+, <-, or <! 1031 $S0 = substr op, -1, 1 1032 if $S0 != '[' goto parse_loop 1033 op = chopn op, 1 1034 goto enum 1035 1036 parse_loop: 1037 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 1038 if pos >= lastpos goto err_close 1039 $S0 = substr target, pos, 1 1040 if $S0 != '[' goto subrule 1041 inc pos 1042 1043 enum: 1044 .local string charlist 1045 .local int isrange 1046 charlist = '' 1047 isrange = 0 1048 1049 enum_loop: 1050 ## skip leading whitespace and get next character 1051 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 1052 if pos >= lastpos goto err_close 1053 $S0 = substr target, pos, 1 1054 if $S0 == ']' goto enum_close 1055 if $S0 == '-' goto err_hyphen 1056 if $S0 == '.' goto enum_dotrange 1057 if $S0 != "\\" goto enum_addchar 1058 enum_backslash: 1059 inc pos 1060 ## get escaped character 1061 $S0 = substr target, pos, 1 1062 ## handle metas such as \n, \t, \r, etc. 1063 $I0 = index 'nrtfae0xco', $S0 1064 if $I0 == -1 goto enum_addchar 1065 if $I0 >= 7 goto enum_xco 1066 $S0 = substr "\n\r\t\f\a\e\0", $I0, 1 1067 goto enum_addchar 1068 enum_xco: 1069 $I0 = pos - 1 1070 $P0 = 'p6escapes'(mob, 'pos'=>$I0) 1071 $S0 = $P0.'ast'() 1072 pos = $P0.'to'() 1073 goto enum_addchar_1 1074 enum_addchar: 1075 inc pos 1076 enum_addchar_1: 1077 if isrange goto enum_addrange 1078 charlist .= $S0 1079 goto enum_loop 1080 enum_dotrange: 1081 ## check if we have a .. range marker 1082 if isrange goto enum_addrange 1083 $S1 = substr target, pos, 2 1084 if $S1 != '..' goto enum_addchar 1085 pos += 2 1086 isrange = 1 1087 goto enum_loop 1088 enum_addrange: 1089 ## add character range to charlist 1090 isrange = 0 1091 $I2 = ord charlist, -1 1092 $I0 = ord $S0 1093 enum_addrange_1: 1094 inc $I2 1095 if $I2 > $I0 goto enum_loop 1096 $S1 = chr $I2 1097 charlist .= $S1 1098 goto enum_addrange_1 1099 enum_close: 1100 inc pos 1101 ## create a node for the charlist 1102 term = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList') 1103 term.'to'(pos) 1104 term.'!make'(charlist) 1105 goto combine 1106 1107 subrule: 1108 $I0 = pos 1109 .local string subname 1110 (subname, pos) = 'parse_subname'(target, $I0) 1111 if pos == $I0 goto err 1112 term = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule') 1113 term.'from'($I0) 1114 term.'to'(pos) 1115 term['subname'] = subname 1116 term['iscapture'] = 0 1117 1118 combine: 1119 ## find out what operator preceded this term 1120 if op == '+' goto combine_plus 1121 if op == '-' goto combine_minus 1122 if op == '<' goto combine_init 1123 if op == '<+' goto combine_init 1124 ## token was '<-' or '<!' 1125 term['isnegated'] = 1 1126 term['iszerowidth'] = 1 1127 if op == '<!' goto combine_init 1128 ## token is '<-', we need to match a char by concat dot 1129 $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::CCShortcut') 1130 $P0.'to'(pos) 1131 $P0.'!make'('.') 1132 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat') 1133 mob.'to'(pos) 1134 mob[0] = term 1135 mob[1] = $P0 1136 goto next_op 1137 1138 combine_init: 1139 mob = term 1140 goto next_op 1141 1142 combine_plus: 1143 ## <a+b> ==> <a> | <b> 1144 $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Alt') 1145 $P0.'to'(pos) 1146 $P0[0] = mob 1147 $P0[1] = term 1148 mob = $P0 1149 goto next_op 1150 1151 combine_minus: 1152 ## <a-b> ==> <!b> <a> 1153 term['isnegated'] = 1 1154 term['iszerowidth'] = 1 1155 $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat') 1156 $P0.'to'(pos) 1157 $P0[0] = term 1158 $P0[1] = mob 1159 mob = $P0 1160 goto next_op 1161 1162 next_op: 1163 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 1164 if pos >= lastpos goto err_close 1165 1166 op = substr target, pos, 1 1167 inc pos 1168 if op == '+' goto parse_loop 1169 if op == '-' goto parse_loop 1170 if op != '>' goto err 1171 mob.'to'(pos) 1172 goto end 1173 1174 err: 1175 parse_error(mob, pos, "Error parsing enumerated character class") 1176 goto end 1177 err_hyphen: 1178 parse_error(mob, pos, "Unescaped '-' in charlist (use '..' or '\\-')") 1179 goto end 1180 err_close: 1181 parse_error(mob, pos, "Missing close '>' or ']>' in enumerated character class") 1182 end: 1183 .return (mob) 1184.end 1185 1186 1187=item C<parse_quoted_literal> 1188 1189Parses '...' literals. 1190 1191=cut 1192 1193.sub 'parse_quoted_literal' 1194 .param pmc mob 1195 .param pmc adverbs :slurpy :named 1196 1197 .local int pos, lastpos 1198 .local string target 1199 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal') 1200 lastpos = length target 1201 lastpos -= 1 1202 .local string lit 1203 lit = '' 1204 literal_iter: 1205 if pos > lastpos goto literal_error 1206 $S0 = substr target, pos, 1 1207 if $S0 == "'" goto literal_end 1208 if $S0 != "\\" goto literal_add 1209 inc pos 1210 $S0 = substr target, pos, 1 1211 literal_add: 1212 inc pos 1213 lit .= $S0 1214 goto literal_iter 1215 literal_end: 1216 inc pos 1217 mob.'!make'(lit) 1218 mob.'to'(pos) 1219 .return (mob) 1220 literal_error: 1221 parse_error(mob, pos, "No closing ' in quoted literal") 1222 .return (mob) 1223.end 1224 1225 1226=item C<parse_goal> 1227 1228Parse a goal. 1229 1230=cut 1231 1232.sub 'parse_goal' 1233 .param pmc mob 1234 .param pmc adverbs :slurpy :named 1235 1236 .local int pos, lastpos 1237 .local string target 1238 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat') 1239 lastpos = length target 1240 ## skip any leading whitespace before goal 1241 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 1242 .local pmc regex, goal, expr, alt, failsub 1243 regex = get_global 'regex' 1244 ## parse the goal, down to concatenation precedence 1245 mob.'to'(pos) 1246 goal = regex(mob, 'tighter'=>'infix:') 1247 unless goal goto fail_goal 1248 pos = goal.'to'() 1249 goal = goal['expr'] 1250 ## skip any leading whitespace before expression 1251 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 1252 ## parse the goal, down to concatenation precedence 1253 mob.'to'(pos) 1254 expr = regex(mob, 'tighter'=>'infix:') 1255 unless expr goto fail_expr 1256 pos = expr.'to'() 1257 expr = expr['expr'] 1258 mob.'to'(pos) 1259 failsub = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule') 1260 failsub.'to'(pos) 1261 failsub['subname'] = 'FAILGOAL' 1262 $S0 = goal.'Str'() 1263 failsub['arg'] = $S0 1264 alt = mob.'new'(mob, 'grammar'=>'PGE::Exp::Alt') 1265 alt.'to'(pos) 1266 push alt, goal 1267 push alt, failsub 1268 push mob, expr 1269 push mob, alt 1270 .return (mob) 1271 fail_goal: 1272 'parse_error'(mob, pos, 'Unable to parse goal after ~') 1273 fail_expr: 1274 'parse_error'(mob, pos, 'Unable to parse expression after ~') 1275.end 1276 1277 1278=item C<parse_modifier> 1279 1280Parse a modifier. 1281 1282=cut 1283 1284.sub 'parse_modifier' 1285 .param pmc mob 1286 .param pmc adverbs :slurpy :named 1287 1288 .local int pos, lastpos 1289 .local string target, value 1290 .local string key 1291 key = mob['KEY'] 1292 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Modifier') 1293 lastpos = length target 1294 value = "1" 1295 $I0 = pos 1296 pos = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos 1297 if pos == $I0 goto name 1298 $I1 = pos - $I0 1299 value = substr target, $I0, $I1 1300 $I0 = pos 1301 name: 1302 pos = find_not_cclass .CCLASS_WORD, target, pos, lastpos 1303 $I1 = pos - $I0 1304 if $I1 == 0 goto fail 1305 $S0 = substr target, $I0, $I1 1306 mob['key'] = $S0 1307 mob.'!make'(value) 1308 $S0 = substr target, pos, 1 1309 if $S0 != '(' goto end 1310 $I0 = pos + 1 1311 pos = index target, ')', pos 1312 $I1 = pos - $I0 1313 $S0 = substr target, $I0, $I1 1314 mob.'!make'($S0) 1315 inc pos 1316 end: 1317 ### XXX pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos 1318 mob.'to'(pos) 1319 .return (mob) 1320 fail: 1321 .return (mob) 1322.end 1323 1324 1325.sub 'parse_closure' 1326 .param pmc mob 1327 .param pmc adverbs :slurpy :named 1328 1329 .local pmc key 1330 key = mob['KEY'] 1331 .local string target 1332 .local int pos, len 1333 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Closure') 1334 len = 2 1335 init: 1336 $S0 = substr target, pos, 1 1337 if $S0 != "{" goto body 1338 inc len 1339 inc pos 1340 goto init 1341 body: 1342 .local string close 1343 close = repeat "}", len 1344 if key == '<?{{' goto assert_pos 1345 if key == '<!{{' goto assert_neg 1346 goto have_close 1347 assert_neg: 1348 mob['isnegated'] = 1 1349 assert_pos: 1350 mob['iszerowidth'] = 1 1351 close = concat close, '>' 1352 inc len 1353 have_close: 1354 $I0 = index target, close, pos 1355 if $I0 < pos goto err_noclose 1356 $I1 = $I0 - pos 1357 $S1 = substr target, pos, $I1 1358 mob.'!make'($S1) 1359 pos = $I0 + len 1360 mob.'to'(pos) 1361 .return (mob) 1362 err_noclose: 1363 parse_error(mob, pos, "Missing closing braces for closure") 1364 .return (mob) 1365.end 1366 1367 1368.sub 'parse_action' 1369 .param pmc mob 1370 .param pmc adverbs :slurpy :named 1371 1372 .local string target 1373 .local int pos, keypos 1374 (mob, pos, target) = mob.'new'(mob, 'grammar' => 'PGE::Exp::Action') 1375 keypos = index target, '#= ', pos 1376 if keypos < 0 goto end 1377 $I0 = find_cclass .CCLASS_NEWLINE, target, pos, keypos 1378 if $I0 < keypos goto end 1379 .local string actionkey 1380 keypos += 3 1381 $I0 -= keypos 1382 actionkey = substr target, keypos, $I0 1383 actionkey = 'trim'(actionkey) 1384 mob['actionkey'] = actionkey 1385 end: 1386 mob.'to'(pos) 1387 .return (mob) 1388.end 1389 1390 1391.sub 'parse_error' 1392 .param pmc mob 1393 .param int pos 1394 .param string message 1395 $P0 = getattribute mob, '$.pos' 1396 $P0 = pos 1397 $P0 = new 'Exception' 1398 $S0 = 'perl6regex parse error: ' 1399 $S0 .= message 1400 $S0 .= ' at offset ' 1401 $S1 = pos 1402 $S0 .= $S1 1403 $S0 .= ", found '" 1404 $P1 = getattribute mob, '$.target' 1405 $S1 = $P1 1406 $S1 = substr $S1, pos, 1 1407 $S0 .= $S1 1408 $S0 .= "'" 1409 $P0 = $S0 1410 throw $P0 1411 .return () 1412.end 1413 1414 1415 1416 1417.namespace [ 'PGE';'Exp' ] 1418 1419.sub 'perl6exp' :method 1420 .param pmc pad 1421 .return (self) 1422.end 1423 1424 1425.namespace [ 'PGE';'Exp';'Literal' ] 1426 1427.sub 'perl6exp' :method 1428 .param pmc pad 1429 $I0 = pad['ignorecase'] 1430 self['ignorecase'] = $I0 1431 .return (self) 1432.end 1433 1434 1435.namespace [ 'PGE';'Exp';'Concat' ] 1436 1437.sub 'perl6exp' :method 1438 .param pmc pad 1439 1440 .local pmc array, exp 1441 .local int i, j, n 1442 array = self.'list'() 1443 n = elements array 1444 i = 0 1445 j = 0 1446 iter_loop: 1447 if i >= n goto iter_end 1448 exp = self[i] 1449 inc i 1450 exp = exp.'perl6exp'(pad) 1451 if null exp goto iter_loop 1452 self[j] = exp 1453 inc j 1454 goto iter_loop 1455 iter_end: 1456 array = j 1457 if j > 1 goto end 1458 $P0 = array[0] 1459 .return ($P0) 1460 end: 1461 .return (self) 1462.end 1463 1464 1465.namespace [ 'PGE';'Exp';'Quant' ] 1466 1467.sub 'perl6exp' :method 1468 .param pmc pad 1469 1470 $I0 = exists self['backtrack'] 1471 if $I0 goto backtrack_done 1472 self['backtrack'] = PGE_BACKTRACK_GREEDY 1473 $I0 = pad['ratchet'] 1474 if $I0 == 0 goto backtrack_done 1475 self['backtrack'] = PGE_BACKTRACK_NONE 1476 backtrack_done: 1477 1478 .local pmc exp0, sep 1479 .local int isarray 1480 isarray = pad['isarray'] 1481 pad['isarray'] = 1 1482 exp0 = self[0] 1483 $I0 = isa exp0, ['PGE';'Exp';'WS'] 1484 if $I0 goto err_parse_quant 1485 exp0['isquant'] = 1 1486 exp0 = exp0.'perl6exp'(pad) 1487 self[0] = exp0 1488 sep = self['sep'] 1489 if null sep goto sep_done 1490 sep = sep.'perl6exp'(pad) 1491 self['sep'] = sep 1492 sep_done: 1493 pad['isarray'] = isarray 1494 .return (self) 1495 err_parse_quant: 1496 $P0 = get_hll_global ['PGE';'Perl6Regex'], 'parse_quant_error' 1497 $P0(self) 1498 .return (self) 1499.end 1500 1501 1502.namespace [ 'PGE';'Exp';'Group' ] 1503 1504.sub 'perl6exp' :method 1505 .param pmc pad 1506 .local pmc exp0 1507 1508 $I0 = self['isquant'] 1509 if $I0 goto backtrack_done 1510 $I0 = exists self['backtrack'] 1511 if $I0 goto backtrack_done 1512 $I0 = pad['ratchet'] 1513 if $I0 == 0 goto backtrack_done 1514 self['backtrack'] = PGE_BACKTRACK_NONE 1515 backtrack_done: 1516 1517 exp0 = self[0] 1518 exp0 = exp0.'perl6exp'(pad) 1519 self[0] = exp0 1520 .return (self) 1521.end 1522 1523 1524.namespace [ 'PGE';'Exp';'CGroup' ] 1525 1526.sub 'perl6exp' :method 1527 .param pmc pad 1528 .local pmc exp 1529 1530 $I0 = self['isquant'] 1531 if $I0 goto backtrack_done 1532 $I0 = exists self['backtrack'] 1533 if $I0 goto backtrack_done 1534 $I0 = pad['ratchet'] 1535 if $I0 == 0 goto backtrack_done 1536 self['backtrack'] = PGE_BACKTRACK_NONE 1537 backtrack_done: 1538 1539 self['iscapture'] = 1 1540 $I0 = exists self['isscope'] 1541 if $I0 goto set_cname 1542 self['isscope'] = 1 1543 1544 set_cname: 1545 $I0 = exists self['cname'] 1546 if $I0 goto set_subpats 1547 $I0 = pad['subpats'] 1548 self['cname'] = $I0 1549 1550 set_subpats: 1551 .local string cname 1552 cname = self['cname'] 1553 $S0 = substr cname, 0, 1 1554 if $S0 == '"' goto set_lexicals 1555 $I0 = cname 1556 inc $I0 1557 pad['subpats'] = $I0 1558 1559 set_lexicals: 1560 .local int isarray 1561 isarray = 0 1562 .local pmc lexscope 1563 lexscope = pad['lexscope'] 1564 $I0 = exists lexscope[cname] 1565 if $I0 == 0 goto set_lexicals_1 1566 $P0 = lexscope[cname] 1567 $P0['isarray'] = 1 1568 isarray = 1 1569 set_lexicals_1: 1570 lexscope[cname] = self 1571 1572 .local int padarray 1573 padarray = pad['isarray'] 1574 isarray |= padarray 1575 self['isarray'] = isarray 1576 $I0 = self['isscope'] 1577 if $I0 == 0 goto unscoped 1578 1579 scoped: 1580 .local int subpats 1581 subpats = pad['subpats'] 1582 pad['subpats'] = 0 1583 pad['isarray'] = 0 1584 $P0 = new 'Hash' 1585 pad['lexscope'] = $P0 1586 exp = self[0] 1587 exp = exp.'perl6exp'(pad) 1588 self[0] = exp 1589 pad['lexscope'] = lexscope 1590 pad['isarray'] = padarray 1591 pad['subpats'] = subpats 1592 goto end 1593 1594 unscoped: 1595 exp = self[0] 1596 exp = exp.'perl6exp'(pad) 1597 self[0] = exp 1598 end: 1599 .return (self) 1600.end 1601 1602 1603.namespace [ 'PGE';'Exp';'Subrule' ] 1604 1605.sub 'perl6exp' :method 1606 .param pmc pad 1607 1608 $I0 = self['isquant'] 1609 if $I0 goto backtrack_done 1610 $I0 = exists self['backtrack'] 1611 if $I0 goto backtrack_done 1612 $I0 = pad['ratchet'] 1613 if $I0 == 0 goto backtrack_done 1614 self['backtrack'] = PGE_BACKTRACK_NONE 1615 backtrack_done: 1616 1617 .local int iscapture, isarray 1618 .local pmc lexscope 1619 iscapture = self['iscapture'] 1620 if iscapture == 0 goto end 1621 .local string cname 1622 cname = self['cname'] 1623 isarray = pad['isarray'] 1624 lexscope = pad['lexscope'] 1625 $I0 = exists lexscope[cname] 1626 if $I0 == 0 goto lexscope_1 1627 $P0 = lexscope[cname] 1628 $P0['isarray'] = 1 1629 isarray = 1 1630 lexscope_1: 1631 lexscope[cname] = self 1632 self['isarray'] = isarray 1633 next_cname: 1634 $S0 = substr cname, 0, 1 1635 if $S0 == '"' goto end 1636 $I0 = cname 1637 inc $I0 1638 pad['subpats'] = $I0 1639 end: 1640 $S0 = pad['dba'] 1641 self['dba'] = $S0 1642 .return (self) 1643.end 1644 1645 1646.namespace [ 'PGE';'Exp';'WS' ] 1647 1648.sub 'perl6exp' :method 1649 .param pmc pad 1650 1651 $I0 = pad['sigspace'] 1652 if $I0 goto end 1653 null $P0 1654 .return ($P0) 1655 end: 1656 self['subname'] = 'ws' 1657 self['iscapture'] = 0 1658 $I0 = pad['ratchet'] 1659 unless $I0 goto end_1 1660 self['backtrack'] = PGE_BACKTRACK_NONE 1661 end_1: 1662 .return (self) 1663.end 1664 1665 1666.namespace [ 'PGE';'Exp';'Alt' ] 1667 1668.sub 'perl6exp' :method 1669 .param pmc pad 1670 1671 .local pmc exp0, exp1 1672 exp0 = self[0] 1673 exp1 = self[1] 1674 1675 ## if we only have one operand (prefix:|), 1676 ## reduce and return it. 1677 $I0 = defined self[1] 1678 if $I0 goto with_rhs 1679 .tailcall exp0.'perl6exp'(pad) 1680 with_rhs: 1681 1682 ## if lhs is whitespace, then this is a prefix-alt and 1683 ## we ignore it (by simply returning its rhs) 1684 $I0 = isa exp0, ['PGE';'Exp';'WS'] 1685 if $I0 == 0 goto with_lhs 1686 .tailcall exp1.'perl6exp'(pad) 1687 with_lhs: 1688 1689 .local pmc lexscope, savescope, it 1690 lexscope = pad['lexscope'] 1691 savescope = new 'Hash' 1692 it = iter lexscope 1693 iter_loop: 1694 unless it goto iter_end 1695 $P1 = shift it 1696 $P2 = it[$P1] 1697 savescope[$P1] = $P2 1698 goto iter_loop 1699 iter_end: 1700 $I0 = pad['subpats'] 1701 exp0 = exp0.'perl6exp'(pad) 1702 self[0] = exp0 1703 1704 $I1 = pad['subpats'] 1705 pad['subpats'] = $I0 1706 pad['lexscope'] = savescope 1707 exp1 = exp1.'perl6exp'(pad) 1708 self[1] = exp1 1709 $I0 = pad['subpats'] 1710 if $I0 >= $I1 goto end 1711 pad['subpats'] = $I1 1712 end: 1713 .return (self) 1714.end 1715 1716 1717.namespace [ 'PGE';'Exp';'Alias' ] 1718 1719.sub 'perl6exp' :method 1720 .param pmc pad 1721 .local string cname 1722 .local pmc exp0, exp1 1723 1724 exp0 = self[0] 1725 $I0 = isa exp0, ['PGE';'Exp';'Scalar'] 1726 unless $I0 goto err_no_lvalue 1727 1728 cname = exp0['cname'] 1729 exp1 = self[1] 1730 1731 ## If we're aliasing a capture group or a quantified capture 1732 ## group, then we just move the alias name to that group. 1733 ## Otherwise, we need to create a capture group for this 1734 ## alias and return that. 1735 1736 $I0 = isa exp1, ['PGE';'Exp';'CGroup'] 1737 if $I0 == 1 goto make_alias 1738 $I0 = isa exp1, ['PGE';'Exp';'Subrule'] 1739 if $I0 == 1 goto make_alias 1740 $I0 = isa exp1, ['PGE';'Exp';'Quant'] 1741 if $I0 == 0 goto add_cgroup 1742 $P0 = exp1[0] 1743 $I0 = isa $P0, ['PGE';'Exp';'CGroup'] 1744 if $I0 == 0 goto add_cgroup 1745 $P0['cname'] = cname 1746 goto end 1747 1748 add_cgroup: 1749 .local pmc cexp 1750 cexp = self.'new'(self, 'grammar'=>'PGE::Exp::CGroup') 1751 $I0 = self.'from'() 1752 cexp.'from'($I0) 1753 $I0 = self.'to'() 1754 cexp.'to'($I0) 1755 cexp[0] = exp1 1756 cexp['isscope'] = 0 1757 cexp['iscapture'] = 1 1758 cexp['cname'] = cname 1759 cexp = cexp.'perl6exp'(pad) 1760 .return (cexp) 1761 1762 make_alias: 1763 exp1['cname'] = cname 1764 exp1['iscapture'] = 1 1765 end: 1766 exp1 = exp1.'perl6exp'(pad) 1767 .return (exp1) 1768 1769 err_no_lvalue: 1770 $P0 = get_hll_global ['PGE';'Perl6Regex'], 'parse_error' 1771 $I0 = self.'from'() 1772 $P0(self, $I0, 'LHS of alias must be lvalue') 1773.end 1774 1775 1776.namespace [ 'PGE';'Exp';'Modifier' ] 1777 1778.sub 'perl6exp' :method 1779 .param pmc pad 1780 .local string key 1781 .local string value 1782 key = self['key'] 1783 value = self.'ast'() 1784 if key == 'words' goto sigspace 1785 if key == 's' goto sigspace 1786 if key == 'w' goto sigspace 1787 if key == 'i' goto ignorecase 1788 goto setpad 1789 sigspace: 1790 key = 'sigspace' 1791 goto setpad 1792 ignorecase: 1793 key = 'ignorecase' 1794 setpad: 1795 $P0 = pad[key] 1796 pad[key] = value 1797 .local pmc exp 1798 exp = self[0] 1799 exp = exp.'perl6exp'(pad) 1800 self[0] = exp 1801 pad[key] = $P0 1802 .return (exp) 1803.end 1804 1805.namespace [ 'PGE';'Exp';'Conj' ] 1806 1807.sub 'perl6exp' :method 1808 .param pmc pad 1809 $P0 = self[0] 1810 $P0 = $P0.'perl6exp'(pad) 1811 self[0] = $P0 1812 $P1 = self[1] 1813 $P1 = $P1.'perl6exp'(pad) 1814 self[1] = $P1 1815 .return (self) 1816.end 1817 1818 1819.namespace [ 'PGE';'Exp';'Closure' ] 1820 1821.sub 'perl6exp' :method 1822 .param pmc pad 1823 .local string lang 1824 .local pmc closure_pp 1825 .local pmc closure_fn 1826 lang = pad['lang'] 1827 self['lang'] = lang 1828 # see if we need to do any pre-processing of the closure 1829 closure_pp = get_hll_global ['PGE';'Perl6Regex'], '%closure_pp' 1830 $I0 = defined closure_pp[lang] 1831 if $I0 == 0 goto end 1832 closure_fn = closure_pp[lang] 1833 $S1 = self.'ast'() 1834 $S1 = closure_fn($S1) 1835 self.'!make'($S1) 1836 end: 1837 .return (self) 1838.end 1839 1840=back 1841 1842=head1 Functions 1843 1844=over 4 1845 1846=item C<PIR_closure(string code)> 1847 1848This helper function helps with :lang(PIR) closures in rules 1849by adding a ".sub" wrapper around the code if one isn't 1850already present. 1851 1852=back 1853 1854=cut 1855 1856.namespace [ 'PGE';'Perl6Regex' ] 1857 1858.sub 'PIR_closure' 1859 .param string code 1860 $I0 = index code, '.sub' 1861 if $I0 >= 0 goto end 1862 code = concat ".sub anon :anon\n.param pmc match\n", code 1863 code .= "\n.end\n" 1864 end: 1865 .return (code) 1866.end 1867 1868 1869.namespace [ 'PGE';'Exp';'Action' ] 1870 1871.sub 'perl6exp' :method 1872 .param pmc pad 1873 $S0 = pad['name'] 1874 self['actionname'] = $S0 1875 .return (self) 1876.end 1877 1878 1879.namespace [ 'PGE';'Exp';'Cut' ] 1880 1881.sub 'perl6exp' :method 1882 .param pmc pad 1883 $S0 = self.'ast'() 1884 if $S0 == ':::' goto cut_rule 1885 if $S0 == '<commit>' goto cut_match 1886 self['cutmark'] = PGE_CUT_GROUP 1887 .return (self) 1888 cut_rule: 1889 self['cutmark'] = PGE_CUT_RULE 1890 .return (self) 1891 cut_match: 1892 self['cutmark'] = PGE_CUT_MATCH 1893 .return (self) 1894.end 1895 1896# Local Variables: 1897# mode: pir 1898# fill-column: 100 1899# End: 1900# vim: expandtab shiftwidth=4 ft=pir: 1901