1# Copyright (C) 2006-2009, Parrot Foundation.
2
3=head1 DESCRIPTION
4
5Perl6Regex - compiler and parser for Perl 6 regex
6
7=over 4
8
9=item C<compile_perl6regex(PMC source, PMC adverbs :slurpy :named)>
10
11Return the result of compiling C<source> according to Perl 6
12regex syntax and the associated C<adverbs>.  Normally this
13function is obtained using C<compreg 'PGE::Perl6Regex'> instead
14of calling it directly.
15
16Returns the compiled regular expression.  If a C<target>
17named parameter is supplied, then it will return the parse tree
18(target='parse'), the expression tree (target='exp'),
19or the resulting PIR code (target='PIR').
20
21=cut
22
23.namespace [ 'PGE';'Perl6Regex' ]
24
25.sub 'compile_perl6regex'
26    .param pmc source
27    .param pmc args            :slurpy
28    .param pmc adverbs         :slurpy :named
29
30    unless null adverbs goto set_adverbs
31    adverbs = new 'Hash'
32
33  set_adverbs:
34    $I0 = exists adverbs['grammar']
35    if $I0 goto with_grammar
36    unless args goto adverb_grammar_1
37    $S0 = shift args
38    adverbs['grammar'] = $S0
39    goto with_grammar
40  adverb_grammar_1:
41    adverbs['grammar'] = 'PGE::Grammar'
42  with_grammar:
43    $I0 = exists adverbs['name']
44    if $I0 goto with_name
45    unless args goto with_name
46    $S0 = shift args
47    adverbs['name'] = $S0
48  with_name:
49    $I0 = exists adverbs['lang']
50    if $I0 goto with_lang
51    adverbs['lang'] = 'PIR'
52  with_lang:
53    $I0 = exists adverbs['ignorecase']
54    if $I0 goto with_ignorecase
55    $I0 = adverbs['i']
56    adverbs['ignorecase'] = $I0
57  with_ignorecase:
58    $I0 = exists adverbs['sigspace']
59    if $I0 goto with_sigspace
60    $I0 = exists adverbs['s']
61    if $I0 goto with_s
62    $I0 = exists adverbs['words']
63    if $I0 goto with_words
64    $I0 = adverbs['w']
65    adverbs['sigspace'] = $I0
66    goto with_sigspace
67  with_s:
68    $I0 = adverbs['s']
69    adverbs['sigspace'] = $I0
70    goto with_sigspace
71  with_words:
72    $I0 = adverbs['words']
73    adverbs['sigspace'] = $I0
74  with_sigspace:
75
76    .local string target
77    target = adverbs['target']
78    target = downcase target
79
80    ##   If we're passed the results of a previous parse,  use it.
81    .local pmc match, exp
82    $I0 = isa source, ['PGE';'Match']
83    if $I0 == 0 goto parse
84    $P0 = source['expr']
85    if null $P0 goto parse
86    $I0 = isa $P0, ['PGE';'Exp']
87    if $I0 == 0 goto parse
88    match = source
89    goto analyze
90
91  parse:
92    ##   Let's parse the source as a regex
93    $P0 = get_global 'regex'
94    match = $P0(source, adverbs :flat :named)
95    if source == '' goto err_null
96    if target != 'parse' goto check
97    .return (match)
98
99  check:
100    unless match goto check_1
101    $S0 = source
102    $S1 = match
103    if $S0 == $S1 goto analyze
104  check_1:
105    null $P0
106    .return ($P0)
107
108  analyze:
109    .local pmc pad
110    exp = match['expr']
111    pad = clone adverbs
112    $P0 = new 'Hash'
113    pad['lexscope'] = $P0
114    exp = exp.'perl6exp'(pad)
115    if null exp goto err_null
116    .tailcall exp.'compile'(adverbs :flat :named)
117
118  err_null:
119    $I0 = match.'from'()
120    'parse_error'(match, $I0, 'Null pattern illegal')
121.end
122
123
124=item C<regex(PMC mob, PMC adverbs :slurpy :named)>
125
126Parses a regex according to Perl 6 regex syntax, and returns
127the corresponding parse tree.
128
129=cut
130
131.sub 'regex'
132    .param pmc mob
133    .param pmc adverbs         :slurpy :named
134
135    .local string stop, tighter
136    .local pmc stopstack, optable, match
137
138    stopstack = get_global '@!stopstack'
139    optable = get_global '$optable'
140
141    stop = adverbs['stop']
142    tighter = adverbs['tighter']
143    push stopstack, stop
144    match = optable.'parse'(mob, 'stop'=>stop, 'tighter'=>tighter)
145    $S0 = pop stopstack
146
147    .return (match)
148.end
149
150=item C<p6escapes>
151
152Parse and calculate various Perl 6 string escapes, such as \n, \r,
153\x, \o, and \c.  For the latter escapes, also handle the bracketed
154forms and other special forms.
155
156Note that this function is used directly by PCT::Grammar and Rakudo,
157and someday may be refactored to a different location.
158
159=cut
160
161.sub 'trim'
162    .param string s
163    .local int rpos, lpos
164    rpos = length s
165    lpos = find_not_cclass .CCLASS_WHITESPACE, s, 0, rpos
166  rtrim_loop:
167    unless rpos > lpos goto rtrim_done
168    dec rpos
169    $I0 = is_cclass .CCLASS_WHITESPACE, s, rpos
170    if $I0 goto rtrim_loop
171  rtrim_done:
172    inc rpos
173    $I0 = rpos - lpos
174    $S0 = substr s, lpos, $I0
175    .return ($S0)
176.end
177
178
179.sub 'p6escapes'
180    .param pmc mob
181    .param pmc adverbs         :slurpy :named
182    .local string target, backchar, literal
183    .local int pos, lastpos
184    $P0 = get_hll_global ['PGE'], '$!MATCH'
185    (mob, pos, target) = $P0.'new'(mob, adverbs :flat :named)
186    lastpos = length target
187    if pos >= lastpos goto fail
188    $S0 = substr target, pos, 1
189    inc pos
190    if $S0 != "\\" goto fail
191    if pos >= lastpos goto fail
192    backchar = substr target, pos, 1
193    inc pos
194    backchar = downcase backchar
195    $I0 = index "\\0abefnrtxco", backchar
196    if $I0 < 0 goto fail
197    if $I0 >= 9 goto scan_xco
198    literal = substr "\\\0\a\b\e\f\n\r\t", $I0, 1
199    goto succeed
200  scan_xco:
201    ##  Handle \x, \c, and \o escapes.  Start by converting
202    ##  backchar into the appropriate radix, then loop through
203    ##  the characters that follow to compute the decimal value
204    ##  of codepoints, and concatenate the codepoints into a
205    ##  literal.
206    .local int base, decnum, isbracketed
207    base = index '        o c     x', backchar
208    literal = ''
209    $S0 = substr target, pos, 1
210    isbracketed = iseq $S0, '['
211    pos += isbracketed
212    ##  Handle the case of \cC (control escape).
213    if base != 10 goto scan_xco_char
214    if isbracketed goto scan_xco_char
215    $I0 = is_cclass .CCLASS_NUMERIC, $S0, 0
216    if $I0 goto scan_xco_char
217    ##  xor the 64 bit
218    $I0 = ord $S0
219    bxor $I0, 64
220    literal = chr $I0
221    inc pos
222    goto succeed
223  scan_xco_char:
224    decnum = 0
225    # inside brackets, skip leading ws
226    unless isbracketed goto scan_xco_char_ws
227    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
228  scan_xco_char_ws:
229    if base != 10 goto scan_xco_char_digits
230    unless isbracketed goto scan_xco_char_digits
231    $I0 = is_cclass .CCLASS_NUMERIC, target, pos
232    if $I0 goto scan_xco_char_digits
233    ##  look up character by name
234    .local int namepos
235    namepos = index target, ']', pos
236    if namepos < 0 goto err_missing_bracket
237    $I0 = index target, ',', pos
238    if $I0 < 0 goto have_namepos
239    if namepos < $I0 goto have_namepos
240    namepos = $I0
241  have_namepos:
242    $I0 = namepos - pos
243    $S0 = substr target, pos, $I0
244    $S0 = 'trim'($S0)
245    decnum = find_codepoint $S0
246    if decnum < 0 goto err_unicode_name
247    pos = namepos
248    goto scan_xco_char_end
249  scan_xco_char_digits:
250    $S0 = substr target, pos, 1
251    $I0 = index "0123456789abcdef0123456789ABCDEF", $S0
252    if $I0 < 0 goto scan_xco_char_end
253    $I0 %= 16
254    if $I0 >= base goto scan_xco_char_end
255    decnum *= base
256    decnum += $I0
257    inc pos
258    goto scan_xco_char_digits
259  scan_xco_char_end:
260    $S1 = chr decnum
261    literal = concat literal, $S1
262    unless isbracketed goto scan_xco_end
263    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
264    $S0 = substr target, pos, 1
265    if $S0 == ']' goto scan_xco_end
266    if $S0 == '' goto err_missing_bracket
267    if $S0 != ',' goto err_digit
268    inc pos
269    goto scan_xco_char
270  scan_xco_end:
271    pos += isbracketed
272  succeed:
273    mob.'!make'(literal)
274    mob.'to'(pos)
275  fail:
276    .return (mob)
277
278  err_unicode_name:
279    $S0 = concat "Unrecognized character name ", $S0
280    'parse_error'(mob, pos, $S0)
281  err_missing_bracket:
282    'parse_error'(mob, pos, "Missing close bracket for \\x[...], \\o[...], or \\c[...]")
283  err_digit:
284    'parse_error'(mob, pos, "Invalid digit in \\x[...], \\o[...], or \\c[...]")
285.end
286
287
288=item C<onload()>
289
290Initializes the Perl6Regex parser and other data structures
291needed for compiling regexes.
292
293=cut
294
295.include 'cclass.pasm'
296
297.namespace [ 'PGE';'Perl6Regex' ]
298
299.sub '__onload' :load
300    .local pmc p6meta
301    p6meta = new 'P6metaclass'
302    p6meta.'new_class'('PGE::Exp::WS', 'parent'=>'PGE::Exp::Subrule')
303    p6meta.'new_class'('PGE::Exp::Alias', 'parent'=>'PGE::Exp')
304
305    .local pmc optable
306    optable = new ['PGE';'OPTable']
307    set_global '$optable', optable
308
309    $P0 = get_global 'parse_term'
310    optable.'newtok'('term:',    'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
311
312    $P0 = get_global 'parse_term_ws'
313    optable.'newtok'('term:#',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
314
315    $P0 = get_global 'parse_term_backslash'
316    optable.'newtok'("term:\\",  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
317
318    optable.'newtok'('term:^',   'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
319    optable.'newtok'('term:^^',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
320    optable.'newtok'('term:$$',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
321    optable.'newtok'('term:\b',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
322    optable.'newtok'('term:\B',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
323    optable.'newtok'('term:<<',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
324    optable.'newtok'('term:>>',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
325    optable.'newtok'('term:<?>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
326    optable.'newtok'('term:<!>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
327    optable.'newtok'(unicode:"term:\xab", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
328    optable.'newtok'(unicode:"term:\xbb", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
329
330    optable.'newtok'('term:.',   'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
331    optable.'newtok'('term:\d',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
332    optable.'newtok'('term:\D',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
333    optable.'newtok'('term:\s',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
334    optable.'newtok'('term:\S',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
335    optable.'newtok'('term:\w',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
336    optable.'newtok'('term:\W',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
337    optable.'newtok'('term:\N',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
338    optable.'newtok'('term:\n',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Newline')
339
340    $P0 = get_global 'parse_dollar'
341    optable.'newtok'('term:$',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
342
343    $P0 = get_global 'parse_subrule'
344    optable.'newtok'('term:<',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
345    optable.'newtok'('term:<?',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
346    optable.'newtok'('term:<!',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
347    optable.'newtok'('term:<.',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
348
349    $P0 = get_global 'parse_enumcharclass'
350    optable.'newtok'('term:<[',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
351    optable.'newtok'('term:<+', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
352    optable.'newtok'('term:<-', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
353    optable.'newtok'('term:<![', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
354
355    $P0 = get_global 'parse_quoted_literal'
356    optable.'newtok'("term:'",  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
357
358    $P0 = get_global 'parse_goal'
359    optable.'newtok'('term:~', 'equiv'=>'term:', 'parsed'=>$P0)
360
361    optable.'newtok'('term:::',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
362    optable.'newtok'('term::::', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
363    optable.'newtok'('term:<cut>',    'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
364    optable.'newtok'('term:<commit>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
365
366    $P0 = get_global 'parse_closure'
367    optable.'newtok'("term:{{",   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
368    optable.'newtok'("term:<?{{", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
369    optable.'newtok'("term:<!{{", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
370
371    $P0 = get_global 'parse_action'
372    optable.'newtok'("term:{*}",      'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
373
374
375    optable.'newtok'('circumfix:[ ]', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Group')
376    optable.'newtok'('circumfix:( )', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CGroup')
377
378    $P0 = get_global 'parse_quant'
379    optable.'newtok'('postfix:*',  'looser'=>'term:', 'parsed'=>$P0)
380    optable.'newtok'('postfix:+',  'equiv'=>'postfix:*', 'parsed'=>$P0)
381    optable.'newtok'('postfix:?',  'equiv'=>'postfix:*', 'parsed'=>$P0)
382    optable.'newtok'('postfix::',  'equiv'=>'postfix:*', 'parsed'=>$P0)
383    optable.'newtok'('postfix:**', 'equiv'=>'postfix:*', 'parsed'=>$P0)
384    $P0 = get_global 'parse_quant_error'
385    optable.'newtok'('term:*', 'equiv'=>'term:', 'parsed'=>$P0)
386    optable.'newtok'('term:+', 'equiv'=>'term:', 'parsed'=>$P0)
387    optable.'newtok'('term:?', 'equiv'=>'term:', 'parsed'=>$P0)
388
389    optable.'newtok'('infix:',   'looser'=>'postfix:*', 'assoc'=>'list', 'nows'=>1, 'match'=>'PGE::Exp::Concat')
390    optable.'newtok'('infix:&',  'looser'=>'infix:', 'nows'=>1, 'match'=>'PGE::Exp::Conj')
391    optable.'newtok'('infix:|',  'looser'=>'infix:&', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
392    optable.'newtok'('prefix:|', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
393    optable.'newtok'('infix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
394    optable.'newtok'('prefix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
395
396    optable.'newtok'('infix::=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias')
397    optable.'newtok'('infix:=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias')
398
399    $P0 = get_global 'parse_modifier'
400    optable.'newtok'('prefix::', 'looser'=>'infix:|', 'parsed'=>$P0)
401
402    optable.'newtok'('close:}',  'precedence'=>'<', 'nows'=>1)
403
404    .local pmc esclist
405    esclist = new 'Hash'
406    set_global '%esclist', esclist
407    esclist['e'] = "\e"
408    esclist['f'] = "\f"
409    esclist['r'] = "\r"
410    esclist['t'] = "\t"
411    esclist['v'] = unicode:"\x0a\x0b\x0c\x0d\x85\u2028\u2029"
412    esclist['h'] = unicode:"\x09\x20\xa0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"
413    esclist['n'] = unicode:"\x0a\x0d\x0c\x85\u2028\u2029"
414    # See http://www.unicode.org/Public/UNIDATA/PropList.txt for above
415
416    # Create and store closure preprocessors in %closure_pp
417    $P0 = new 'Hash'
418    set_hll_global ['PGE';'Perl6Regex'], '%closure_pp', $P0
419    $P1 = get_hll_global ['PGE';'Perl6Regex'], 'PIR_closure'
420    $P0["PIR"] = $P1
421
422    # Create an array for holding stop tokens
423    $P0 = new 'ResizablePMCArray'
424    set_hll_global ['PGE';'Perl6Regex'], '@!stopstack', $P0
425
426    $P0 = get_global 'compile_perl6regex'
427    compreg 'PGE::Perl6Regex', $P0
428    .return ()
429.end
430
431
432=item C<parse_term(PMC mob [, PMC adverbs :slurpy :named])>
433
434Parses literal strings and whitespace.
435Return a failed match if the stoptoken is found.
436
437=cut
438
439.sub 'parse_term'
440    .param pmc mob
441    .param pmc adverbs         :slurpy :named
442
443    .local string target
444    .local int pos, lastpos
445    $P0 = getattribute mob, '$.target'
446    target = $P0
447    $P0 = getattribute mob, '$.pos'
448    pos = $P0
449    lastpos = length target
450
451    .local string stop
452    $P0 = get_hll_global ['PGE';'Perl6Regex'], '@!stopstack'
453    stop = $P0[-1]
454
455    $I0 = is_cclass .CCLASS_WHITESPACE, target, pos
456    if $I0 goto term_ws
457    $I0 = length stop
458    if $I0 == 0 goto not_stop
459    $S0 = substr target, pos, $I0
460    if $S0 == stop goto end_noterm
461  not_stop:
462    ##   find length of word character sequence
463    .local int litlen
464    $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos
465    litlen = $I0 - pos
466
467    ##   if we didn't find any, return no term
468    if litlen == 0 goto end_noterm
469
470    ##   for multi-char unquoted literals, leave the last character
471    ##   in case it's quantified (it gets processed as a subsequent term)
472    if litlen < 2 goto term_literal
473    dec litlen
474  term_literal:
475    $S0 = substr target, pos, litlen
476    pos += litlen
477    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
478    mob.'!make'($S0)
479    mob.'to'(pos)
480    .return (mob)
481
482  term_ws:
483    .tailcall 'parse_term_ws'(mob)
484
485  end_noterm:
486    $S0 = substr target, pos, 1
487    if $S0 == ':' goto err_cut
488    (mob) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
489    .return (mob)
490  err_cut:
491    'parse_error'(mob, pos, 'Quantifier follows nothing in regex')
492    .return (mob)
493.end
494
495
496=item C<parse_term_backslash(mob [, adverbs :slurpy :named])>
497
498Parses terms beginning with backslash.
499
500=cut
501
502.sub 'parse_term_backslash'
503    .param pmc mob
504    .param pmc adverbs         :slurpy :named
505
506    .local string target
507    .local int pos, lastpos, isnegated
508    $P0 = getattribute mob, '$.target'
509    target = $P0
510    $P0 = getattribute mob, '$.pos'
511    pos = $P0
512    lastpos = length target
513    isnegated = 0
514
515    .local string backchar, charlist
516    ##  get whatever follows the backslash
517    backchar = substr target, pos, 1
518    charlist = backchar
519    inc pos
520
521    ##  if it's not a word character, it's a quoted metachar
522    $I0 = is_cclass .CCLASS_WORD, backchar, 0
523    unless $I0 goto term_literal
524
525    ##  if it's a word character, it may be negated
526    isnegated = is_cclass .CCLASS_UPPERCASE, backchar, 0
527    ##  $S0 = downcase charlist
528            $I0 = ord backchar
529            $S0 = chr $I0
530            backchar = downcase $S0
531
532    ##  if it's \x, \c, or \o, parse as string escape
533    $I0 = index 'xco', backchar
534    if $I0 < 0 goto meta_esclist
535  meta_xco:
536    $I0 = pos - 2
537    $P0 = 'p6escapes'(mob, 'pos' => $I0)
538    unless $P0 goto err_xcoparse
539    pos = $P0.'to'()
540    charlist = $P0.'ast'()
541    unless isnegated goto term_literal
542    $I0 = length charlist
543    if $I0 > 1 goto err_negated_brackets
544    goto term_charlist
545
546  meta_esclist:
547    $P0 = get_global '%esclist'
548    $I0 = exists $P0[backchar]
549    unless $I0 goto err_reserved_metachar
550    charlist = $P0[backchar]
551    if isnegated goto term_charlist
552    $I0 = length charlist
553    if $I0 > 1 goto term_charlist
554
555  term_literal:
556    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
557    mob.'!make'(charlist)
558    mob.'to'(pos)
559    .return (mob)
560
561  term_charlist:
562    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
563    mob.'!make'(charlist)
564    mob['isnegated'] = isnegated
565    mob.'to'(pos)
566    .return (mob)
567
568  err_xcoparse:
569    parse_error(mob, pos, 'Unable to parse \x, \c, or \o value')
570  err_negated_brackets:
571    pos = mob.'from'()
572    parse_error(mob, pos, 'Cannot use comma in \\X[...] or \\O[...]')
573  err_reserved_metachar:
574    parse_error(mob, pos, 'Alphanumeric metacharacters are reserved')
575.end
576
577
578=item C<parse_term_ws(PMC mob)>
579
580Parses a whitespace term.
581
582=cut
583
584.sub 'parse_term_ws'
585    .param pmc mob
586    .param pmc adverbs         :slurpy :named
587
588    .local string target
589    .local int pos, lastpos
590    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS')
591    lastpos = length target
592
593  term_ws_loop:
594    ##   scan for the next non-whitespace character
595    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
596    $S0 = substr target, pos, 1
597    if $S0 != '#' goto end
598    ##   we have a #-comment, determine its closing delimiter
599    inc pos
600    .local string closedelim
601    closedelim = "\n"
602    $S0 = substr target, pos, 1
603    $I0 = index '<[{(', $S0
604    if $I0 < 0 goto term_ws_loop_1
605    closedelim = substr '>]})', $I0, 1
606  term_ws_loop_1:
607    $I0 = index target, closedelim, pos
608    pos = $I0 + 1
609    if pos > 0 goto term_ws_loop
610    pos = lastpos
611  end:
612    mob.'to'(pos)
613    .return (mob)
614.end
615
616
617=item C<parse_quant(PMC mob)>
618
619Parses a quantifier, such as *, +, ?, :, and all of their wondrous
620combinations.
621
622=cut
623
624.sub 'parse_quant'
625    .param pmc mob
626    .param pmc adverbs         :slurpy :named
627
628    .local string target
629    .local pmc key
630    .local int pos, lastpos
631    key = mob['KEY']
632    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Quant')
633    lastpos = length target
634
635    .local int min, max, suffixpos, sepws
636    .local string suffix
637    min = 1
638    max = 1
639    sepws = is_cclass .CCLASS_WHITESPACE, target, pos
640    suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
641
642    if key == '**' goto quant_suffix
643    if key == ':' goto quant_cut
644    if key == '+' goto quant_max
645    ##  quantifier is '?' or '*'
646    min = 0
647  quant_max:
648    if key == '?' goto quant_suffix
649    ##  quantifier is '+' or '*'
650    max = PGE_INF
651    goto quant_suffix
652
653  quant_cut:
654    #   The postfix:<:> operator may bring us here when it's really a
655    #   term:<::> term.  So, we check for that here and fail this match
656    #   if we really have a cut term.
657    if key != ':' goto quant_suffix
658    $S0 = substr target, pos, 1
659    if $S0 == ':' goto end
660    mob['backtrack'] = PGE_BACKTRACK_NONE
661
662  quant_suffix:
663    suffix = substr target, suffixpos, 2
664    if suffix == ':?' goto quant_eager
665    if suffix == ':!' goto quant_greedy
666  quant_suffix_1:
667    suffix = substr target, suffixpos, 1
668    if suffix == '?' goto quant_eager
669    if suffix == '!' goto quant_greedy
670    if suffix != ':' goto quant
671  quant_none:
672    mob['backtrack'] = PGE_BACKTRACK_NONE
673    goto quant_skip_suffix
674  quant_eager:
675    mob['backtrack'] = PGE_BACKTRACK_EAGER
676    goto quant_skip_suffix
677  quant_greedy:
678    mob['backtrack'] = PGE_BACKTRACK_GREEDY
679  quant_skip_suffix:
680    $I0 = length suffix
681    pos = suffixpos + $I0
682
683  quant:
684    if key != '**' goto quant_set
685  quant_closure:
686    $I0 = is_cclass .CCLASS_WHITESPACE, target, pos
687    sepws |= $I0
688    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
689    .local int isconst
690    isconst = is_cclass .CCLASS_NUMERIC, target, pos
691    if isconst goto brace_skip
692    $S0 = substr target, pos, 1
693    if $S0 != "{" goto parse_repetition_controller
694    inc pos
695  brace_skip:
696    $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
697    if $I1 <= pos goto err_closure
698    $S0 = substr target, pos
699    min = $S0
700    max = $S0
701    pos = $I1
702    $S0 = substr target, pos, 2
703    if $S0 != '..' goto quant_closure_end
704    pos += 2
705    max = PGE_INF
706    $S0 = substr target, pos, 1
707    if $S0 != '*' goto quant_range_end
708    inc pos
709    goto quant_closure_end
710  quant_range_end:
711    $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
712    if $I1 <= pos goto err_closure
713    $S0 = substr target, pos
714    max = $S0
715    pos = $I1
716  quant_closure_end:
717    if isconst goto brace_skip2
718    $S0 = substr target, pos, 1
719    if $S0 != "}" goto err_closure
720    inc pos
721  brace_skip2:
722    suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
723
724  quant_set:
725    mob['min'] = min
726    mob['max'] = max
727    mob.'to'(pos)
728  end:
729    .return (mob)
730
731  parse_repetition_controller:
732    .local pmc regex, repetition_controller
733    mob.'to'(pos)
734    regex = get_global 'regex'
735    #parse everything down to concatenation precedence
736    repetition_controller = regex(mob, 'tighter'=>'infix:')
737    unless repetition_controller goto err_repetition_controller
738
739    #update pos to after the matched
740    pos = repetition_controller.'to'()
741    repetition_controller = repetition_controller['expr']
742
743    # if there's surrounding ws, then add WS nodes
744    unless sepws goto sepws_done
745    $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
746    $P0.'to'(pos)
747    $P1 = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS')
748    $P1.'to'(pos)
749    push $P0, $P1
750    push $P0, repetition_controller
751    $P1 = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS')
752    $P1.'to'(pos)
753    push $P0, $P1
754    repetition_controller = $P0
755  sepws_done:
756
757    #save the matched in the mob as sep
758    mob['sep'] = repetition_controller
759
760    #force the match to be 1..Inf
761    mob['min'] = 1
762    mob['max'] = PGE_INF
763
764    #move position to after the matched
765    mob.'to'(pos)
766    .return (mob)
767
768  err_repetition_controller:
769    'parse_error'(mob, pos, "Error in repetition controller")
770  err_closure:
771    'parse_error'(mob, pos, "Error in closure quantifier")
772.end
773
774
775=item C<parse_quant_error(mob)>
776
777Throw an exception for quantifiers in term position.
778
779=cut
780
781.sub 'parse_quant_error'
782    .param pmc mob
783    .param pmc adverbs         :slurpy :named
784
785    .local int pos
786    pos = mob.'to'()
787    'parse_error'(mob, pos, "Quantifier follows nothing in regex")
788.end
789
790
791=item C<parse_dollar(PMC mob)>
792
793Parse things that begin with a dollar sign, such as scalars,
794anchors, and match subscripts.
795
796=cut
797
798.sub "parse_dollar"
799    .param pmc mob
800    .param pmc adverbs         :slurpy :named
801
802    .local string target
803    .local int pos, lastpos
804    .local string cname
805    $P0 = getattribute mob, '$.target'
806    target = $P0
807    $P0 = getattribute mob, '$.pos'
808    pos = $P0
809    lastpos = length target
810    $S0 = substr target, pos, 1
811    if $S0 == '<' goto name
812    $I0 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
813    if $I0 > pos goto numeric
814    $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos
815    if $I0 > pos goto scalar
816
817  eos_anchor:
818    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Anchor')
819    mob.'to'(pos)
820    .return (mob)
821
822  scalar:
823    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
824    dec pos
825    $I1 = $I0 - pos
826    cname = substr target, pos, $I1
827    cname = concat '"', cname
828    cname = concat cname, '"'
829    mob["cname"] = cname
830    mob.'to'($I0)
831    .return (mob)
832
833  numeric:
834    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
835    $I1 = $I0 - pos
836    cname = substr target, pos, $I1
837    mob["cname"] = cname
838    mob.'to'($I0)
839    .return (mob)
840
841  name:
842    inc pos
843    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
844    $I0 = index target, ">", pos
845    if $I0 < pos goto err_close
846  name_1:
847    $I1 = $I0 - pos
848    cname = substr target, pos, $I1
849    cname = escape cname
850    cname = concat '"', cname
851    cname = concat cname, '"'
852    mob["cname"] = cname
853    pos = $I0 + 1
854    mob.'to'(pos)
855    .return (mob)
856
857  err_close:
858    parse_error(mob, pos, "Missing close '>' in scalar")
859    .return (mob)
860.end
861
862
863=item C<parse_subname(STR target, INT pos)>
864
865Scan C<target> starting at C<pos> looking for a subrule name
866(following Perl 6's identifier syntax).  Returns any subrule
867name found, and the ending position of the name.
868
869=cut
870
871
872.sub 'parse_subname'
873    .param string target
874    .param int pos
875    .local int startpos, targetlen
876
877    targetlen = length target
878    startpos = pos
879    $I0 = pos
880  loop:
881    $I1 = find_not_cclass .CCLASS_WORD, target, $I0, targetlen
882    if $I1 == $I0 goto end
883    pos = $I1
884    $S0 = substr target, pos, 2
885    if $S0 != '::' goto end
886    $I0 = pos + 2
887    goto loop
888  end:
889    $I0 = pos - startpos
890    $S0 = substr target, startpos, $I0
891    .return ($S0, pos)
892.end
893
894
895=item C<parse_subrule(PMC mob)>
896
897Parses a subrule token.
898
899=cut
900
901.sub 'parse_subrule'
902    .param pmc mob
903    .param pmc adverbs         :slurpy :named
904
905    .local string target
906    .local pmc mobsave
907    .local int pos, lastpos
908    .local string key
909    key = mob['KEY']
910    mobsave = mob
911    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
912    lastpos = length target
913
914    ##  default to non-capturing rule
915    .local int iscapture
916    iscapture = 0
917
918    ##  see what type of subrule this is
919    if key == '<.' goto scan_subname
920    if key == '<?' goto zerowidth
921    if key == '<!' goto negated
922
923    ##  capturing subrule, get its name/alias
924    iscapture = 1
925    .local string subname, cname
926    (subname, pos) = 'parse_subname'(target, pos)
927    cname = subname
928    $S0 = substr target, pos, 1
929    unless $S0 == '=' goto subrule_arg
930    ##  aliased subrule, skip the '=' and get the real name
931    inc pos
932    goto scan_subname
933
934  negated:
935    mob['isnegated'] = 1
936  zerowidth:
937    mob['iszerowidth'] = 1
938
939  scan_subname:
940    (subname, pos) = 'parse_subname'(target, pos)
941
942  subrule_arg:
943    mob['subname'] = subname
944    $S0 = substr target, pos, 1
945    if $S0 == ':' goto subrule_text_arg
946    if $S0 != ' ' goto subrule_end
947  subrule_pattern_arg:
948    inc pos
949    mob.'to'(pos)
950    .local pmc regex
951    regex = get_global 'regex'
952    $P1 = regex(mob, 'stop'=>'>')
953    unless $P1 goto end
954    $S0 = $P1
955    mob['arg'] = $S0
956    pos = $P1.'to'()
957    mob.'to'(-1)
958    goto subrule_end
959  subrule_text_arg:
960    $I0 = pos + 1
961    pos = find_not_cclass .CCLASS_WHITESPACE, target, $I0, lastpos
962    if pos == $I0 goto end
963    if pos >= lastpos goto end
964    .local string textarg, closedelim
965    textarg = ''
966    closedelim = '>'
967    $S0 = substr target, pos, 1
968    if $S0 == '"' goto subrule_text_quote
969    if $S0 != "'" goto subrule_text_loop
970  subrule_text_quote:
971    closedelim = $S0
972    inc pos
973  subrule_text_loop:
974    if pos >= lastpos goto end
975    $S0 = substr target, pos, 1
976    if $S0 == closedelim goto subrule_text_end
977    if $S0 != "\\" goto subrule_text_add
978    inc pos
979    $S0 = substr target, pos, 1
980    if $S0 == closedelim goto subrule_text_add
981    if $S0 == "\\" goto subrule_text_add
982    textarg .= "\\"
983  subrule_text_add:
984    textarg .= $S0
985    inc pos
986    goto subrule_text_loop
987  subrule_text_end:
988    mob['arg'] = textarg
989    if closedelim == '>' goto subrule_end
990    inc pos
991    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
992  subrule_end:
993    $S0 = substr target, pos, 1
994    if $S0 != '>' goto end
995    inc pos
996    mob.'to'(pos)
997    mob['iscapture'] = iscapture
998    unless iscapture goto end
999    $S0 = escape cname
1000    $S0 = concat '"', $S0
1001    $S0 = concat $S0, '"'
1002    mob['cname'] = $S0
1003  end:
1004    .return (mob)
1005.end
1006
1007
1008=item C<parse_enumcharclass(PMC mob)>
1009
1010Extract an enumerated character list.
1011
1012=cut
1013
1014.sub 'parse_enumcharclass'
1015    .param pmc mob
1016    .param pmc adverbs         :slurpy :named
1017
1018    .local string target
1019    .local pmc term
1020    .local string op
1021    .local int pos, lastpos
1022
1023    $P0 = getattribute mob, '$.target'
1024    target = $P0
1025    pos = mob.'to'()
1026    lastpos = length target
1027    op = mob['KEY']
1028
1029    ##   handle the case of <[, <+[, <-[, and <![ as the token
1030    ##   by converting to <, <+, <-, or <!
1031    $S0 = substr op, -1, 1
1032    if $S0 != '[' goto parse_loop
1033    op = chopn op, 1
1034    goto enum
1035
1036  parse_loop:
1037    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1038    if pos >= lastpos goto err_close
1039    $S0 = substr target, pos, 1
1040    if $S0 != '[' goto subrule
1041    inc pos
1042
1043  enum:
1044    .local string charlist
1045    .local int isrange
1046    charlist = ''
1047    isrange = 0
1048
1049  enum_loop:
1050    ##   skip leading whitespace and get next character
1051    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1052    if pos >= lastpos goto err_close
1053    $S0 = substr target, pos, 1
1054    if $S0 == ']' goto enum_close
1055    if $S0 == '-' goto err_hyphen
1056    if $S0 == '.' goto enum_dotrange
1057    if $S0 != "\\" goto enum_addchar
1058  enum_backslash:
1059    inc pos
1060    ##   get escaped character
1061    $S0 = substr target, pos, 1
1062    ##   handle metas such as \n, \t, \r, etc.
1063    $I0 = index 'nrtfae0xco', $S0
1064    if $I0 == -1 goto enum_addchar
1065    if $I0 >= 7 goto enum_xco
1066    $S0 = substr "\n\r\t\f\a\e\0", $I0, 1
1067    goto enum_addchar
1068  enum_xco:
1069    $I0 = pos - 1
1070    $P0 = 'p6escapes'(mob, 'pos'=>$I0)
1071    $S0 = $P0.'ast'()
1072    pos = $P0.'to'()
1073    goto enum_addchar_1
1074  enum_addchar:
1075    inc pos
1076  enum_addchar_1:
1077    if isrange goto enum_addrange
1078    charlist .= $S0
1079    goto enum_loop
1080  enum_dotrange:
1081    ##   check if we have a .. range marker
1082    if isrange goto enum_addrange
1083    $S1 = substr target, pos, 2
1084    if $S1 != '..' goto enum_addchar
1085    pos += 2
1086    isrange = 1
1087    goto enum_loop
1088  enum_addrange:
1089    ##   add character range to charlist
1090    isrange = 0
1091    $I2 = ord charlist, -1
1092    $I0 = ord $S0
1093  enum_addrange_1:
1094    inc $I2
1095    if $I2 > $I0 goto enum_loop
1096    $S1 = chr $I2
1097    charlist .= $S1
1098    goto enum_addrange_1
1099  enum_close:
1100    inc pos
1101    ##   create a node for the charlist
1102    term = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
1103    term.'to'(pos)
1104    term.'!make'(charlist)
1105    goto combine
1106
1107  subrule:
1108    $I0 = pos
1109    .local string subname
1110    (subname, pos) = 'parse_subname'(target, $I0)
1111    if pos == $I0 goto err
1112    term = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
1113    term.'from'($I0)
1114    term.'to'(pos)
1115    term['subname'] = subname
1116    term['iscapture'] = 0
1117
1118  combine:
1119    ##   find out what operator preceded this term
1120    if op == '+' goto combine_plus
1121    if op == '-' goto combine_minus
1122    if op == '<' goto combine_init
1123    if op == '<+' goto combine_init
1124    ##   token was '<-' or '<!'
1125    term['isnegated'] = 1
1126    term['iszerowidth'] = 1
1127    if op == '<!' goto combine_init
1128    ##   token is '<-', we need to match a char by concat dot
1129    $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::CCShortcut')
1130    $P0.'to'(pos)
1131    $P0.'!make'('.')
1132    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
1133    mob.'to'(pos)
1134    mob[0] = term
1135    mob[1] = $P0
1136    goto next_op
1137
1138  combine_init:
1139    mob = term
1140    goto next_op
1141
1142  combine_plus:
1143    ##   <a+b>  ==>   <a> | <b>
1144    $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Alt')
1145    $P0.'to'(pos)
1146    $P0[0] = mob
1147    $P0[1] = term
1148    mob = $P0
1149    goto next_op
1150
1151  combine_minus:
1152    ##   <a-b> ==>   <!b> <a>
1153    term['isnegated'] = 1
1154    term['iszerowidth'] = 1
1155    $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
1156    $P0.'to'(pos)
1157    $P0[0] = term
1158    $P0[1] = mob
1159    mob = $P0
1160    goto next_op
1161
1162  next_op:
1163    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1164    if pos >= lastpos goto err_close
1165
1166    op = substr target, pos, 1
1167    inc pos
1168    if op == '+' goto parse_loop
1169    if op == '-' goto parse_loop
1170    if op != '>' goto err
1171    mob.'to'(pos)
1172    goto end
1173
1174  err:
1175    parse_error(mob, pos, "Error parsing enumerated character class")
1176    goto end
1177  err_hyphen:
1178    parse_error(mob, pos, "Unescaped '-' in charlist (use '..' or '\\-')")
1179    goto end
1180  err_close:
1181    parse_error(mob, pos, "Missing close '>' or ']>' in enumerated character class")
1182  end:
1183    .return (mob)
1184.end
1185
1186
1187=item C<parse_quoted_literal>
1188
1189Parses '...' literals.
1190
1191=cut
1192
1193.sub 'parse_quoted_literal'
1194    .param pmc mob
1195    .param pmc adverbs         :slurpy :named
1196
1197    .local int pos, lastpos
1198    .local string target
1199    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
1200    lastpos = length target
1201    lastpos -= 1
1202    .local string lit
1203    lit = ''
1204  literal_iter:
1205    if pos > lastpos goto literal_error
1206    $S0 = substr target, pos, 1
1207    if $S0 == "'" goto literal_end
1208    if $S0 != "\\" goto literal_add
1209    inc pos
1210    $S0 = substr target, pos, 1
1211  literal_add:
1212    inc pos
1213    lit .= $S0
1214    goto literal_iter
1215  literal_end:
1216    inc pos
1217    mob.'!make'(lit)
1218    mob.'to'(pos)
1219    .return (mob)
1220  literal_error:
1221    parse_error(mob, pos, "No closing ' in quoted literal")
1222    .return (mob)
1223.end
1224
1225
1226=item C<parse_goal>
1227
1228Parse a goal.
1229
1230=cut
1231
1232.sub 'parse_goal'
1233    .param pmc mob
1234    .param pmc adverbs         :slurpy :named
1235
1236    .local int pos, lastpos
1237    .local string target
1238    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
1239    lastpos = length target
1240    ##  skip any leading whitespace before goal
1241    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1242    .local pmc regex, goal, expr, alt, failsub
1243    regex = get_global 'regex'
1244    ##  parse the goal, down to concatenation precedence
1245    mob.'to'(pos)
1246    goal = regex(mob, 'tighter'=>'infix:')
1247    unless goal goto fail_goal
1248    pos = goal.'to'()
1249    goal = goal['expr']
1250    ##  skip any leading whitespace before expression
1251    pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1252    ##  parse the goal, down to concatenation precedence
1253    mob.'to'(pos)
1254    expr = regex(mob, 'tighter'=>'infix:')
1255    unless expr goto fail_expr
1256    pos = expr.'to'()
1257    expr = expr['expr']
1258    mob.'to'(pos)
1259    failsub = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
1260    failsub.'to'(pos)
1261    failsub['subname'] = 'FAILGOAL'
1262    $S0 = goal.'Str'()
1263    failsub['arg'] = $S0
1264    alt = mob.'new'(mob, 'grammar'=>'PGE::Exp::Alt')
1265    alt.'to'(pos)
1266    push alt, goal
1267    push alt, failsub
1268    push mob, expr
1269    push mob, alt
1270    .return (mob)
1271  fail_goal:
1272    'parse_error'(mob, pos, 'Unable to parse goal after ~')
1273  fail_expr:
1274    'parse_error'(mob, pos, 'Unable to parse expression after ~')
1275.end
1276
1277
1278=item C<parse_modifier>
1279
1280Parse a modifier.
1281
1282=cut
1283
1284.sub 'parse_modifier'
1285    .param pmc mob
1286    .param pmc adverbs         :slurpy :named
1287
1288    .local int pos, lastpos
1289    .local string target, value
1290    .local string key
1291    key = mob['KEY']
1292    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Modifier')
1293    lastpos = length target
1294    value = "1"
1295    $I0 = pos
1296    pos = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
1297    if pos == $I0 goto name
1298    $I1 = pos - $I0
1299    value = substr target, $I0, $I1
1300    $I0 = pos
1301  name:
1302    pos = find_not_cclass .CCLASS_WORD, target, pos, lastpos
1303    $I1 = pos - $I0
1304    if $I1 == 0 goto fail
1305    $S0 = substr target, $I0, $I1
1306    mob['key'] = $S0
1307    mob.'!make'(value)
1308    $S0 = substr target, pos, 1
1309    if $S0 != '(' goto end
1310    $I0 = pos + 1
1311    pos = index target, ')', pos
1312    $I1 = pos - $I0
1313    $S0 = substr target, $I0, $I1
1314    mob.'!make'($S0)
1315    inc pos
1316  end:
1317    ### XXX pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1318    mob.'to'(pos)
1319    .return (mob)
1320  fail:
1321    .return (mob)
1322.end
1323
1324
1325.sub 'parse_closure'
1326    .param pmc mob
1327    .param pmc adverbs         :slurpy :named
1328
1329    .local pmc key
1330    key = mob['KEY']
1331    .local string target
1332    .local int pos, len
1333    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Closure')
1334    len = 2
1335  init:
1336    $S0 = substr target, pos, 1
1337    if $S0 != "{" goto body
1338    inc len
1339    inc pos
1340    goto init
1341  body:
1342    .local string close
1343    close = repeat "}", len
1344    if key == '<?{{' goto assert_pos
1345    if key == '<!{{' goto assert_neg
1346    goto have_close
1347  assert_neg:
1348    mob['isnegated'] = 1
1349  assert_pos:
1350    mob['iszerowidth'] = 1
1351    close = concat close, '>'
1352    inc len
1353  have_close:
1354    $I0 = index target, close, pos
1355    if $I0 < pos goto err_noclose
1356    $I1 = $I0 - pos
1357    $S1 = substr target, pos, $I1
1358    mob.'!make'($S1)
1359    pos = $I0 + len
1360    mob.'to'(pos)
1361    .return (mob)
1362 err_noclose:
1363    parse_error(mob, pos, "Missing closing braces for closure")
1364    .return (mob)
1365.end
1366
1367
1368.sub 'parse_action'
1369    .param pmc mob
1370    .param pmc adverbs         :slurpy :named
1371
1372    .local string target
1373    .local int pos, keypos
1374    (mob, pos, target) = mob.'new'(mob, 'grammar' => 'PGE::Exp::Action')
1375    keypos = index target, '#= ', pos
1376    if keypos < 0 goto end
1377    $I0 = find_cclass .CCLASS_NEWLINE, target, pos, keypos
1378    if $I0 < keypos goto end
1379    .local string actionkey
1380    keypos += 3
1381    $I0 -= keypos
1382    actionkey = substr target, keypos, $I0
1383    actionkey = 'trim'(actionkey)
1384    mob['actionkey'] = actionkey
1385  end:
1386    mob.'to'(pos)
1387    .return (mob)
1388.end
1389
1390
1391.sub 'parse_error'
1392    .param pmc mob
1393    .param int pos
1394    .param string message
1395    $P0 = getattribute mob, '$.pos'
1396    $P0 = pos
1397    $P0 = new 'Exception'
1398    $S0 = 'perl6regex parse error: '
1399    $S0 .= message
1400    $S0 .= ' at offset '
1401    $S1 = pos
1402    $S0 .= $S1
1403    $S0 .= ", found '"
1404    $P1 = getattribute mob, '$.target'
1405    $S1 = $P1
1406    $S1 = substr $S1, pos, 1
1407    $S0 .= $S1
1408    $S0 .= "'"
1409    $P0 = $S0
1410    throw $P0
1411    .return ()
1412.end
1413
1414
1415
1416
1417.namespace [ 'PGE';'Exp' ]
1418
1419.sub 'perl6exp' :method
1420    .param pmc pad
1421    .return (self)
1422.end
1423
1424
1425.namespace [ 'PGE';'Exp';'Literal' ]
1426
1427.sub 'perl6exp' :method
1428    .param pmc pad
1429    $I0 = pad['ignorecase']
1430    self['ignorecase'] = $I0
1431    .return (self)
1432.end
1433
1434
1435.namespace [ 'PGE';'Exp';'Concat' ]
1436
1437.sub 'perl6exp' :method
1438    .param pmc pad
1439
1440    .local pmc array, exp
1441    .local int i, j, n
1442    array = self.'list'()
1443    n = elements array
1444    i = 0
1445    j = 0
1446  iter_loop:
1447    if i >= n goto iter_end
1448    exp = self[i]
1449    inc i
1450    exp = exp.'perl6exp'(pad)
1451    if null exp goto iter_loop
1452    self[j] = exp
1453    inc j
1454    goto iter_loop
1455  iter_end:
1456    array = j
1457    if j > 1 goto end
1458    $P0 = array[0]
1459    .return ($P0)
1460  end:
1461    .return (self)
1462.end
1463
1464
1465.namespace [ 'PGE';'Exp';'Quant' ]
1466
1467.sub 'perl6exp' :method
1468    .param pmc pad
1469
1470    $I0 = exists self['backtrack']
1471    if $I0 goto backtrack_done
1472    self['backtrack'] = PGE_BACKTRACK_GREEDY
1473    $I0 = pad['ratchet']
1474    if $I0 == 0 goto backtrack_done
1475    self['backtrack'] = PGE_BACKTRACK_NONE
1476  backtrack_done:
1477
1478    .local pmc exp0, sep
1479    .local int isarray
1480    isarray = pad['isarray']
1481    pad['isarray'] = 1
1482    exp0 = self[0]
1483    $I0 = isa exp0, ['PGE';'Exp';'WS']
1484    if $I0 goto err_parse_quant
1485    exp0['isquant'] = 1
1486    exp0 = exp0.'perl6exp'(pad)
1487    self[0] = exp0
1488    sep = self['sep']
1489    if null sep goto sep_done
1490    sep = sep.'perl6exp'(pad)
1491    self['sep'] = sep
1492  sep_done:
1493    pad['isarray'] = isarray
1494    .return (self)
1495  err_parse_quant:
1496    $P0 = get_hll_global ['PGE';'Perl6Regex'], 'parse_quant_error'
1497    $P0(self)
1498    .return (self)
1499.end
1500
1501
1502.namespace [ 'PGE';'Exp';'Group' ]
1503
1504.sub 'perl6exp' :method
1505    .param pmc pad
1506    .local pmc exp0
1507
1508    $I0 = self['isquant']
1509    if $I0 goto backtrack_done
1510    $I0 = exists self['backtrack']
1511    if $I0 goto backtrack_done
1512    $I0 = pad['ratchet']
1513    if $I0 == 0 goto backtrack_done
1514    self['backtrack'] = PGE_BACKTRACK_NONE
1515  backtrack_done:
1516
1517    exp0 = self[0]
1518    exp0 = exp0.'perl6exp'(pad)
1519    self[0] = exp0
1520    .return (self)
1521.end
1522
1523
1524.namespace [ 'PGE';'Exp';'CGroup' ]
1525
1526.sub 'perl6exp' :method
1527    .param pmc pad
1528    .local pmc exp
1529
1530    $I0 = self['isquant']
1531    if $I0 goto backtrack_done
1532    $I0 = exists self['backtrack']
1533    if $I0 goto backtrack_done
1534    $I0 = pad['ratchet']
1535    if $I0 == 0 goto backtrack_done
1536    self['backtrack'] = PGE_BACKTRACK_NONE
1537  backtrack_done:
1538
1539    self['iscapture'] = 1
1540    $I0 = exists self['isscope']
1541    if $I0 goto set_cname
1542    self['isscope'] = 1
1543
1544  set_cname:
1545    $I0 = exists self['cname']
1546    if $I0 goto set_subpats
1547    $I0 = pad['subpats']
1548    self['cname'] = $I0
1549
1550  set_subpats:
1551    .local string cname
1552    cname = self['cname']
1553    $S0 = substr cname, 0, 1
1554    if $S0 == '"' goto set_lexicals
1555    $I0 = cname
1556    inc $I0
1557    pad['subpats'] = $I0
1558
1559  set_lexicals:
1560    .local int isarray
1561    isarray = 0
1562    .local pmc lexscope
1563    lexscope = pad['lexscope']
1564    $I0 = exists lexscope[cname]
1565    if $I0 == 0 goto set_lexicals_1
1566    $P0 = lexscope[cname]
1567    $P0['isarray'] = 1
1568    isarray = 1
1569  set_lexicals_1:
1570    lexscope[cname] = self
1571
1572    .local int padarray
1573    padarray = pad['isarray']
1574    isarray |= padarray
1575    self['isarray'] = isarray
1576    $I0 = self['isscope']
1577    if $I0 == 0 goto unscoped
1578
1579  scoped:
1580    .local int subpats
1581    subpats = pad['subpats']
1582    pad['subpats'] = 0
1583    pad['isarray'] = 0
1584    $P0 = new 'Hash'
1585    pad['lexscope'] = $P0
1586    exp = self[0]
1587    exp = exp.'perl6exp'(pad)
1588    self[0] = exp
1589    pad['lexscope'] = lexscope
1590    pad['isarray'] = padarray
1591    pad['subpats'] = subpats
1592    goto end
1593
1594  unscoped:
1595    exp = self[0]
1596    exp = exp.'perl6exp'(pad)
1597    self[0] = exp
1598  end:
1599    .return (self)
1600.end
1601
1602
1603.namespace [ 'PGE';'Exp';'Subrule' ]
1604
1605.sub 'perl6exp' :method
1606    .param pmc pad
1607
1608    $I0 = self['isquant']
1609    if $I0 goto backtrack_done
1610    $I0 = exists self['backtrack']
1611    if $I0 goto backtrack_done
1612    $I0 = pad['ratchet']
1613    if $I0 == 0 goto backtrack_done
1614    self['backtrack'] = PGE_BACKTRACK_NONE
1615  backtrack_done:
1616
1617    .local int iscapture, isarray
1618    .local pmc lexscope
1619    iscapture = self['iscapture']
1620    if iscapture == 0 goto end
1621    .local string cname
1622    cname = self['cname']
1623    isarray = pad['isarray']
1624    lexscope = pad['lexscope']
1625    $I0 = exists lexscope[cname]
1626    if $I0 == 0 goto lexscope_1
1627    $P0 = lexscope[cname]
1628    $P0['isarray'] = 1
1629    isarray = 1
1630  lexscope_1:
1631    lexscope[cname] = self
1632    self['isarray'] = isarray
1633  next_cname:
1634    $S0 = substr cname, 0, 1
1635    if $S0 == '"' goto end
1636    $I0 = cname
1637    inc $I0
1638    pad['subpats'] = $I0
1639  end:
1640    $S0 = pad['dba']
1641    self['dba'] = $S0
1642    .return (self)
1643.end
1644
1645
1646.namespace [ 'PGE';'Exp';'WS' ]
1647
1648.sub 'perl6exp' :method
1649    .param pmc pad
1650
1651    $I0 = pad['sigspace']
1652    if $I0 goto end
1653    null $P0
1654    .return ($P0)
1655  end:
1656    self['subname'] = 'ws'
1657    self['iscapture'] = 0
1658    $I0 = pad['ratchet']
1659    unless $I0 goto end_1
1660    self['backtrack'] = PGE_BACKTRACK_NONE
1661  end_1:
1662    .return (self)
1663.end
1664
1665
1666.namespace [ 'PGE';'Exp';'Alt' ]
1667
1668.sub 'perl6exp' :method
1669    .param pmc pad
1670
1671    .local pmc exp0, exp1
1672    exp0 = self[0]
1673    exp1 = self[1]
1674
1675    ##   if we only have one operand (prefix:|),
1676    ##   reduce and return it.
1677    $I0 = defined self[1]
1678    if $I0 goto with_rhs
1679    .tailcall exp0.'perl6exp'(pad)
1680  with_rhs:
1681
1682    ##   if lhs is whitespace, then this is a prefix-alt and
1683    ##   we ignore it (by simply returning its rhs)
1684    $I0 = isa exp0, ['PGE';'Exp';'WS']
1685    if $I0 == 0 goto with_lhs
1686    .tailcall exp1.'perl6exp'(pad)
1687  with_lhs:
1688
1689    .local pmc lexscope, savescope, it
1690    lexscope = pad['lexscope']
1691    savescope = new 'Hash'
1692    it = iter lexscope
1693  iter_loop:
1694    unless it goto iter_end
1695    $P1 = shift it
1696    $P2 = it[$P1]
1697    savescope[$P1] = $P2
1698    goto iter_loop
1699  iter_end:
1700    $I0 = pad['subpats']
1701    exp0 = exp0.'perl6exp'(pad)
1702    self[0] = exp0
1703
1704    $I1 = pad['subpats']
1705    pad['subpats'] = $I0
1706    pad['lexscope'] = savescope
1707    exp1 = exp1.'perl6exp'(pad)
1708    self[1] = exp1
1709    $I0 = pad['subpats']
1710    if $I0 >= $I1 goto end
1711    pad['subpats'] = $I1
1712  end:
1713    .return (self)
1714.end
1715
1716
1717.namespace [ 'PGE';'Exp';'Alias' ]
1718
1719.sub 'perl6exp' :method
1720    .param pmc pad
1721    .local string cname
1722    .local pmc exp0, exp1
1723
1724    exp0 = self[0]
1725    $I0 = isa exp0, ['PGE';'Exp';'Scalar']
1726    unless $I0 goto err_no_lvalue
1727
1728    cname = exp0['cname']
1729    exp1 = self[1]
1730
1731    ##   If we're aliasing a capture group or a quantified capture
1732    ##   group, then we just move the alias name to that group.
1733    ##   Otherwise, we need to create a capture group for this
1734    ##   alias and return that.
1735
1736    $I0 = isa exp1, ['PGE';'Exp';'CGroup']
1737    if $I0 == 1 goto make_alias
1738    $I0 = isa exp1, ['PGE';'Exp';'Subrule']
1739    if $I0 == 1 goto make_alias
1740    $I0 = isa exp1, ['PGE';'Exp';'Quant']
1741    if $I0 == 0 goto add_cgroup
1742    $P0 = exp1[0]
1743    $I0 = isa $P0, ['PGE';'Exp';'CGroup']
1744    if $I0 == 0 goto add_cgroup
1745    $P0['cname'] = cname
1746    goto end
1747
1748  add_cgroup:
1749    .local pmc cexp
1750    cexp = self.'new'(self, 'grammar'=>'PGE::Exp::CGroup')
1751    $I0 = self.'from'()
1752    cexp.'from'($I0)
1753    $I0 = self.'to'()
1754    cexp.'to'($I0)
1755    cexp[0] = exp1
1756    cexp['isscope'] = 0
1757    cexp['iscapture'] = 1
1758    cexp['cname'] = cname
1759    cexp = cexp.'perl6exp'(pad)
1760    .return (cexp)
1761
1762  make_alias:
1763    exp1['cname'] = cname
1764    exp1['iscapture'] = 1
1765  end:
1766    exp1 = exp1.'perl6exp'(pad)
1767    .return (exp1)
1768
1769  err_no_lvalue:
1770    $P0 = get_hll_global ['PGE';'Perl6Regex'], 'parse_error'
1771    $I0 = self.'from'()
1772    $P0(self, $I0, 'LHS of alias must be lvalue')
1773.end
1774
1775
1776.namespace [ 'PGE';'Exp';'Modifier' ]
1777
1778.sub 'perl6exp' :method
1779    .param pmc pad
1780    .local string key
1781    .local string value
1782    key = self['key']
1783    value = self.'ast'()
1784    if key == 'words' goto sigspace
1785    if key == 's' goto sigspace
1786    if key == 'w' goto sigspace
1787    if key == 'i' goto ignorecase
1788    goto setpad
1789  sigspace:
1790    key = 'sigspace'
1791    goto setpad
1792  ignorecase:
1793    key = 'ignorecase'
1794  setpad:
1795    $P0 = pad[key]
1796    pad[key] = value
1797    .local pmc exp
1798    exp = self[0]
1799    exp = exp.'perl6exp'(pad)
1800    self[0] = exp
1801    pad[key] = $P0
1802    .return (exp)
1803.end
1804
1805.namespace [ 'PGE';'Exp';'Conj' ]
1806
1807.sub 'perl6exp' :method
1808    .param pmc pad
1809    $P0 = self[0]
1810    $P0 = $P0.'perl6exp'(pad)
1811    self[0] = $P0
1812    $P1 = self[1]
1813    $P1 = $P1.'perl6exp'(pad)
1814    self[1] = $P1
1815    .return (self)
1816.end
1817
1818
1819.namespace [ 'PGE';'Exp';'Closure' ]
1820
1821.sub 'perl6exp' :method
1822    .param pmc pad
1823    .local string lang
1824    .local pmc closure_pp
1825    .local pmc closure_fn
1826    lang = pad['lang']
1827    self['lang'] = lang
1828    # see if we need to do any pre-processing of the closure
1829    closure_pp = get_hll_global ['PGE';'Perl6Regex'], '%closure_pp'
1830    $I0 = defined closure_pp[lang]
1831    if $I0 == 0 goto end
1832    closure_fn = closure_pp[lang]
1833    $S1 = self.'ast'()
1834    $S1 = closure_fn($S1)
1835    self.'!make'($S1)
1836  end:
1837    .return (self)
1838.end
1839
1840=back
1841
1842=head1 Functions
1843
1844=over 4
1845
1846=item C<PIR_closure(string code)>
1847
1848This helper function helps with :lang(PIR) closures in rules
1849by adding a ".sub" wrapper around the code if one isn't
1850already present.
1851
1852=back
1853
1854=cut
1855
1856.namespace [ 'PGE';'Perl6Regex' ]
1857
1858.sub 'PIR_closure'
1859    .param string code
1860    $I0 = index code, '.sub'
1861    if $I0 >= 0 goto end
1862    code = concat ".sub anon :anon\n.param pmc match\n", code
1863    code .= "\n.end\n"
1864  end:
1865    .return (code)
1866.end
1867
1868
1869.namespace [ 'PGE';'Exp';'Action' ]
1870
1871.sub 'perl6exp' :method
1872    .param pmc pad
1873    $S0 = pad['name']
1874    self['actionname'] = $S0
1875    .return (self)
1876.end
1877
1878
1879.namespace [ 'PGE';'Exp';'Cut' ]
1880
1881.sub 'perl6exp' :method
1882    .param pmc pad
1883    $S0 = self.'ast'()
1884    if $S0 == ':::' goto cut_rule
1885    if $S0 == '<commit>' goto cut_match
1886    self['cutmark'] = PGE_CUT_GROUP
1887    .return (self)
1888  cut_rule:
1889    self['cutmark'] = PGE_CUT_RULE
1890    .return (self)
1891  cut_match:
1892    self['cutmark'] = PGE_CUT_MATCH
1893    .return (self)
1894.end
1895
1896# Local Variables:
1897#   mode: pir
1898#   fill-column: 100
1899# End:
1900# vim: expandtab shiftwidth=4 ft=pir:
1901