1# Copyright (C) 2005-2009, Parrot Foundation.
2
3.namespace [ "PGE";"P5Regex" ]
4
5.sub "compile_p5regex"
6    .param pmc source
7    .param pmc adverbs         :slurpy :named
8
9    $I0 = exists adverbs['grammar']
10    if $I0 goto have_grammar
11    adverbs['grammar'] = 'PGE::Grammar'
12  have_grammar:
13
14    .local string target
15    target = adverbs['target']
16    target = downcase target
17
18    ##  If we're passed the results of a previous parse, use it.
19    .local pmc match
20    $I0 = isa source, ['PGE';'Match']
21    if $I0 == 0 goto parse
22    $P0 = source['expr']
23    if null $P0 goto parse
24    $I0 = isa $P0, ['PGE';'Exp']
25    if $I0 == 0 goto parse
26    match = source
27    goto analyze
28
29  parse:
30    $P0 = get_global "p5regex"
31    match = $P0(source, adverbs :flat :named)
32    if target != 'parse' goto check
33    .return (match)
34
35  check:
36    unless match goto check_1
37    $S0 = source
38    $S1 = match
39    if $S0 == $S1 goto analyze
40  check_1:
41    null $P0
42    .return ($P0)
43
44  analyze:
45    .local pmc exp, pad
46    exp = match['expr']
47    pad = new 'Hash'
48    pad['subpats'] = 0
49    exp = exp.'p5analyze'(pad)
50    .tailcall exp.'compile'(adverbs :flat :named)
51.end
52
53
54.sub "p5regex"
55    .param pmc mob
56    .param pmc adverbs        :slurpy :named
57
58    .local string stop, tighter
59    .local pmc stopstack, optable
60
61    stopstack = get_hll_global ['PGE';'P5Regex'], '@!stopstack'
62    optable = get_hll_global ["PGE";"P5Regex"], "$optable"
63
64    stop = adverbs['stop']
65    tighter = adverbs['tighter']
66    push stopstack, stop
67    $P0 = optable."parse"(mob, 'stop'=>stop, 'tighter'=>tighter)
68    $S0 = pop stopstack
69
70    .return ($P0)
71.end
72
73
74.include "cclass.pasm"
75
76.const int PGE_INF = 2147483647
77
78.sub "__onload" :load
79    .local pmc optable
80
81    optable = new ['PGE';'OPTable']
82    set_hll_global ["PGE";"P5Regex"], "$optable", optable
83
84    $P0 = get_hll_global ["PGE";"P5Regex"], "parse_lit"
85    optable.'newtok'('term:', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
86
87    optable.'newtok'('term:\b', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
88    optable.'newtok'('term:\B', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
89    optable.'newtok'('term:^',   'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
90    optable.'newtok'('term:$',   'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
91
92    optable.'newtok'('term:\d', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
93    optable.'newtok'('term:\D', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
94    optable.'newtok'('term:\s', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
95    optable.'newtok'('term:\S', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
96    optable.'newtok'('term:\w', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
97    optable.'newtok'('term:\W', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
98
99    optable.'newtok'('circumfix:( )',   'equiv'=>'term:', 'nows'=>1, 'nullterm'=>1, 'match'=>'PGE::Exp::CGroup')
100    optable.'newtok'('circumfix:(?: )', 'equiv'=>'term:', 'nows'=>1, 'nullterm'=>1, 'match'=>'PGE::Exp::Group')
101
102    $P0 = get_hll_global ['PGE';'P5Regex'], 'parse_enumclass'
103    optable.'newtok'('term:[', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
104    optable.'newtok'('term:.', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
105
106    $P0 = get_hll_global ['PGE';'P5Regex'], 'parse_quant'
107    optable.'newtok'('postfix:*', 'looser'=>'term:', 'left'=>1, 'nows'=>1, 'parsed'=>$P0)
108    optable.'newtok'('postfix:+', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0)
109    optable.'newtok'('postfix:?', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0)
110    optable.'newtok'('postfix:{', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0)
111
112    optable.'newtok'('infix:',  'looser'=>'postfix:*', 'right'=>1, 'nows'=>1, 'match'=>'PGE::Exp::Concat')
113    optable.'newtok'('infix:|', 'looser'=>'infix:',    'left'=>1,  'nows'=>1, 'match'=>'PGE::Exp::Alt')
114
115    optable.'newtok'('close:}', 'looser'=>'infix:|', 'nows'=>1)            # XXX: hack
116
117    # Create a stack for holding stop tokens
118    $P0 = new 'ResizablePMCArray'
119    set_hll_global ['PGE';'P5Regex'], '@!stopstack', $P0
120
121    $P0 = get_hll_global ["PGE";"P5Regex"], "compile_p5regex"
122    compreg "PGE::P5Regex", $P0
123.end
124
125
126.sub 'parse_error'
127    .param pmc mob
128    .param int pos
129    .param string message
130    $P0 = getattribute mob, '$.pos'
131    $P0 = pos
132    $P0 = new 'Exception'
133    $S0 = 'p5regex parse error: '
134    $S0 .= message
135    $S0 .= ' at offset '
136    $S1 = pos
137    $S0 .= $S1
138    $S0 .= ", found '"
139    $P1 = getattribute mob, '$.target'
140    $S1 = $P1
141    $S1 = substr $S1, pos, 1
142    $S0 .= $S1
143    $S0 .= "'"
144    $P0 = $S0
145    throw $P0
146    .return ()
147.end
148
149
150.sub "parse_lit"
151    .param pmc mob
152    .param pmc adverbs        :slurpy :named
153    .local string target
154    .local int pos, lastpos
155    .local int litstart, litlen
156    .local string initchar
157    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
158    lastpos = length target
159    initchar = substr target, pos, 1
160    unless initchar == '*' goto initchar_ok
161    parse_error(mob, pos, "Quantifier follows nothing")
162
163  initchar_ok:
164    if initchar == ')' goto end
165    inc pos
166    if initchar != "\\" goto term_literal
167  term_backslash:
168    initchar = substr target, pos, 1
169    inc pos
170    if pos <= lastpos goto term_backslash_ok
171    parse_error(mob, pos, "Search pattern not terminated")
172  term_backslash_ok:
173    $I0 = index "nrteab", initchar
174    if $I0 < 0 goto term_literal
175    initchar = substr "\n\r\t\e\a\b", $I0, 1
176  term_literal:
177    litstart = pos
178    litlen = 0
179    .local string stop
180    .local int stoplen
181    $P0 = get_hll_global ['PGE';'P5Regex'], '@!stopstack'
182    stop = $P0[-1]
183    stoplen = length stop
184  term_literal_loop:
185    if pos >= lastpos goto term_literal_end
186    if stoplen == 0 goto not_stop
187    $S0 = substr target, pos, stoplen
188    if $S0 == stop goto term_literal_end
189  not_stop:
190    $S0 = substr target, pos, 1
191    $I0 = index "[](){}*?+\\|^$.", $S0
192    # if not in circumfix:( ) throw error on end paren
193    if $I0 >= 0 goto term_literal_end
194    inc pos
195    inc litlen
196    goto term_literal_loop
197  term_literal_end:
198    if litlen < 1 goto term_literal_one
199    dec pos
200  term_literal_one:
201    $I0 = pos - litstart
202    $S0 = substr target, litstart, $I0
203    $S0 = concat initchar, $S0
204    mob.'!make'($S0)
205    goto end
206  end:
207    mob.'to'(pos)
208    .return (mob)
209.end
210
211.sub "parse_quant"
212    .param pmc mob
213    .param pmc adverbs        :slurpy :named
214    .local string target
215    .local int min, max, backtrack
216    .local int pos, lastpos
217    .local string key
218    key = mob['KEY']
219    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Quant')
220    lastpos = length target
221    min = 0
222    max = PGE_INF
223    backtrack = 0
224    if key == '{' goto quant_range
225    if key != '+' goto quant_max
226    min = 1
227  quant_max:
228    if key != "?" goto quant_lazy
229    max = 1
230    goto quant_lazy
231  quant_range:
232    $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
233    if $I1 <= pos goto quant_range_max
234    $S0 = substr target, pos
235    min = $S0
236    max = $S0
237    pos = $I1
238  quant_range_max:
239    $S0 = substr target, pos, 1
240    if $S0 != "," goto quant_range_end
241    inc pos
242    max = PGE_INF
243    $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
244    if $I1 <= pos goto quant_range_end
245    $S0 = substr target, pos
246    max = $S0
247    pos = $I1
248  quant_range_end:
249    $S0 = substr target, pos, 1
250    if $S0 != "}" goto err_range
251    inc pos
252  quant_lazy:
253    $S0 = substr target, pos, 1
254    if $S0 != "?" goto end
255    backtrack = PGE_BACKTRACK_EAGER
256    inc pos
257  end:
258    mob["min"] = min
259    mob["max"] = max
260    mob["backtrack"] = backtrack
261    mob.'to'(pos)
262    .return (mob)
263  err_range:
264    parse_error(mob, pos, "Error in quantified range")
265.end
266
267
268.sub parse_group
269    .param pmc mob
270    .param pmc adverbs        :slurpy :named
271    .local string target
272    .local int pos, lastpos
273    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::CGroup')
274    inc pos
275    $S0 = substr target, pos, 2
276    if $S0 == "?:" goto nocapture
277    goto end
278  nocapture:
279    pos += 2
280  end:
281    mob.'to'(pos)
282    .return (mob)
283.end
284
285.sub "parse_enumclass"
286    .param pmc mob
287    .param pmc adverbs        :slurpy :named
288    .local string target
289    .local int pos, lastpos
290    .local int isrange
291    .local string charlist
292    .local string key
293    key = mob['KEY']
294    (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
295    if key == '.' goto dot
296    lastpos = length target
297    charlist = ""
298    mob["isnegated"] = 0
299    isrange = 0
300    $S0 = substr target, pos, 1
301    if $S0 != "^" goto scan_first
302    mob["isnegated"] = 1
303    inc pos
304  scan_first:
305    if pos >= lastpos goto err_close
306    $S0 = substr target, pos, 1
307    inc pos
308    if $S0 == "\\" goto backslash
309    goto addchar
310  scan:
311    if pos >= lastpos goto err_close
312    $S0 = substr target, pos, 1
313    inc pos
314    if $S0 == "]" goto endclass
315    if $S0 == "-" goto hyphenrange
316    if $S0 != "\\" goto addchar
317  backslash:
318    $S0 = substr target, pos, 1
319    inc pos
320    $I0 = index "nrtfae0b", $S0
321    if $I0 == -1 goto addchar
322    $S0 = substr "\n\r\t\f\a\e\0\b", $I0, 1
323  addchar:
324    if isrange goto addrange
325    charlist .= $S0
326    goto scan
327  addrange:
328    isrange = 0
329    $I2 = ord charlist, -1
330    $I0 = ord $S0
331    if $I0 < $I2 goto err_range
332  addrange_1:
333    inc $I2
334    if $I2 > $I0 goto scan
335    $S1 = chr $I2
336    charlist .= $S1
337    goto addrange_1
338  hyphenrange:
339    if isrange goto addrange
340    isrange = 1
341    goto scan
342  endclass:
343    if isrange == 0 goto end
344    charlist .= "-"
345    goto end
346  dot:
347    charlist = "\n"
348    mob["isnegated"] = 1
349  end:
350    mob.'to'(pos)
351    mob.'!make'(charlist)
352    .return (mob)
353
354  err_close:
355    parse_error(mob, pos, "Unmatched [")
356  err_range:
357    $S0 = 'Invalid [] range "'
358    $S1 = chr $I2
359    $S0 .= $S1
360    $S0 .= '-'
361    $S1 = chr $I0
362    $S0 .= $S1
363    $S0 .= '"'
364    parse_error(mob, pos, $S0)
365.end
366
367
368.namespace [ "PGE";"Exp" ]
369
370.sub "p5analyze" :method
371    .param pmc pad
372    .local pmc exp
373    $I0 = 0
374  loop:
375    $I1 = defined self[$I0]
376    if $I1 == 0 goto end
377    $P0 = self[$I0]
378    $P0 = $P0."p5analyze"(pad)
379    self[$I0] = $P0
380    inc $I0
381    goto loop
382  end:
383    .return (self)
384.end
385
386.namespace [ "PGE";"Exp";"CGroup" ]
387
388.sub "p5analyze" :method
389    .param pmc pad
390    .local pmc exp
391
392    self["iscapture"] = 0
393    if self != "(" goto end
394    self["iscapture"] = 1
395    self["isscope"] = 0
396    self["isarray"] = 0
397    $I0 = pad["subpats"]
398    self["cname"] = $I0
399    inc $I0
400    pad["subpats"] = $I0
401  end:
402    exp = self[0]
403    exp = exp."p5analyze"(pad)
404    self[0] = exp
405    .return (self)
406.end
407
408
409
410# Local Variables:
411#   mode: pir
412#   fill-column: 100
413# End:
414# vim: expandtab shiftwidth=4 ft=pir:
415