1# -*- tcl -*-
2#
3# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
4# Grammar / Parsing Expression Grammar / Interpreter (Namespace based)
5
6# ### ### ### ######### ######### #########
7## Package description
8
9## The instances of this class match an input provided by a buffer to
10## a parsing expression grammar provided by a peg container. The
11## matching process is interpretative, i.e. expressions are matched on
12## the fly and multiple as they are encountered. The interpreter
13## operates in pull-push mode, i.e. the interpreter object is in
14## charge and reads the character stream from the buffer as it needs,
15## and returns with the result of the match either when encountering
16## an error, or when the match was successful.
17
18# ### ### ### ######### ######### #########
19## Requisites
20
21package require grammar::me::tcl
22
23# ### ### ### ######### ######### #########
24## Implementation
25
26namespace eval ::grammar::peg::interp {
27    # Import the virtual machine for matching.
28
29    namespace import ::grammar::me::tcl::*
30    upvar #0 ::grammar::me::tcl::ok ok
31}
32
33# ### ### ### ######### ######### #########
34## Instance API Implementation.
35
36proc ::grammar::peg::interp::setup {peg} {
37    variable ru
38    variable mo
39    variable se
40
41    if {![$peg is valid]} {
42        return -code error "Cannot initialize interpreter for invalid grammar"
43    }
44    set se [$peg start]
45    foreach s [$peg nonterminals] {
46        set ru($s) [$peg nonterminal rule $s]
47        set mo($s) [$peg nonterminal mode $s]
48    }
49
50    #parray mo
51    return
52}
53
54proc ::grammar::peg::interp::parse {nxcmd emvar astvar} {
55    variable ok
56    variable se
57
58    upvar 1 $emvar emsg $astvar ast
59
60    init $nxcmd
61
62    MatchExpr $se
63    isv_nonterminal_reduce ALL -1
64    set ast [sv]
65    if {!$ok} {
66        foreach {l m} [ier_get] break
67        lappend l [lc $l]
68        set emsg [list $l $m]
69    }
70
71    return $ok
72}
73
74# ### ### ### ######### ######### #########
75## Internal helper methods
76
77proc ::grammar::peg::interp::MatchExpr {e} {
78    variable ok
79    variable mode
80    variable mo
81    variable ru
82
83    set op [lindex $e 0]
84    set ar [lrange $e 1 end]
85
86    switch -exact -- $op {
87        epsilon {
88            # No input to match, nor consume. Match always.
89            iok_ok
90        }
91        dot {
92            # Match and consume one character. No matter which
93            # character. Fails only when reaching eof. Does not
94            # consume input on failure.
95
96            ict_advance "Expected any character (got EOF)"
97            if {$ok && ($mode eq "value")} {isv_terminal}
98        }
99        alnum - alpha {
100            ict_advance            "Expected <$op> (got EOF)"
101            if {!$ok} return
102
103            ict_match_tokclass $op "Expected <$op>"
104            if {$ok && ($mode eq "value")} {isv_terminal}
105        }
106        t {
107            # Match and consume one specific character. Fails if
108            # the character at the location is not what was
109            # expected. Does not consume input on failure.
110
111            set ch [lindex $ar 0]
112
113            ict_advance     "Expected $ch (got EOF)"
114            if {!$ok} return
115
116            ict_match_token $ch "Expected $ch"
117            if {$ok && ($mode eq "value")} {isv_terminal}
118        }
119        .. {
120            # Match and consume one character, if in the specified
121            # range. Fails if the read character is outside of the
122            # range. Does not consume input on failure.
123
124            foreach {chbegin chend} $ar break
125
126            ict_advance                        "Expected \[$chbegin .. $chend\] (got EOF)"
127            if {!$ok} return
128
129            ict_match_tokrange $chbegin $chend "Expected \[$chbegin .. $chend\]"
130            if {$ok && ($mode eq "value")} {isv_terminal}
131        }
132        n {
133            # To match a nonterminal in the input we match its
134            # parsing expression. This can be cut short if the
135            # necessary information can be obtained from the memo
136            # cache. Does not consume input on failure.
137
138            set nt [lindex $ar 0]
139            set savemode $mode
140            set mode $mo($nt)
141
142            if {[inc_restore $nt]} {
143                if {$ok && ($mode ne "discard")} ias_push
144                set mode $savemode
145                return
146            }
147
148            set pos [icl_get]
149            set mrk [ias_mark]
150
151            MatchExpr $ru($nt)
152
153            # Generate semantic value, based on mode.
154            if {$mode eq "value"} {
155                isv_nonterminal_reduce $nt $pos $mrk
156            } elseif {$mode eq "match"} {
157                isv_nonterminal_range  $nt $pos
158            } elseif {$mode eq "leaf"} {
159                isv_nonterminal_leaf   $nt $pos
160            } else {
161                # mode eq "discard"
162                isv_clear
163            }
164            inc_save $nt $pos
165
166            # AST operations ...
167            ias_pop2mark $mrk
168            if {$ok && ($mode ne "discard")} ias_push
169
170            set mode $savemode
171            # Even if match is ok.
172	    ier_nonterminal "Expected $nt" $pos
173        }
174        & {
175            # Lookahead predicate. And. Matches the expression
176            # against the input and returns match result. Never
177            # consumes any input.
178
179            set pos [icl_get]
180
181            MatchExpr [lindex $ar 0]
182
183            icl_rewind $pos
184            return
185        }
186        ! {
187            # Negated lookahead predicate. Matches the expression
188            # against the input and returns the negated match
189            # result. Never consumes any input.
190
191            set pos [icl_get]
192            set mrk [ias_mark]
193
194            MatchExpr [lindex $ar 0]
195
196            if {$ok} {ias_pop2mark $mrk}
197            icl_rewind $pos
198
199            iok_negate
200            return
201        }
202        * {
203            # Zero or more repetitions. This consumes as much
204            # input as it was able to match the sub
205            # expression. The expresion as a whole always matches,
206            # even if the sub expression fails (zero repetition).
207
208            set sub [lindex $ar 0]
209
210            while {1} {
211                set pos [icl_get]
212
213                set old [ier_get]
214                MatchExpr $sub
215                ier_merge $old
216
217                if {$ok} continue
218		break
219            }
220
221	    icl_rewind $pos
222	    iok_ok
223	    return
224        }
225        + {
226            # One or more repetition. Like *, except for one match
227            # at the front which has to match for success. This
228            # expression can fail. It will consume only as much
229            # input as it was able to match.
230
231            set sub [lindex $ar 0]
232
233            set pos [icl_get]
234
235            MatchExpr $sub
236            if {!$ok} {
237                icl_rewind $pos
238                return
239            }
240
241            while {1} {
242                set pos [icl_get]
243
244                set old [ier_get]
245                MatchExpr $sub
246                ier_merge $old
247
248                if {$ok} continue
249		break
250            }
251
252	    icl_rewind $pos
253	    iok_ok
254	    return
255        }
256        ? {
257            # Optional matching. Tries to match the sub
258            # expression. Will never fail, even if the sub
259            # expression is not matching. Consumes only input as
260            # it could match in the sub expression. Like *, but
261            # without the repetition.
262
263            set pos [icl_get]
264
265	    set old [ier_get]
266            MatchExpr [lindex $ar 0]
267	    ier_merge $old
268
269            if {!$ok} {
270                icl_rewind $pos
271                iok_ok
272            }
273            return
274        }
275        x {
276            # Sequence. Matches each sub expression in turn, each
277            # consuming input. In case of failure by one of the
278            # sequence elements nothing is consumed at all.
279
280            set pos [icl_get]
281            set mrk [ias_mark]
282            ier_clear
283
284            foreach e $ar {
285
286                set old [ier_get]
287                MatchExpr $e
288                ier_merge $old
289
290                if {!$ok} {
291                    ias_pop2mark $mrk
292                    icl_rewind $pos
293                    return
294                }
295            }
296            # OK
297            return
298        }
299        / {
300            # Choice. Matches each sub expression in turn, always
301            # starting from the current location. Nothing is
302            # consumed if all branches fail. Consumes as much as
303            # was consumed by the matching branch.
304
305            set pos [icl_get]
306            set mrk [ias_mark]
307
308            ier_clear
309            foreach e $ar {
310
311                set old [ier_get]
312                MatchExpr $e
313                ier_merge $old
314
315                if {!$ok} {
316                    ias_pop2mark $mrk
317                    icl_rewind $pos
318                    continue
319                }
320                return
321            }
322            # FAIL
323            iok_fail
324            return
325        }
326    }
327}
328
329# ### ### ### ######### ######### #########
330## Interpreter data structures.
331
332namespace eval ::grammar::peg::interp {
333    ## Start expression.
334    ## Map from nonterminals to their expressions.
335    ## Reference to internal memo cache.
336
337    variable se {} ; # Start expression.
338    variable ru    ; # Nonterminals and rule map.
339    variable mo    ; # Nonterminal modes.
340
341    variable mode value ; # Matching mode.
342
343    array set ru {}
344    array set mo {}
345}
346
347# ### ### ### ######### ######### #########
348## Package Management
349
350package provide grammar::peg::interp 0.1.1
351