1# -*- tcl -*- 2# 3# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 4# Grammar / Parsing Expression Grammar / Interpreter (Namespace based) 5 6# ### ### ### ######### ######### ######### 7## Package description 8 9## The instances of this class match an input provided by a buffer to 10## a parsing expression grammar provided by a peg container. The 11## matching process is interpretative, i.e. expressions are matched on 12## the fly and multiple as they are encountered. The interpreter 13## operates in pull-push mode, i.e. the interpreter object is in 14## charge and reads the character stream from the buffer as it needs, 15## and returns with the result of the match either when encountering 16## an error, or when the match was successful. 17 18# ### ### ### ######### ######### ######### 19## Requisites 20 21package require grammar::me::tcl 22 23# ### ### ### ######### ######### ######### 24## Implementation 25 26namespace eval ::grammar::peg::interp { 27 # Import the virtual machine for matching. 28 29 namespace import ::grammar::me::tcl::* 30 upvar #0 ::grammar::me::tcl::ok ok 31} 32 33# ### ### ### ######### ######### ######### 34## Instance API Implementation. 35 36proc ::grammar::peg::interp::setup {peg} { 37 variable ru 38 variable mo 39 variable se 40 41 if {![$peg is valid]} { 42 return -code error "Cannot initialize interpreter for invalid grammar" 43 } 44 set se [$peg start] 45 foreach s [$peg nonterminals] { 46 set ru($s) [$peg nonterminal rule $s] 47 set mo($s) [$peg nonterminal mode $s] 48 } 49 50 #parray mo 51 return 52} 53 54proc ::grammar::peg::interp::parse {nxcmd emvar astvar} { 55 variable ok 56 variable se 57 58 upvar 1 $emvar emsg $astvar ast 59 60 init $nxcmd 61 62 MatchExpr $se 63 isv_nonterminal_reduce ALL -1 64 set ast [sv] 65 if {!$ok} { 66 foreach {l m} [ier_get] break 67 lappend l [lc $l] 68 set emsg [list $l $m] 69 } 70 71 return $ok 72} 73 74# ### ### ### ######### ######### ######### 75## Internal helper methods 76 77proc ::grammar::peg::interp::MatchExpr {e} { 78 variable ok 79 variable mode 80 variable mo 81 variable ru 82 83 set op [lindex $e 0] 84 set ar [lrange $e 1 end] 85 86 switch -exact -- $op { 87 epsilon { 88 # No input to match, nor consume. Match always. 89 iok_ok 90 } 91 dot { 92 # Match and consume one character. No matter which 93 # character. Fails only when reaching eof. Does not 94 # consume input on failure. 95 96 ict_advance "Expected any character (got EOF)" 97 if {$ok && ($mode eq "value")} {isv_terminal} 98 } 99 alnum - alpha { 100 ict_advance "Expected <$op> (got EOF)" 101 if {!$ok} return 102 103 ict_match_tokclass $op "Expected <$op>" 104 if {$ok && ($mode eq "value")} {isv_terminal} 105 } 106 t { 107 # Match and consume one specific character. Fails if 108 # the character at the location is not what was 109 # expected. Does not consume input on failure. 110 111 set ch [lindex $ar 0] 112 113 ict_advance "Expected $ch (got EOF)" 114 if {!$ok} return 115 116 ict_match_token $ch "Expected $ch" 117 if {$ok && ($mode eq "value")} {isv_terminal} 118 } 119 .. { 120 # Match and consume one character, if in the specified 121 # range. Fails if the read character is outside of the 122 # range. Does not consume input on failure. 123 124 foreach {chbegin chend} $ar break 125 126 ict_advance "Expected \[$chbegin .. $chend\] (got EOF)" 127 if {!$ok} return 128 129 ict_match_tokrange $chbegin $chend "Expected \[$chbegin .. $chend\]" 130 if {$ok && ($mode eq "value")} {isv_terminal} 131 } 132 n { 133 # To match a nonterminal in the input we match its 134 # parsing expression. This can be cut short if the 135 # necessary information can be obtained from the memo 136 # cache. Does not consume input on failure. 137 138 set nt [lindex $ar 0] 139 set savemode $mode 140 set mode $mo($nt) 141 142 if {[inc_restore $nt]} { 143 if {$ok && ($mode ne "discard")} ias_push 144 set mode $savemode 145 return 146 } 147 148 set pos [icl_get] 149 set mrk [ias_mark] 150 151 MatchExpr $ru($nt) 152 153 # Generate semantic value, based on mode. 154 if {$mode eq "value"} { 155 isv_nonterminal_reduce $nt $pos $mrk 156 } elseif {$mode eq "match"} { 157 isv_nonterminal_range $nt $pos 158 } elseif {$mode eq "leaf"} { 159 isv_nonterminal_leaf $nt $pos 160 } else { 161 # mode eq "discard" 162 isv_clear 163 } 164 inc_save $nt $pos 165 166 # AST operations ... 167 ias_pop2mark $mrk 168 if {$ok && ($mode ne "discard")} ias_push 169 170 set mode $savemode 171 # Even if match is ok. 172 ier_nonterminal "Expected $nt" $pos 173 } 174 & { 175 # Lookahead predicate. And. Matches the expression 176 # against the input and returns match result. Never 177 # consumes any input. 178 179 set pos [icl_get] 180 181 MatchExpr [lindex $ar 0] 182 183 icl_rewind $pos 184 return 185 } 186 ! { 187 # Negated lookahead predicate. Matches the expression 188 # against the input and returns the negated match 189 # result. Never consumes any input. 190 191 set pos [icl_get] 192 set mrk [ias_mark] 193 194 MatchExpr [lindex $ar 0] 195 196 if {$ok} {ias_pop2mark $mrk} 197 icl_rewind $pos 198 199 iok_negate 200 return 201 } 202 * { 203 # Zero or more repetitions. This consumes as much 204 # input as it was able to match the sub 205 # expression. The expresion as a whole always matches, 206 # even if the sub expression fails (zero repetition). 207 208 set sub [lindex $ar 0] 209 210 while {1} { 211 set pos [icl_get] 212 213 set old [ier_get] 214 MatchExpr $sub 215 ier_merge $old 216 217 if {$ok} continue 218 break 219 } 220 221 icl_rewind $pos 222 iok_ok 223 return 224 } 225 + { 226 # One or more repetition. Like *, except for one match 227 # at the front which has to match for success. This 228 # expression can fail. It will consume only as much 229 # input as it was able to match. 230 231 set sub [lindex $ar 0] 232 233 set pos [icl_get] 234 235 MatchExpr $sub 236 if {!$ok} { 237 icl_rewind $pos 238 return 239 } 240 241 while {1} { 242 set pos [icl_get] 243 244 set old [ier_get] 245 MatchExpr $sub 246 ier_merge $old 247 248 if {$ok} continue 249 break 250 } 251 252 icl_rewind $pos 253 iok_ok 254 return 255 } 256 ? { 257 # Optional matching. Tries to match the sub 258 # expression. Will never fail, even if the sub 259 # expression is not matching. Consumes only input as 260 # it could match in the sub expression. Like *, but 261 # without the repetition. 262 263 set pos [icl_get] 264 265 set old [ier_get] 266 MatchExpr [lindex $ar 0] 267 ier_merge $old 268 269 if {!$ok} { 270 icl_rewind $pos 271 iok_ok 272 } 273 return 274 } 275 x { 276 # Sequence. Matches each sub expression in turn, each 277 # consuming input. In case of failure by one of the 278 # sequence elements nothing is consumed at all. 279 280 set pos [icl_get] 281 set mrk [ias_mark] 282 ier_clear 283 284 foreach e $ar { 285 286 set old [ier_get] 287 MatchExpr $e 288 ier_merge $old 289 290 if {!$ok} { 291 ias_pop2mark $mrk 292 icl_rewind $pos 293 return 294 } 295 } 296 # OK 297 return 298 } 299 / { 300 # Choice. Matches each sub expression in turn, always 301 # starting from the current location. Nothing is 302 # consumed if all branches fail. Consumes as much as 303 # was consumed by the matching branch. 304 305 set pos [icl_get] 306 set mrk [ias_mark] 307 308 ier_clear 309 foreach e $ar { 310 311 set old [ier_get] 312 MatchExpr $e 313 ier_merge $old 314 315 if {!$ok} { 316 ias_pop2mark $mrk 317 icl_rewind $pos 318 continue 319 } 320 return 321 } 322 # FAIL 323 iok_fail 324 return 325 } 326 } 327} 328 329# ### ### ### ######### ######### ######### 330## Interpreter data structures. 331 332namespace eval ::grammar::peg::interp { 333 ## Start expression. 334 ## Map from nonterminals to their expressions. 335 ## Reference to internal memo cache. 336 337 variable se {} ; # Start expression. 338 variable ru ; # Nonterminals and rule map. 339 variable mo ; # Nonterminal modes. 340 341 variable mode value ; # Matching mode. 342 343 array set ru {} 344 array set mo {} 345} 346 347# ### ### ### ######### ######### ######### 348## Package Management 349 350package provide grammar::peg::interp 0.1.1 351