1header 2{ 3// This source file was generated by ANTLR. Do not edit manually! 4package org.epic.core.parser; 5} 6 7class PerlLexer extends Lexer("org.epic.core.parser.PerlLexerBase"); 8options 9{ 10 k = 4; 11 charVocabulary = '\0'..'\uFFFF'; 12 importVocab = shared; 13 exportVocab = Perl; 14} 15 16WS: (' ' | '\t' | NEWLINE)+; 17 18COMMENT: '#' (NOT_NEWLINE)* (NEWLINE! | '\uFFFF'!); 19 20SEMI 21 : ';' 22 { 23 format = glob = afterArrow = afterDArrow = false; 24 qmarkRegexp = slashRegexp = true; 25 $setToken(createOperatorToken(PerlTokenTypes.SEMI, ";")); 26 } 27 ; 28 29OPEN_CURLY 30 : '{' 31 { 32 $setToken(createCurlyToken(PerlTokenTypes.OPEN_CURLY, "{")); pc++; 33 proto = glob = afterSub = false; 34 qmarkRegexp = slashRegexp = true; 35 }; 36 37CLOSE_CURLY 38 : '}' 39 { 40 pc--; $setToken(createCurlyToken(PerlTokenTypes.CLOSE_CURLY, "}")); 41 qmarkRegexp = slashRegexp = format = glob = false; 42 }; 43 44OPEN_BQUOTE: '`' { getParent().expectStringEnd('`'); }; 45OPEN_SQUOTE: '\'' { getParent().expectStringEnd('\''); }; 46OPEN_DQUOTE: '"' { getParent().expectStringEnd('"'); }; 47 48MAYBE_SPECIAL_VAR 49 : { !proto }? ( 50 ("**=") 51 => "**=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_MULMULEQ, "**=")); } 52 | ("**") 53 => "**" { $setToken(createOperatorToken(PerlTokenTypes.OPER_MULMUL, "**")); } 54 | ("*=") 55 => "*=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_MULEQ, "*=")); } 56 | (VAR) 57 => VAR { $setType(PerlTokenTypes.VAR); } 58 | (SPECIAL_VAR) 59 => SPECIAL_VAR { $setType(PerlTokenTypes.SPECIAL_VAR); glob = false; } 60 | ('*') 61 => '*' { $setToken(createOperatorToken(PerlTokenTypes.OPER_MUL, "*")); } 62 | ("%=") 63 => "%=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_MODEQ, "%=")); } 64 | ('%') 65 => '%' { $setToken(createOperatorToken(PerlTokenTypes.OPER_MOD, "%")); } 66 | (VAR_START) // incomplete variable 67 => VAR_START { $setType(PerlTokenTypes.VAR); } 68 ); 69 70protected SPECIAL_VAR 71 // see English.pm for the *? operators 72 : ( 73 "*^A" | "*^C" | "*^D" | "*^E" | "*^F" | "*^I" | "*^L" | "*^N" 74 | "*^O" | "*^P" | "*^R" | "*^S" | "*^T" | "*^V" | "*^W" | "*^X" 75 | "*/" | "*?" | "*%" | "*@" | "*_" | "*-" | "*+" | "*." | "*|" | "*," 76 | "*;" | "*~" | "*:" | "*^" | "*<" | "*>" | "*(" | "*)" /* | "*$" TODO, watch out: 5*$x */ 77 | "*\"" | "*\\" 78 79 | "$$m" | "$$s" 80 | "$^A" | "$^C" | "$^D" | "$^E" | "$^F" | "$^H" | "$^I" | "$^L" | "$^M" 81 | "$^N" | "$^O" | "$^P" | "$^R" | "$^S" | "$^T" | "$^V" | "$^W" | "$^X" 82 | "$/" | "$?" | "$%" | "$@" | "$_" | "$-" | "$+" | "$." | "$|" | "$!" 83 | "$;" | "$~" | "$$" | "$`" | "$'" | "$<" | "$>" | "$(" | "$)" | "$," 84 | "$[" | "$]" | "$:" | "$*" | "$#" | "$=" | "$^" | "$&" 85 | "$\"" | "$\\" 86 87 | "@+" | "@-" | "@_" | "@$" 88 89 | "%!" | "%@" | "%^H" 90 ); 91 92protected VAR 93 : { !proto }? VAR_START (ID | CURLY | '@' | '\uFFFF'!) 94 { glob = qmarkRegexp = slashRegexp = false; }; 95 96protected VAR_START 97 : ('@' | '$' | '%' /* | '*' TODO, but think of $x*5 */) 98 ('@' | '$' | '%' | '*' | '#' /* | ' ' TODO, but think of $x % $y */)* 99 ; 100 101protected CURLY 102 : '{' 103 ( CURLY | NEWLINE | ~('}' | '\uFFFF'))* 104 ('}' | '\uFFFF'!) 105 ; 106 107OPER_AND: "&&" { $setToken(createOperatorToken(PerlTokenTypes.OPER_AND, "&&")); }; 108OPER_OR: "||" { $setToken(createOperatorToken(PerlTokenTypes.OPER_OR, "||")); }; 109OPER_LTEQ: "<=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_LTEQ, "<=")); }; 110OPER_GTEQ: ">=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_GTEQ, ">=")); }; 111 112OPER_ANDANDEQ: "&&=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_ANDANDEQ, "&&=")); }; 113OPER_OROREQ: "||=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_OROREQ, "||=")); }; 114 115PROTO 116 : { proto }? 117 ('$' | '@' | '%' | '*' | ';' | '\\' | '&' | '_' | WS)+ 118 { proto = false; } 119 ; 120 121OPER_S: "-s" { $setToken(createOperatorToken(PerlTokenTypes.OPER_S, "-s")); }; 122 123OPER_SLASHSLASH 124 : { !slashRegexp }? "//" 125 { 126 $setToken(createOperatorToken(PerlTokenTypes.OPER_SLASHSLASH, "//")); 127 }; 128 129OPER_SLASHSLASHEQ 130 : { !slashRegexp }? "//=" 131 { 132 $setToken(createOperatorToken(PerlTokenTypes.OPER_SLASHSLASHEQ, "//=")); 133 }; 134 135OPEN_SLASH 136 : '/' 137 { 138 if (slashRegexp) 139 { 140 getParent().expectStringSuffix(); // second 141 getParent().expectStringEnd('/'); // first 142 slashRegexp = qmarkRegexp = false; 143 } 144 else 145 { 146 $setToken(createOperatorToken(PerlTokenTypes.OPER_DIV, "/")); 147 } 148 }; 149 150protected OPER_QMARK: ; 151 152OPEN_QMARK 153 : '?' 154 { 155 if (qmarkRegexp) 156 { 157 getParent().expectStringSuffix(); // second 158 getParent().expectStringEnd('?'); // first 159 slashRegexp = qmarkRegexp = false; 160 } 161 else 162 { 163 $setToken(createOperatorToken(PerlTokenTypes.OPER_QMARK, "?")); 164 } 165 }; 166 167SUBST_OR_MATCH_OR_WORD // this disambiguation rule disfavours EXPRs too much :-( 168 : { !afterArrow }? ((SUBST_OR_MATCH_OPER | 'x') (('A'..'Z' | 'a'..'z' | '0'..'9') | ((WS_CHAR)* "=>"))) 169 => { notOper = true; } t1:WORD { $setToken(t1); } 170 | { !afterArrow && !afterSub }? (("tr" | 's' | 'y') ~'}') 171 => SUBST_EXPR { $setType(PerlTokenTypes.SUBST_EXPR); } 172 | { !afterArrow || afterDArrow }? (("qq" | "qx" | "qw" | "qr" | 'm' | 'q') ~('a'..'z' | '0'..'9' | '_' | '}' | '\r' | '\n' | ' ')) 173 => MATCH_EXPR { $setType(PerlTokenTypes.MATCH_EXPR); } 174 | (NUMBER) 175 => n:NUMBER { $setToken(n); } 176 | (':' ('\uFFFF'! | ~':')) 177 => ':' 178 { 179 glob = false; 180 $setToken(createOperatorToken(PerlTokenTypes.OPER_COLON, ":")); 181 } 182 | t3:WORD { $setToken(t3); } 183 ; 184 185protected SUBST_EXPR 186 : ("tr" | 's' | 'y') 187 { 188 getParent().expectSubstExpr(); 189 slashRegexp = qmarkRegexp = false; 190 }; 191 192protected MATCH_EXPR 193 : ("qq" | "qx" | "qw" | "qr" | 'm' | 'q') 194 { 195 getParent().expectStringSuffix(); // second 196 getParent().expectString(); // first 197 slashRegexp = qmarkRegexp = false; 198 }; 199 200OPER_DARROW 201 : "=>" 202 { 203 $setToken(createOperatorToken(PerlTokenTypes.OPER_DARROW, "=>")); 204 afterArrow = afterDArrow = true; 205 }; 206 207OPER_ARROW 208 : "->" 209 { 210 $setToken(createOperatorToken(PerlTokenTypes.OPER_ARROW, "->")); 211 qmarkRegexp = slashRegexp = false; 212 afterArrow = true; 213 }; 214 215OPER_DOUBLEEQ 216 : "==" 217 { $setToken(createOperatorToken(PerlTokenTypes.OPER_DOUBLEEQ, "==")); }; 218 219OPER_NOTEQ 220 : "!=" 221 { $setToken(createOperatorToken(PerlTokenTypes.OPER_NOTEQ, "!=")); }; 222 223OPER_EQMATCH 224 : "=~" 225 { afterArrow = afterDArrow = false; $setToken(createOperatorToken(PerlTokenTypes.OPER_EQMATCH, "=~")); }; 226 227OPER_SMARTMATCH 228 : "~~" 229 { afterArrow = afterDArrow = false; $setToken(createOperatorToken(PerlTokenTypes.OPER_SMARTMATCH, "~~")); }; 230 231OPER_EQNOTMATCH 232 : "!~" 233 { $setToken(createOperatorToken(PerlTokenTypes.OPER_EQNOTMATCH, "!~")); }; 234 235OPER_MINUSMINUS 236 : "--" 237 { 238 $setToken(createOperatorToken(PerlTokenTypes.OPER_MINUSMINUS, "--")); 239 qmarkRegexp = false; 240 }; 241 242OPER_MINUSEQ 243 : "-=" 244 { 245 $setToken(createOperatorToken(PerlTokenTypes.OPER_PLUSPLUS, "-=")); 246 qmarkRegexp = false; 247 }; 248 249OPER_PLUSPLUS 250 : "++" 251 { 252 $setToken(createOperatorToken(PerlTokenTypes.OPER_PLUSPLUS, "++")); 253 qmarkRegexp = false; 254 }; 255 256OPER_PLUSEQ 257 : "+=" 258 { 259 $setToken(createOperatorToken(PerlTokenTypes.OPER_PLUSPLUS, "+=")); 260 qmarkRegexp = false; 261 }; 262 263OPER_ANDEQ 264 : "&=" 265 { 266 $setToken(createOperatorToken(PerlTokenTypes.OPER_OREQ, "&=")); 267 qmarkRegexp = false; 268 }; 269 270OPER_OREQ 271 : "|=" 272 { 273 $setToken(createOperatorToken(PerlTokenTypes.OPER_OREQ, "|=")); 274 qmarkRegexp = false; 275 }; 276 277OPER_XOREQ 278 : "^=" 279 { 280 $setToken(createOperatorToken(PerlTokenTypes.OPER_XOREQ, "^=")); 281 qmarkRegexp = false; 282 }; 283 284OPER_DIVEQ 285 : { !slashRegexp }? "/=" 286 { 287 $setToken(createOperatorToken(PerlTokenTypes.OPER_DIVEQ, "/=")); 288 qmarkRegexp = false; 289 }; 290 291OPER_COMMA 292 : ',' 293 { 294 $setToken(createOperatorToken(PerlTokenTypes.OPER_COMMA, ",")); 295 afterArrow = afterDArrow = false; 296 }; 297 298OPER_EQ 299 : { !format }? '=' 300 { 301 $setToken(createOperatorToken(PerlTokenTypes.OPER_EQ, "=")); 302 glob = true; 303 }; 304 305OPER_DIV 306 : '/' 307 { $setToken(createOperatorToken(PerlTokenTypes.OPER_DIV, "/")); }; 308 309OPER_PLUS 310 : '+' 311 { $setToken(createOperatorToken(PerlTokenTypes.OPER_PLUS, "+")); }; 312 313OPER_MINUS 314 : '-' 315 { $setToken(createOperatorToken(PerlTokenTypes.OPER_MINUS, "-")); }; 316 317OPER_DOTDOT 318 : ".." 319 { $setToken(createOperatorToken(PerlTokenTypes.OPER_DOTDOT, "..")); }; 320 321OPER_DOT 322 : '.' 323 { afterArrow = afterDArrow = false; $setToken(createOperatorToken(PerlTokenTypes.OPER_DOT, ".")); }; 324 325OPER_NOT 326 : '!' 327 { slashRegexp = true; $setToken(createOperatorToken(PerlTokenTypes.OPER_NOT, "!")); }; 328 329OPER_BSLASH 330 : '\\' 331 { $setToken(createOperatorToken(PerlTokenTypes.OPER_BSLASH, "\\")); }; 332 333OPEN_PAREN 334 : '(' 335 { 336 $setToken(createOperatorToken(PerlTokenTypes.OPEN_PAREN, "(")); 337 if (afterSub) { afterSub = false; proto = true; } 338 format = false; 339 glob = qmarkRegexp = slashRegexp = true; 340 }; 341 342CLOSE_PAREN 343 : ')' 344 { 345 $setToken(createOperatorToken(PerlTokenTypes.CLOSE_PAREN, ")")); 346 glob = qmarkRegexp = slashRegexp = false; 347 }; 348 349OPEN_BRACKET 350 : '[' 351 { 352 $setToken(createOperatorToken(PerlTokenTypes.OPEN_BRACKET, "[")); 353 format = false; 354 glob = qmarkRegexp = slashRegexp = true; 355 }; 356 357CLOSE_BRACKET 358 : ']' 359 { 360 $setToken(createOperatorToken(PerlTokenTypes.CLOSE_BRACKET, "]")); 361 glob = qmarkRegexp = slashRegexp = false; 362 }; 363 364FORMAT_STMT 365 : { format }? "=" 366 { 367 format = false; 368 getParent().expectFormatEnd(); 369 }; 370 371protected VAR_WITH_CURLY 372 : (VAR (WS)? '{') 373 => VAR (WS)? { getParent().expectString(); } 374 | (VAR) 375 => VAR; 376 377PROC_REF 378 : '&' ID 379 { qmarkRegexp = slashRegexp = false; }; 380 381OPER_LSHIFT_OR_HEREDOC 382 : (OPEN_HEREDOC) //("<<" (WS)? ('"' | '\'' | '`' | 'A'..'Z')) 383 => OPEN_HEREDOC 384 { $setType(PerlTokenTypes.OPEN_HEREDOC); } 385 | ("<<=") 386 => OPER_LSHIFTEQ 387 { $setToken(createOperatorToken(PerlTokenTypes.OPER_LSHIFTEQ, "<<=")); } 388 | ("<<" (WS)? ~('"' | '\'' | '`' | 'A'..'Z')) 389 => OPER_LSHIFT 390 { $setToken(createOperatorToken(PerlTokenTypes.OPER_LSHIFT, "<<")); } 391 ; 392 393protected OPER_LSHIFT: "<<"; 394protected OPER_LSHIFTEQ: "<<="; 395 396protected OPEN_HEREDOC 397 : 398 ( 399 ("<<" (WS)? '"') 400 => "<<" (WS!)? '"'! ("\\\"" | ~('"' | '\n' | '\r' | '\uFFFF'))* 401 | ("<<" (WS)? "'") 402 => "<<" (WS!)? "'"! ("\\'" | ~('\'' | '\n' | '\r' | '\uFFFF'))* 403 | ("<<" (WS)? '`') 404 => "<<" (WS!)? '`'! ("\\`" | ~('`' | '\n' | '\r' | '\uFFFF'))* 405 | ("<<" (WS)? ('A'..'Z'|'a'..'z'|'_')) 406 => "<<" ('A'..'Z'|'a'..'z'|'_')+ 407 ) 408 (NOT_NEWLINE!)* 409 { if (LA(1) != EOF_CHAR) getParent().expectHereDocEnd($getText); } 410 (NEWLINE | '\uFFFF'!) // tolerate "print <<B" at the end of file... 411 // TODO: proper handling of here-docs with quoted identifiers, 412 // see man perlop 413 ; 414 415GLOB 416 : { glob }? 417 '<' (~('<' | '>' | '\n' | '\r' | '\uFFFF'))* ('>' | '\uFFFF'!) 418 ; 419 420OPER_RSHIFT 421 : ">>" 422 { 423 $setToken(createOperatorToken(PerlTokenTypes.OPER_RSHIFT, ">>")); 424 qmarkRegexp = slashRegexp = false; 425 } 426 ; 427 428OPER_RSHIFTEQ 429 : ">>=" 430 { 431 $setToken(createOperatorToken(PerlTokenTypes.OPER_RSHIFTEQ, ">>=")); 432 qmarkRegexp = slashRegexp = false; 433 } 434 ; 435 436OPER_GT 437 : '>' 438 { 439 $setToken(createOperatorToken(PerlTokenTypes.OPER_GT, ">")); 440 qmarkRegexp = slashRegexp = false; 441 } 442 ; 443 444OPER_LT 445 : { !glob }? 446 '<' 447 { 448 $setToken(createOperatorToken(PerlTokenTypes.OPER_LT, "<")); 449 qmarkRegexp = slashRegexp = false; 450 } 451 ; 452 453OPEN_POD 454 : { getColumn() == 1 }? 455 '=' ID (NOT_NEWLINE)* (NEWLINE! | '\uFFFF'!) 456 { getParent().expectPODEnd(); } 457 ; 458 459protected NUMBER 460 : ("0x" ('0'..'9' | 'A'..'F' | 'a'..'f' | '_')+) 461 | ("0b" ('0' | '1' | '_')+) 462 | ('0'..'9') ('0'..'9' | '_')* 463 { 464 slashRegexp = qmarkRegexp = glob = afterArrow = afterDArrow = false; 465 $setType(PerlTokenTypes.NUMBER); 466 }; 467 468protected WORD 469 : ID 470 { 471 String str = $getText; 472 473 if ("use".equals(str)) $setType(PerlTokenTypes.KEYWORD_USE); 474 else if ("sub".equals(str)) { afterSub = true; $setType(PerlTokenTypes.KEYWORD_SUB); } 475 else if ("package".equals(str)) { $setType(PerlTokenTypes.KEYWORD_PACKAGE); } 476 else if ("format".equals(str) && !afterSub) { format = true; $setType(PerlTokenTypes.KEYWORD_FORMAT); } 477 else if ("__END__".equals(str)) { $setType(Token.EOF_TYPE); } 478 else if ("__DATA__".equals(str)) { $setType(Token.EOF_TYPE); } 479 else if (!afterSub) 480 { 481 if (KEYWORDS1.contains(str)) 482 { 483 if ("while".equals(str)) glob = true; 484 $setType(PerlTokenTypes.KEYWORD1); 485 } 486 else if (KEYWORDS2.contains(str)) 487 { 488 glob = str.equals("unlink"); 489 slashRegexp = false; // actually becomes true, see below! 490 $setType(PerlTokenTypes.KEYWORD2); 491 } 492 else if (OPERATORS.contains(str) && !afterArrow && !notOper) 493 { 494 glob = false; 495 $setToken(createOperatorToken(PerlTokenTypes.OPER_OTHER, str)); 496 } 497 } 498 else glob = false; 499 500 slashRegexp = !(afterArrow || slashRegexp); 501 qmarkRegexp = afterArrow = notOper = false; 502 } 503 ; 504 505protected ID 506 : { afterColon = false; } 507 ( 508 { 509 // keep going if we have "::", break on ":" 510 // there must be a better way to implement it X-( 511 if (LA(1) == ':') 512 { 513 if (!afterColon && LA(2) != ':') break; 514 else afterColon = true; 515 } 516 else afterColon = false; 517 } WORD_CHAR)+ 518 ; 519 520protected WORD_CHAR 521 : ('A'..'Z' | 'a'..'z' | '0'..'9' | '_' | ':') 522 ; 523 524protected WS_CHAR 525 : (' ' | '\t' | '\n' | '\r') 526 ; 527 528protected SUBST_OR_MATCH_OPER 529 : ("tr" | "qq" | "qx" | "qw" | "qr" | 's' | 'y' | 'm' | 'q') 530 ; 531 532OTHER: ~('\uFFFF'); 533 534protected KEYWORD1: ; 535protected KEYWORD2: ; 536protected KEYWORD_USE: ; 537protected KEYWORD_SUB: ; 538protected KEYWORD_FORMAT: ; 539protected OPEN_QUOTE: ; 540 541protected NEWLINE 542 : 543 ( 544 '\r' '\n' | // DOS 545 '\r' | // MacOS 546 '\n' // UNIX 547 ) 548 { $setType(Token.SKIP); newline(); } 549 ; 550 551protected NOT_NEWLINE 552 : ~('\r' | '\n' | '\uFFFF') 553 ; 554