1header
2{
3// This source file was generated by ANTLR. Do not edit manually!
4package org.epic.core.parser;
5}
6
7class PerlLexer extends Lexer("org.epic.core.parser.PerlLexerBase");
8options
9{
10	k = 4;
11	charVocabulary = '\0'..'\uFFFF';
12	importVocab = shared;
13	exportVocab = Perl;
14}
15
16WS: (' ' | '\t' | NEWLINE)+;
17
18COMMENT: '#' (NOT_NEWLINE)* (NEWLINE! | '\uFFFF'!);
19
20SEMI
21	: ';'
22	{
23		format = glob = afterArrow = afterDArrow = false;
24		qmarkRegexp = slashRegexp = true;
25		$setToken(createOperatorToken(PerlTokenTypes.SEMI, ";"));
26	}
27	;
28
29OPEN_CURLY
30	: '{'
31	{
32		$setToken(createCurlyToken(PerlTokenTypes.OPEN_CURLY, "{")); pc++;
33		proto = glob = afterSub = false;
34		qmarkRegexp = slashRegexp = true;
35	};
36
37CLOSE_CURLY
38	: '}'
39	{
40		pc--; $setToken(createCurlyToken(PerlTokenTypes.CLOSE_CURLY, "}"));
41		qmarkRegexp = slashRegexp = format = glob = false;
42	};
43
44OPEN_BQUOTE: '`'  { getParent().expectStringEnd('`'); };
45OPEN_SQUOTE: '\'' {	getParent().expectStringEnd('\''); };
46OPEN_DQUOTE: '"'  { getParent().expectStringEnd('"'); };
47
48MAYBE_SPECIAL_VAR
49	: { !proto }? (
50	("**=")
51	=> "**=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_MULMULEQ, "**=")); }
52	| ("**")
53	=> "**" { $setToken(createOperatorToken(PerlTokenTypes.OPER_MULMUL, "**")); }
54	| ("*=")
55	=> "*=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_MULEQ, "*=")); }
56	| (VAR)
57	=> VAR { $setType(PerlTokenTypes.VAR); }
58	| (SPECIAL_VAR)
59	=> SPECIAL_VAR { $setType(PerlTokenTypes.SPECIAL_VAR); glob = false; }
60	| ('*')
61	=> '*' { $setToken(createOperatorToken(PerlTokenTypes.OPER_MUL, "*")); }
62	| ("%=")
63	=> "%=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_MODEQ, "%=")); }
64	| ('%')
65	=> '%' { $setToken(createOperatorToken(PerlTokenTypes.OPER_MOD, "%")); }
66	| (VAR_START) // incomplete variable
67	=> VAR_START { $setType(PerlTokenTypes.VAR); }
68	);
69
70protected SPECIAL_VAR
71	// see English.pm for the *? operators
72	: (
73	  "*^A" | "*^C" | "*^D" | "*^E" | "*^F" | "*^I" | "*^L" | "*^N"
74	| "*^O" | "*^P" | "*^R" | "*^S" | "*^T" | "*^V" | "*^W" | "*^X"
75	| "*/" |  "*?" | "*%" | "*@" | "*_" | "*-" | "*+" | "*." | "*|" | "*,"
76	| "*;" | "*~" | "*:" | "*^" | "*<" | "*>" | "*(" | "*)" /* | "*$" TODO, watch out: 5*$x */
77 	| "*\"" | "*\\"
78
79	| "$$m" | "$$s"
80	| "$^A" | "$^C" | "$^D" | "$^E" | "$^F" | "$^H" | "$^I" | "$^L" | "$^M"
81	| "$^N" | "$^O" | "$^P" | "$^R" | "$^S" | "$^T" | "$^V" | "$^W" | "$^X"
82	| "$/" | "$?" | "$%" | "$@" | "$_" | "$-" | "$+" | "$." | "$|" | "$!"
83	| "$;" | "$~" | "$$" | "$`" | "$'" | "$<" | "$>" | "$(" | "$)" | "$,"
84	| "$[" | "$]" | "$:" | "$*" | "$#" | "$=" | "$^" | "$&"
85	| "$\"" | "$\\"
86
87	| "@+" | "@-" | "@_" | "@$"
88
89	| "%!" | "%@" | "%^H"
90	);
91
92protected VAR
93	: { !proto }? VAR_START (ID | CURLY | '@' | '\uFFFF'!)
94	{ glob = qmarkRegexp = slashRegexp = false; };
95
96protected VAR_START
97	: ('@' | '$' | '%' /* | '*' TODO, but think of $x*5 */)
98	('@' | '$' | '%' | '*' | '#' /* | ' ' TODO, but think of $x % $y */)*
99	;
100
101protected CURLY
102	: '{'
103	( CURLY | NEWLINE | ~('}' | '\uFFFF'))*
104	('}' | '\uFFFF'!)
105	;
106
107OPER_AND:    "&&" { $setToken(createOperatorToken(PerlTokenTypes.OPER_AND, "&&")); };
108OPER_OR:     "||" { $setToken(createOperatorToken(PerlTokenTypes.OPER_OR, "||")); };
109OPER_LTEQ:   "<=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_LTEQ, "<=")); };
110OPER_GTEQ:   ">=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_GTEQ, ">=")); };
111
112OPER_ANDANDEQ: "&&=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_ANDANDEQ, "&&=")); };
113OPER_OROREQ:   "||=" { $setToken(createOperatorToken(PerlTokenTypes.OPER_OROREQ, "||=")); };
114
115PROTO
116	: { proto }?
117	('$' | '@' | '%' | '*' | ';' | '\\' | '&' | '_' | WS)+
118	{ proto = false; }
119	;
120
121OPER_S: "-s" { $setToken(createOperatorToken(PerlTokenTypes.OPER_S, "-s")); };
122
123OPER_SLASHSLASH
124	: { !slashRegexp }? "//"
125	{
126		$setToken(createOperatorToken(PerlTokenTypes.OPER_SLASHSLASH, "//"));
127	};
128
129OPER_SLASHSLASHEQ
130	: { !slashRegexp }? "//="
131	{
132		$setToken(createOperatorToken(PerlTokenTypes.OPER_SLASHSLASHEQ, "//="));
133	};
134
135OPEN_SLASH
136	: '/'
137	{
138		if (slashRegexp)
139		{
140    		getParent().expectStringSuffix(); // second
141    		getParent().expectStringEnd('/'); // first
142    		slashRegexp = qmarkRegexp = false;
143		}
144		else
145		{
146			$setToken(createOperatorToken(PerlTokenTypes.OPER_DIV, "/"));
147		}
148	};
149
150protected OPER_QMARK: ;
151
152OPEN_QMARK
153	: '?'
154	{
155		if (qmarkRegexp)
156		{
157    		getParent().expectStringSuffix(); // second
158    		getParent().expectStringEnd('?'); // first
159    		slashRegexp = qmarkRegexp = false;
160		}
161		else
162		{
163			$setToken(createOperatorToken(PerlTokenTypes.OPER_QMARK, "?"));
164		}
165	};
166
167SUBST_OR_MATCH_OR_WORD // this disambiguation rule disfavours EXPRs too much :-(
168	: { !afterArrow }? ((SUBST_OR_MATCH_OPER | 'x') (('A'..'Z' | 'a'..'z' | '0'..'9') | ((WS_CHAR)* "=>")))
169	=> { notOper = true; } t1:WORD { $setToken(t1); }
170	| { !afterArrow && !afterSub }? (("tr" | 's' | 'y') ~'}')
171	=> SUBST_EXPR { $setType(PerlTokenTypes.SUBST_EXPR); }
172	| { !afterArrow || afterDArrow }? (("qq" | "qx" | "qw" | "qr" | 'm' | 'q') ~('a'..'z' | '0'..'9' | '_' | '}' | '\r' | '\n' | ' '))
173	=> MATCH_EXPR { $setType(PerlTokenTypes.MATCH_EXPR); }
174	| (NUMBER)
175	=> n:NUMBER { $setToken(n); }
176	| (':' ('\uFFFF'! | ~':'))
177	=> ':'
178	{
179		glob = false;
180		$setToken(createOperatorToken(PerlTokenTypes.OPER_COLON, ":"));
181	}
182	| t3:WORD { $setToken(t3); }
183	;
184
185protected SUBST_EXPR
186	: ("tr" | 's' | 'y')
187	{
188		getParent().expectSubstExpr();
189		slashRegexp = qmarkRegexp = false;
190	};
191
192protected MATCH_EXPR
193	: ("qq" | "qx" | "qw" | "qr" | 'm' | 'q')
194	{
195		getParent().expectStringSuffix(); // second
196		getParent().expectString(); // first
197		slashRegexp = qmarkRegexp = false;
198	};
199
200OPER_DARROW
201	: "=>"
202	{
203		$setToken(createOperatorToken(PerlTokenTypes.OPER_DARROW, "=>"));
204		afterArrow = afterDArrow = true;
205	};
206
207OPER_ARROW
208	: "->"
209	{
210		$setToken(createOperatorToken(PerlTokenTypes.OPER_ARROW, "->"));
211		qmarkRegexp = slashRegexp = false;
212		afterArrow = true;
213	};
214
215OPER_DOUBLEEQ
216	: "=="
217	{ $setToken(createOperatorToken(PerlTokenTypes.OPER_DOUBLEEQ, "==")); };
218
219OPER_NOTEQ
220	: "!="
221	{ $setToken(createOperatorToken(PerlTokenTypes.OPER_NOTEQ, "!=")); };
222
223OPER_EQMATCH
224	: "=~"
225	{ afterArrow = afterDArrow = false; $setToken(createOperatorToken(PerlTokenTypes.OPER_EQMATCH, "=~")); };
226
227OPER_SMARTMATCH
228	: "~~"
229	{ afterArrow = afterDArrow = false; $setToken(createOperatorToken(PerlTokenTypes.OPER_SMARTMATCH, "~~")); };
230
231OPER_EQNOTMATCH
232	: "!~"
233	{ $setToken(createOperatorToken(PerlTokenTypes.OPER_EQNOTMATCH, "!~")); };
234
235OPER_MINUSMINUS
236	: "--"
237	{
238		$setToken(createOperatorToken(PerlTokenTypes.OPER_MINUSMINUS, "--"));
239		qmarkRegexp = false;
240	};
241
242OPER_MINUSEQ
243	: "-="
244	{
245		$setToken(createOperatorToken(PerlTokenTypes.OPER_PLUSPLUS, "-="));
246		qmarkRegexp = false;
247	};
248
249OPER_PLUSPLUS
250	: "++"
251	{
252		$setToken(createOperatorToken(PerlTokenTypes.OPER_PLUSPLUS, "++"));
253		qmarkRegexp = false;
254	};
255
256OPER_PLUSEQ
257	: "+="
258	{
259		$setToken(createOperatorToken(PerlTokenTypes.OPER_PLUSPLUS, "+="));
260		qmarkRegexp = false;
261	};
262
263OPER_ANDEQ
264	: "&="
265	{
266		$setToken(createOperatorToken(PerlTokenTypes.OPER_OREQ, "&="));
267		qmarkRegexp = false;
268	};
269
270OPER_OREQ
271	: "|="
272	{
273		$setToken(createOperatorToken(PerlTokenTypes.OPER_OREQ, "|="));
274		qmarkRegexp = false;
275	};
276
277OPER_XOREQ
278	: "^="
279	{
280		$setToken(createOperatorToken(PerlTokenTypes.OPER_XOREQ, "^="));
281		qmarkRegexp = false;
282	};
283
284OPER_DIVEQ
285	: { !slashRegexp }? "/="
286	{
287		$setToken(createOperatorToken(PerlTokenTypes.OPER_DIVEQ, "/="));
288		qmarkRegexp = false;
289	};
290
291OPER_COMMA
292	: ','
293	{
294		$setToken(createOperatorToken(PerlTokenTypes.OPER_COMMA, ","));
295		afterArrow = afterDArrow = false;
296	};
297
298OPER_EQ
299	: { !format }? '='
300	{
301		$setToken(createOperatorToken(PerlTokenTypes.OPER_EQ, "="));
302		glob = true;
303	};
304
305OPER_DIV
306	: '/'
307	{ $setToken(createOperatorToken(PerlTokenTypes.OPER_DIV, "/")); };
308
309OPER_PLUS
310	: '+'
311	{ $setToken(createOperatorToken(PerlTokenTypes.OPER_PLUS, "+")); };
312
313OPER_MINUS
314	: '-'
315	{ $setToken(createOperatorToken(PerlTokenTypes.OPER_MINUS, "-")); };
316
317OPER_DOTDOT
318	: ".."
319	{ $setToken(createOperatorToken(PerlTokenTypes.OPER_DOTDOT, "..")); };
320
321OPER_DOT
322	: '.'
323	{ afterArrow = afterDArrow = false; $setToken(createOperatorToken(PerlTokenTypes.OPER_DOT, ".")); };
324
325OPER_NOT
326	: '!'
327	{ slashRegexp = true; $setToken(createOperatorToken(PerlTokenTypes.OPER_NOT, "!")); };
328
329OPER_BSLASH
330	: '\\'
331	{ $setToken(createOperatorToken(PerlTokenTypes.OPER_BSLASH, "\\")); };
332
333OPEN_PAREN
334	: '('
335	{
336		$setToken(createOperatorToken(PerlTokenTypes.OPEN_PAREN, "("));
337		if (afterSub) { afterSub = false; proto = true; }
338		format = false;
339		glob = qmarkRegexp = slashRegexp = true;
340	};
341
342CLOSE_PAREN
343	: ')'
344	{
345		$setToken(createOperatorToken(PerlTokenTypes.CLOSE_PAREN, ")"));
346		glob = qmarkRegexp = slashRegexp = false;
347	};
348
349OPEN_BRACKET
350	: '['
351	{
352		$setToken(createOperatorToken(PerlTokenTypes.OPEN_BRACKET, "["));
353		format = false;
354		glob = qmarkRegexp = slashRegexp = true;
355	};
356
357CLOSE_BRACKET
358	: ']'
359	{
360		$setToken(createOperatorToken(PerlTokenTypes.CLOSE_BRACKET, "]"));
361		glob = qmarkRegexp = slashRegexp = false;
362	};
363
364FORMAT_STMT
365	: { format }? "="
366	{
367		format = false;
368		getParent().expectFormatEnd();
369	};
370
371protected VAR_WITH_CURLY
372	: (VAR (WS)? '{')
373	=> VAR (WS)? { getParent().expectString(); }
374	| (VAR)
375	=> VAR;
376
377PROC_REF
378	: '&' ID
379	{ qmarkRegexp = slashRegexp = false; };
380
381OPER_LSHIFT_OR_HEREDOC
382	: (OPEN_HEREDOC) //("<<" (WS)?  ('"' | '\'' | '`' | 'A'..'Z'))
383	=> OPEN_HEREDOC
384	   { $setType(PerlTokenTypes.OPEN_HEREDOC); }
385	| ("<<=")
386	=> OPER_LSHIFTEQ
387	   { $setToken(createOperatorToken(PerlTokenTypes.OPER_LSHIFTEQ, "<<=")); }
388	| ("<<" (WS)? ~('"' | '\'' | '`' | 'A'..'Z'))
389	=> OPER_LSHIFT
390	   { $setToken(createOperatorToken(PerlTokenTypes.OPER_LSHIFT, "<<")); }
391	;
392
393protected OPER_LSHIFT: "<<";
394protected OPER_LSHIFTEQ: "<<=";
395
396protected OPEN_HEREDOC
397	:
398	(
399		("<<" (WS)? '"')
400		=> "<<" (WS!)? '"'! ("\\\"" | ~('"' | '\n' | '\r' | '\uFFFF'))*
401		| ("<<" (WS)? "'")
402		=> "<<" (WS!)? "'"! ("\\'"  | ~('\'' | '\n' | '\r' | '\uFFFF'))*
403		| ("<<" (WS)? '`')
404		=> "<<" (WS!)? '`'! ("\\`"  | ~('`' | '\n' | '\r' | '\uFFFF'))*
405		| ("<<" (WS)? ('A'..'Z'|'a'..'z'|'_'))
406		=> "<<" ('A'..'Z'|'a'..'z'|'_')+
407	)
408	(NOT_NEWLINE!)*
409	{ if (LA(1) != EOF_CHAR) getParent().expectHereDocEnd($getText); }
410	(NEWLINE | '\uFFFF'!) // tolerate "print <<B" at the end of file...
411	// TODO: proper handling of here-docs with quoted identifiers,
412	// see man perlop
413	;
414
415GLOB
416	: { glob }?
417	'<' (~('<' | '>' | '\n' | '\r' | '\uFFFF'))* ('>' | '\uFFFF'!)
418	;
419
420OPER_RSHIFT
421	: ">>"
422	{
423		$setToken(createOperatorToken(PerlTokenTypes.OPER_RSHIFT, ">>"));
424		qmarkRegexp = slashRegexp = false;
425	}
426	;
427
428OPER_RSHIFTEQ
429	: ">>="
430	{
431		$setToken(createOperatorToken(PerlTokenTypes.OPER_RSHIFTEQ, ">>="));
432		qmarkRegexp = slashRegexp = false;
433	}
434	;
435
436OPER_GT
437	: '>'
438	{
439		$setToken(createOperatorToken(PerlTokenTypes.OPER_GT, ">"));
440		qmarkRegexp = slashRegexp = false;
441	}
442	;
443
444OPER_LT
445	: { !glob }?
446	'<'
447	{
448		$setToken(createOperatorToken(PerlTokenTypes.OPER_LT, "<"));
449		qmarkRegexp = slashRegexp = false;
450	}
451	;
452
453OPEN_POD
454	: { getColumn() == 1 }?
455	'=' ID (NOT_NEWLINE)* (NEWLINE! | '\uFFFF'!)
456	{ getParent().expectPODEnd(); }
457	;
458
459protected NUMBER
460	: ("0x" ('0'..'9' | 'A'..'F' | 'a'..'f' | '_')+)
461	| ("0b" ('0' | '1' | '_')+)
462	| ('0'..'9') ('0'..'9' | '_')*
463	{
464		slashRegexp = qmarkRegexp = glob = afterArrow = afterDArrow = false;
465		$setType(PerlTokenTypes.NUMBER);
466	};
467
468protected WORD
469	: ID
470	{
471		String str = $getText;
472
473		if ("use".equals(str)) $setType(PerlTokenTypes.KEYWORD_USE);
474		else if ("sub".equals(str)) { afterSub = true; $setType(PerlTokenTypes.KEYWORD_SUB); }
475		else if ("package".equals(str)) { $setType(PerlTokenTypes.KEYWORD_PACKAGE); }
476		else if ("format".equals(str) && !afterSub) { format = true; $setType(PerlTokenTypes.KEYWORD_FORMAT); }
477		else if ("__END__".equals(str)) { $setType(Token.EOF_TYPE); }
478		else if ("__DATA__".equals(str)) { $setType(Token.EOF_TYPE); }
479		else if (!afterSub)
480		{
481			if (KEYWORDS1.contains(str))
482    		{
483    			if ("while".equals(str)) glob = true;
484    			$setType(PerlTokenTypes.KEYWORD1);
485    		}
486    		else if (KEYWORDS2.contains(str))
487    		{
488    			glob = str.equals("unlink");
489    			slashRegexp = false; // actually becomes true, see below!
490    			$setType(PerlTokenTypes.KEYWORD2);
491    		}
492    		else if (OPERATORS.contains(str) && !afterArrow && !notOper)
493    		{
494    			glob = false;
495    			$setToken(createOperatorToken(PerlTokenTypes.OPER_OTHER, str));
496    		}
497		}
498		else glob = false;
499
500		slashRegexp = !(afterArrow || slashRegexp);
501		qmarkRegexp = afterArrow = notOper = false;
502	}
503	;
504
505protected ID
506	: { afterColon = false; }
507	(
508	{
509		// keep going if we have "::", break on ":"
510		// there must be a better way to implement it X-(
511		if (LA(1) == ':')
512		{
513			if (!afterColon && LA(2) != ':') break;
514			else afterColon = true;
515		}
516		else afterColon = false;
517	} WORD_CHAR)+
518	;
519
520protected WORD_CHAR
521	: ('A'..'Z' | 'a'..'z' | '0'..'9' | '_' | ':')
522	;
523
524protected WS_CHAR
525	: (' ' | '\t' | '\n' | '\r')
526	;
527
528protected SUBST_OR_MATCH_OPER
529	: ("tr" | "qq" | "qx" | "qw" | "qr" | 's' | 'y' | 'm' | 'q')
530	;
531
532OTHER: ~('\uFFFF');
533
534protected KEYWORD1: ;
535protected KEYWORD2: ;
536protected KEYWORD_USE: ;
537protected KEYWORD_SUB: ;
538protected KEYWORD_FORMAT: ;
539protected OPEN_QUOTE: ;
540
541protected NEWLINE
542	:
543	(
544	 '\r' '\n'	|	// DOS
545     '\r'		|	// MacOS
546     '\n'			// UNIX
547    )
548    { $setType(Token.SKIP); newline(); }
549    ;
550
551protected NOT_NEWLINE
552	: ~('\r' | '\n' | '\uFFFF')
553	;
554