1 // $Id: scanner.cpp,v 1.45 2004/03/25 13:32:28 ericb Exp $
2 //
3 // This software is subject to the terms of the IBM Jikes Compiler
4 // License Agreement available at the following URL:
5 // http://ibm.com/developerworks/opensource/jikes.
6 // Copyright (C) 1996, 2004 IBM Corporation and others. All Rights Reserved.
7 // You must accept the terms of that agreement to use this software.
8 //
9
10 #include "scanner.h"
11 #include "control.h"
12 #include "error.h"
13 #include "javadef.h"
14 #include "javasym.h"
15 #include "option.h"
16 #include "code.h"
17
18 #ifdef HAVE_JIKES_NAMESPACE
19 namespace Jikes { // Open namespace Jikes block
20 #endif
21
22 int (*Scanner::scan_keyword[13]) (const wchar_t* p1) =
23 {
24 ScanKeyword0,
25 ScanKeyword0,
26 ScanKeyword2,
27 ScanKeyword3,
28 ScanKeyword4,
29 ScanKeyword5,
30 ScanKeyword6,
31 ScanKeyword7,
32 ScanKeyword8,
33 ScanKeyword9,
34 ScanKeyword10,
35 ScanKeyword0,
36 ScanKeyword12
37 };
38
39
40 //
41 // The constructor initializes all utility variables.
42 //
Scanner(Control & control_)43 Scanner::Scanner(Control& control_)
44 : control(control_),
45 dollar_warning_given(false),
46 deprecated(false)
47 {
48 //
49 // If this assertion fails, the Token structure in stream.h must be
50 // redesigned !!!
51 //
52 assert(NUM_TERMINALS < 128);
53 //
54 // If this assertion fails, then gencode.java is at fault.
55 //
56 #ifdef JIKES_DEBUG
57 assert(Code::CodeCheck());
58 #endif // JIKES_DEBUG
59
60 //
61 // CLASSIFY_TOKEN is a mapping from each character into a
62 // classification routine that is invoked when that character
63 // is the first character encountered in a token.
64 //
65 for (int c = 0; c < 128; c++)
66 {
67 if (Code::IsAsciiUpper(c) || Code::IsAsciiLower(c) || c == U_DOLLAR ||
68 c == U_UNDERSCORE)
69 {
70 classify_token[c] = &Scanner::ClassifyId;
71 }
72 else if (Code::IsDecimalDigit(c))
73 classify_token[c] = &Scanner::ClassifyNumericLiteral;
74 else if (Code::IsSpace(c))
75 classify_token[c] = &Scanner::SkipSpaces;
76 else classify_token[c] = &Scanner::ClassifyBadToken;
77 }
78 classify_token[128] = &Scanner::ClassifyNonAsciiUnicode;
79
80 classify_token[U_a] = &Scanner::ClassifyIdOrKeyword;
81 classify_token[U_b] = &Scanner::ClassifyIdOrKeyword;
82 classify_token[U_c] = &Scanner::ClassifyIdOrKeyword;
83 classify_token[U_d] = &Scanner::ClassifyIdOrKeyword;
84 classify_token[U_e] = &Scanner::ClassifyIdOrKeyword;
85 classify_token[U_f] = &Scanner::ClassifyIdOrKeyword;
86 classify_token[U_g] = &Scanner::ClassifyIdOrKeyword;
87 classify_token[U_i] = &Scanner::ClassifyIdOrKeyword;
88 classify_token[U_l] = &Scanner::ClassifyIdOrKeyword;
89 classify_token[U_n] = &Scanner::ClassifyIdOrKeyword;
90 classify_token[U_p] = &Scanner::ClassifyIdOrKeyword;
91 classify_token[U_r] = &Scanner::ClassifyIdOrKeyword;
92 classify_token[U_s] = &Scanner::ClassifyIdOrKeyword;
93 classify_token[U_t] = &Scanner::ClassifyIdOrKeyword;
94 classify_token[U_v] = &Scanner::ClassifyIdOrKeyword;
95 classify_token[U_w] = &Scanner::ClassifyIdOrKeyword;
96
97 classify_token[U_SINGLE_QUOTE] = &Scanner::ClassifyCharLiteral;
98 classify_token[U_DOUBLE_QUOTE] = &Scanner::ClassifyStringLiteral;
99
100 classify_token[U_PLUS] = &Scanner::ClassifyPlus;
101 classify_token[U_MINUS] = &Scanner::ClassifyMinus;
102 classify_token[U_EXCLAMATION] = &Scanner::ClassifyNot;
103 classify_token[U_PERCENT] = &Scanner::ClassifyMod;
104 classify_token[U_CARET] = &Scanner::ClassifyXor;
105 classify_token[U_AMPERSAND] = &Scanner::ClassifyAnd;
106 classify_token[U_STAR] = &Scanner::ClassifyStar;
107 classify_token[U_BAR] = &Scanner::ClassifyOr;
108 classify_token[U_TILDE] = &Scanner::ClassifyComplement;
109 classify_token[U_SLASH] = &Scanner::ClassifySlash;
110 classify_token[U_GREATER] = &Scanner::ClassifyGreater;
111 classify_token[U_LESS] = &Scanner::ClassifyLess;
112 classify_token[U_LEFT_PARENTHESIS] = &Scanner::ClassifyLparen;
113 classify_token[U_RIGHT_PARENTHESIS] = &Scanner::ClassifyRparen;
114 classify_token[U_LEFT_BRACE] = &Scanner::ClassifyLbrace;
115 classify_token[U_RIGHT_BRACE] = &Scanner::ClassifyRbrace;
116 classify_token[U_LEFT_BRACKET] = &Scanner::ClassifyLbracket;
117 classify_token[U_RIGHT_BRACKET] = &Scanner::ClassifyRbracket;
118 classify_token[U_SEMICOLON] = &Scanner::ClassifySemicolon;
119 classify_token[U_QUESTION] = &Scanner::ClassifyQuestion;
120 classify_token[U_COLON] = &Scanner::ClassifyColon;
121 classify_token[U_COMMA] = &Scanner::ClassifyComma;
122 classify_token[U_DOT] = &Scanner::ClassifyPeriod;
123 classify_token[U_EQUAL] = &Scanner::ClassifyEqual;
124 classify_token[U_AT] = &Scanner::ClassifyAt;
125 }
126
127
128 //
129 // Associate a lexical stream with this file. Remember, we doctored the stream
130 // to start with \n so that we always start on a whitespace token, and so that
131 // the first source code line is line 1.
132 //
Initialize(FileSymbol * file_symbol)133 void Scanner::Initialize(FileSymbol* file_symbol)
134 {
135 lex = new LexStream(control, file_symbol);
136 current_token_index = lex -> GetNextToken(); // Get 0th token.
137 current_token = &(lex -> token_stream[current_token_index]);
138 current_token -> SetKind(0);
139
140 #ifdef JIKES_DEBUG
141 if (control.option.debug_comments)
142 {
143 // Add 0th comment.
144 LexStream::Comment* current_comment = &(lex -> comment_stream.Next());
145 current_comment -> string = NULL;
146 current_comment -> length = 0;
147 current_comment -> previous_token = BAD_TOKEN;
148 current_comment -> location = 0;
149 }
150 #endif // JIKES_DEBUG
151
152 lex -> line_location.Next() = 0; // Mark starting location of line # 0
153 }
154
155
156 //
157 // This is one of the main entry point for the Java lexical analyser. Its
158 // input is the name of a regular text file. Its output is a stream of tokens.
159 //
SetUp(FileSymbol * file_symbol)160 void Scanner::SetUp(FileSymbol* file_symbol)
161 {
162 Initialize(file_symbol);
163 lex -> CompressSpace();
164 file_symbol -> lex_stream = lex;
165 }
166
167
168 //
169 // This is one of the main entry point for the Java lexical analyser. Its
170 // input is the name of a regular text file. Its output is a stream of tokens.
171 //
Scan(FileSymbol * file_symbol)172 void Scanner::Scan(FileSymbol* file_symbol)
173 {
174 Initialize(file_symbol);
175 lex -> ReadInput();
176 cursor = lex -> InputBuffer();
177 if (cursor)
178 {
179 Scan();
180 lex -> CompressSpace();
181
182 if (control.option.dump_errors)
183 {
184 lex -> SortMessages();
185 for (unsigned i = 0; i < lex -> bad_tokens.Length(); i++)
186 JikesAPI::getInstance() ->
187 reportError(&(lex -> bad_tokens[i]));
188 }
189 lex -> DestroyInput(); // get rid of input buffer
190 }
191 else
192 {
193 delete lex;
194 lex = NULL;
195 }
196 file_symbol -> lex_stream = lex;
197 }
198
199
200 //
201 // Scan the InputBuffer() and process all tokens and comments.
202 //
Scan()203 void Scanner::Scan()
204 {
205 input_buffer_tail = &cursor[lex -> InputBufferLength()];
206
207 //
208 // CURSOR is assumed to point to the next character to be scanned.
209 // Using CURSOR, we jump to the proper classification function
210 // which scans and classifies the token and returns the location of
211 // the character immediately following it.
212 //
213 do
214 {
215 //
216 // Allocate space for next token and set its location.
217 //
218 if (! current_token_index || current_token -> Kind())
219 {
220 current_token_index =
221 lex -> GetNextToken(cursor - lex -> InputBuffer());
222 current_token = &(lex -> token_stream[current_token_index]);
223 }
224 else
225 {
226 current_token -> ResetInfoAndSetLocation(cursor -
227 lex -> InputBuffer());
228 }
229 if (deprecated)
230 {
231 current_token -> SetDeprecated();
232 deprecated = false;
233 }
234 (this ->* classify_token[*cursor < 128 ? *cursor : 128])();
235 } while (cursor < input_buffer_tail);
236
237 //
238 // Add a a gate after the last line.
239 //
240 lex -> line_location.Next() = input_buffer_tail - lex -> InputBuffer();
241 current_token -> SetKind(TK_EOF);
242
243 //
244 // If the brace_stack is not empty, then there are unmatched left
245 // braces in the input. Each unmatched left brace should point to
246 // the EOF token as a substitute for a matching right brace.
247 //
248 assert(current_token_index == lex -> token_stream.Length() - 1);
249
250 for (TokenIndex left_brace = brace_stack.Top();
251 left_brace; left_brace = brace_stack.Top())
252 {
253 lex -> token_stream[left_brace].SetRightBrace(current_token_index);
254 brace_stack.Pop();
255 }
256 }
257
258
259 //
260 // CURSOR points to the first '*' in a /**/ comment.
261 //
ScanStarComment()262 void Scanner::ScanStarComment()
263 {
264 const wchar_t* start = cursor - 1;
265 current_token -> SetKind(0);
266 #ifdef JIKES_DEBUG
267 LexStream::Comment* current_comment = NULL;
268 if (control.option.debug_comments)
269 {
270 current_comment = &(lex -> comment_stream.Next());
271 current_comment -> string = NULL;
272 current_comment -> previous_token = current_token_index - 1;
273 current_comment -> location = start - lex -> InputBuffer();
274 }
275 #endif // JIKES_DEBUG
276
277 //
278 // If this comment starts with the prefix "/**" then it is a document
279 // comment. Check whether or not it contains the deprecated tag and if so,
280 // mark the token preceeding it. The @deprecated tag must appear at the
281 // beginning of a line. According to Sun,
282 // http://java.sun.com/j2se/1.4/docs/tooldocs/win32/javadoc.html#comments,
283 // this means ignoring whitespace, *, and /** patterns. But in practice,
284 // javac doesn't quite implement it this way, completely ignoring /**
285 // separators, and rejecting \f and \t after *<space>*.
286 // This implementation also ignores /**, but treats whitespace correctly.
287 //
288 // Note that we exploit the fact that the stream is doctored to always
289 // end in U_CARRIAGE_RETURN, U_NULL; and that we changed all CR to LF
290 // within the file.
291 //
292 if (*++cursor == U_STAR)
293 {
294 enum
295 {
296 HEADER,
297 STAR,
298 REMAINDER
299 } state = HEADER;
300 while (*cursor != U_CARRIAGE_RETURN)
301 {
302 switch (*cursor++)
303 {
304 case U_LINE_FEED:
305 // Record new line.
306 lex -> line_location.Next() = cursor - lex -> InputBuffer();
307 state = HEADER;
308 break;
309 case U_SPACE:
310 case U_FORM_FEED:
311 case U_HORIZONTAL_TAB:
312 if (state != REMAINDER)
313 state = HEADER;
314 break;
315 case U_STAR:
316 if (state != REMAINDER || *cursor == U_SLASH)
317 state = STAR;
318 break;
319 case U_SLASH:
320 if (state == STAR)
321 {
322 #ifdef JIKES_DEBUG
323 if (control.option.debug_comments)
324 current_comment -> length = cursor - start;
325 #endif // JIKES_DEBUG
326 return;
327 }
328 // fallthrough
329 default:
330 if (state != REMAINDER)
331 {
332 state = REMAINDER;
333 if (cursor[-1] == U_AT &&
334 cursor[0] == U_d &&
335 cursor[1] == U_e &&
336 cursor[2] == U_p &&
337 cursor[3] == U_r &&
338 cursor[4] == U_e &&
339 cursor[5] == U_c &&
340 cursor[6] == U_a &&
341 cursor[7] == U_t &&
342 cursor[8] == U_e &&
343 cursor[9] == U_d &&
344 (Code::IsWhitespace(cursor + 10) ||
345 cursor[10] == U_STAR))
346 {
347 deprecated = true;
348 cursor += 9;
349 }
350 }
351 }
352 }
353 }
354 else // normal /* */ comment
355 {
356 // Normal comments do not affect deprecation.
357 if (current_token -> Deprecated())
358 deprecated = true;
359 while (*cursor != U_CARRIAGE_RETURN)
360 {
361 if (*cursor == U_STAR) // Potential comment closer.
362 {
363 while (*++cursor == U_STAR)
364 ;
365 if (*cursor == U_SLASH)
366 {
367 cursor++;
368 #ifdef JIKES_DEBUG
369 if (control.option.debug_comments)
370 current_comment -> length = cursor - start;
371 #endif // JIKES_DEBUG
372 return;
373 }
374 if (*cursor == U_CARRIAGE_RETURN)
375 break;
376 }
377 if (Code::IsNewline(*cursor++)) // Record new line.
378 {
379 lex -> line_location.Next() = cursor - lex -> InputBuffer();
380 }
381 }
382 }
383
384 //
385 // If we got here, we are in an unterminated comment. Discard the
386 // U_CARRIAGE_RETURN that ends the stream.
387 //
388 lex -> ReportMessage(StreamError::UNTERMINATED_COMMENT,
389 start - lex -> InputBuffer(),
390 cursor - lex -> InputBuffer() - 1);
391
392 #ifdef JIKES_DEBUG
393 if (control.option.debug_comments)
394 current_comment -> length = cursor - 1 - start;
395 #endif // JIKES_DEBUG
396 }
397
398
399 //
400 // CURSOR points to the second '/' in a // comment.
401 //
ScanSlashComment()402 void Scanner::ScanSlashComment()
403 {
404 //
405 // Note that we exploit the fact that the stream is doctored to always
406 // end in U_CARRIAGE_RETURN, U_NULL; and that we changed all CR to LF
407 // within the file. Normal comments do not affect deprecation.
408 //
409 if (current_token -> Deprecated())
410 deprecated = true;
411 current_token -> SetKind(0);
412 while (! Code::IsNewline(*++cursor)); // Skip all until \n or EOF
413 #ifdef JIKES_DEBUG
414 if (control.option.debug_comments)
415 {
416 LexStream::Comment* current_comment = &(lex -> comment_stream.Next());
417 current_comment -> string = NULL;
418 current_comment -> previous_token = current_token_index - 1;
419 current_comment -> location = current_token -> Location();
420 current_comment -> length = (cursor - lex -> InputBuffer()) -
421 current_comment -> location;
422 }
423 #endif // JIKES_DEBUG
424 }
425
426
427 //
428 // This procedure is invoked to skip useless spaces in the input.
429 // It assumes upon entry that CURSOR points to the next character to
430 // be scanned. Before returning it sets CURSOR to the location of the
431 // first non-space character following its initial position.
432 //
SkipSpaces()433 inline void Scanner::SkipSpaces()
434 {
435 //
436 // We exploit the fact that the stream was doctored to end in
437 // U_CARRIAGE_RETURN, U_NULL; and that all internal CR were changed to LF.
438 // Normal comments do not affect deprecation.
439 //
440 if (current_token -> Deprecated())
441 deprecated = true;
442 current_token -> SetKind(0);
443 do
444 {
445 if (Code::IsNewline(*cursor)) // Starting a new line?
446 lex -> line_location.Next() = cursor + 1 - lex -> InputBuffer();
447 } while (Code::IsSpace(*++cursor));
448 }
449
450
451 //
452 // scan_keyword(i):
453 // Scan an identifier of length I and determine if it is a keyword.
454 //
ScanKeyword0(const wchar_t *)455 int Scanner::ScanKeyword0(const wchar_t*)
456 {
457 return TK_Identifier;
458 }
459
ScanKeyword2(const wchar_t * p1)460 int Scanner::ScanKeyword2(const wchar_t* p1)
461 {
462 if (p1[0] == U_d && p1[1] == U_o)
463 return TK_do;
464 if (p1[0] == U_i && p1[1] == U_f)
465 return TK_if;
466 return TK_Identifier;
467 }
468
ScanKeyword3(const wchar_t * p1)469 int Scanner::ScanKeyword3(const wchar_t* p1)
470 {
471 switch (*p1)
472 {
473 case U_f:
474 if (p1[1] == U_o && p1[2] == U_r)
475 return TK_for;
476 break;
477 case U_i:
478 if (p1[1] == U_n && p1[2] == U_t)
479 return TK_int;
480 break;
481 case U_n:
482 if (p1[1] == U_e && p1[2] == U_w)
483 return TK_new;
484 break;
485 case U_t:
486 if (p1[1] == U_r && p1[2] == U_y)
487 return TK_try;
488 break;
489 }
490 return TK_Identifier;
491 }
492
ScanKeyword4(const wchar_t * p1)493 int Scanner::ScanKeyword4(const wchar_t* p1)
494 {
495 switch (*p1)
496 {
497 case U_b:
498 if (p1[1] == U_y && p1[2] == U_t && p1[3] == U_e)
499 return TK_byte;
500 break;
501 case U_c:
502 if (p1[1] == U_a && p1[2] == U_s && p1[3] == U_e)
503 return TK_case;
504 if (p1[1] == U_h && p1[2] == U_a && p1[3] == U_r)
505 return TK_char;
506 break;
507 case U_e:
508 if (p1[1] == U_l && p1[2] == U_s && p1[3] == U_e)
509 return TK_else;
510 if (p1[1] == U_n && p1[2] == U_u && p1[3] == U_m)
511 return TK_enum;
512 break;
513 case U_g:
514 if (p1[1] == U_o && p1[2] == U_t && p1[3] == U_o)
515 return TK_goto;
516 break;
517 case U_l:
518 if (p1[1] == U_o && p1[2] == U_n && p1[3] == U_g)
519 return TK_long;
520 break;
521 case U_n:
522 if (p1[1] == U_u && p1[2] == U_l && p1[3] == U_l)
523 return TK_null;
524 break;
525 case U_t:
526 if (p1[1] == U_h && p1[2] == U_i && p1[3] == U_s)
527 return TK_this;
528 if (p1[1] == U_r && p1[2] == U_u && p1[3] == U_e)
529 return TK_true;
530 break;
531 case U_v:
532 if (p1[1] == U_o && p1[2] == U_i && p1[3] == U_d)
533 return TK_void;
534 break;
535 }
536 return TK_Identifier;
537 }
538
ScanKeyword5(const wchar_t * p1)539 int Scanner::ScanKeyword5(const wchar_t* p1)
540 {
541 switch (*p1)
542 {
543 case U_b:
544 if (p1[1] == U_r && p1[2] == U_e && p1[3] == U_a && p1[4] == U_k)
545 return TK_break;
546 break;
547 case U_c:
548 if (p1[1] == U_a && p1[2] == U_t && p1[3] == U_c && p1[4] == U_h)
549 return TK_catch;
550 if (p1[1] == U_l && p1[2] == U_a && p1[3] == U_s && p1[4] == U_s)
551 return TK_class;
552 if (p1[1] == U_o && p1[2] == U_n && p1[3] == U_s && p1[4] == U_t)
553 return TK_const;
554 break;
555 case U_f:
556 if (p1[1] == U_a && p1[2] == U_l && p1[3] == U_s && p1[4] == U_e)
557 return TK_false;
558 if (p1[1] == U_i && p1[2] == U_n && p1[3] == U_a && p1[4] == U_l)
559 return TK_final;
560 if (p1[1] == U_l && p1[2] == U_o && p1[3] == U_a && p1[4] == U_t)
561 return TK_float;
562 break;
563 case U_s:
564 if (p1[1] == U_h && p1[2] == U_o && p1[3] == U_r && p1[4] == U_t)
565 return TK_short;
566 if (p1[1] == U_u && p1[2] == U_p && p1[3] == U_e && p1[4] == U_r)
567 return TK_super;
568 break;
569 case U_t:
570 if (p1[1] == U_h && p1[2] == U_r && p1[3] == U_o && p1[4] == U_w)
571 return TK_throw;
572 break;
573 case U_w:
574 if (p1[1] == U_h && p1[2] == U_i && p1[3] == U_l && p1[4] == U_e)
575 return TK_while;
576 break;
577 }
578 return TK_Identifier;
579 }
580
ScanKeyword6(const wchar_t * p1)581 int Scanner::ScanKeyword6(const wchar_t* p1)
582 {
583 switch (*p1)
584 {
585 case U_a:
586 if (p1[1] == U_s && p1[2] == U_s &&
587 p1[3] == U_e && p1[4] == U_r && p1[5] == U_t)
588 return TK_assert;
589 break;
590 case U_d:
591 if (p1[1] == U_o && p1[2] == U_u &&
592 p1[3] == U_b && p1[4] == U_l && p1[5] == U_e)
593 return TK_double;
594 break;
595 case U_i:
596 if (p1[1] == U_m && p1[2] == U_p &&
597 p1[3] == U_o && p1[4] == U_r && p1[5] == U_t)
598 return TK_import;
599 break;
600 case U_n:
601 if (p1[1] == U_a && p1[2] == U_t &&
602 p1[3] == U_i && p1[4] == U_v && p1[5] == U_e)
603 return TK_native;
604 break;
605 case U_p:
606 if (p1[1] == U_u && p1[2] == U_b &&
607 p1[3] == U_l && p1[4] == U_i && p1[5] == U_c)
608 return TK_public;
609 break;
610 case U_r:
611 if (p1[1] == U_e && p1[2] == U_t &&
612 p1[3] == U_u && p1[4] == U_r && p1[5] == U_n)
613 return TK_return;
614 break;
615 case U_s:
616 if (p1[1] == U_t && p1[2] == U_a &&
617 p1[3] == U_t && p1[4] == U_i && p1[5] == U_c)
618 return TK_static;
619 if (p1[1] == U_w && p1[2] == U_i &&
620 p1[3] == U_t && p1[4] == U_c && p1[5] == U_h)
621 return TK_switch;
622 break;
623 case U_t:
624 if (p1[1] == U_h && p1[2] == U_r &&
625 p1[3] == U_o && p1[4] == U_w && p1[5] == U_s)
626 return TK_throws;
627 break;
628 }
629 return TK_Identifier;
630 }
631
ScanKeyword7(const wchar_t * p1)632 int Scanner::ScanKeyword7(const wchar_t* p1)
633 {
634 switch (*p1)
635 {
636 case U_b:
637 if (p1[1] == U_o && p1[2] == U_o && p1[3] == U_l &&
638 p1[4] == U_e && p1[5] == U_a && p1[6] == U_n)
639 return TK_boolean;
640 break;
641 case U_d:
642 if (p1[1] == U_e && p1[2] == U_f && p1[3] == U_a &&
643 p1[4] == U_u && p1[5] == U_l && p1[6] == U_t)
644 return TK_default;
645 break;
646 case U_e:
647 if (p1[1] == U_x && p1[2] == U_t && p1[3] == U_e &&
648 p1[4] == U_n && p1[5] == U_d && p1[6] == U_s)
649 return TK_extends;
650 break;
651 case U_f:
652 if (p1[1] == U_i && p1[2] == U_n && p1[3] == U_a &&
653 p1[4] == U_l && p1[5] == U_l && p1[6] == U_y)
654 return TK_finally;
655 break;
656 case U_p:
657 if (p1[1] == U_a && p1[2] == U_c && p1[3] == U_k &&
658 p1[4] == U_a && p1[5] == U_g && p1[6] == U_e)
659 return TK_package;
660 if (p1[1] == U_r && p1[2] == U_i && p1[3] == U_v &&
661 p1[4] == U_a && p1[5] == U_t && p1[6] == U_e)
662 return TK_private;
663 break;
664 }
665 return TK_Identifier;
666 }
667
ScanKeyword8(const wchar_t * p1)668 int Scanner::ScanKeyword8(const wchar_t* p1)
669 {
670 switch (*p1)
671 {
672 case U_a:
673 if (p1[1] == U_b && p1[2] == U_s &&
674 p1[3] == U_t && p1[4] == U_r &&
675 p1[5] == U_a && p1[6] == U_c && p1[7] == U_t)
676 return TK_abstract;
677 break;
678 case U_c:
679 if (p1[1] == U_o && p1[2] == U_n &&
680 p1[3] == U_t && p1[4] == U_i &&
681 p1[5] == U_n && p1[6] == U_u && p1[7] == U_e)
682 return TK_continue;
683 break;
684 case U_s:
685 if (p1[1] == U_t && p1[2] == U_r &&
686 p1[3] == U_i && p1[4] == U_c &&
687 p1[5] == U_t && p1[6] == U_f && p1[7] == U_p)
688 return TK_strictfp;
689 break;
690 case U_v:
691 if (p1[1] == U_o && p1[2] == U_l &&
692 p1[3] == U_a && p1[4] == U_t &&
693 p1[5] == U_i && p1[6] == U_l && p1[7] == U_e)
694 return TK_volatile;
695 break;
696 }
697 return TK_Identifier;
698 }
699
ScanKeyword9(const wchar_t * p1)700 int Scanner::ScanKeyword9(const wchar_t* p1)
701 {
702 if (p1[0] == U_i && p1[1] == U_n && p1[2] == U_t &&
703 p1[3] == U_e && p1[4] == U_r && p1[5] == U_f &&
704 p1[6] == U_a && p1[7] == U_c && p1[8] == U_e)
705 return TK_interface;
706 if (p1[0] == U_p && p1[1] == U_r && p1[2] == U_o &&
707 p1[3] == U_t && p1[4] == U_e && p1[5] == U_c &&
708 p1[6] == U_t && p1[7] == U_e && p1[8] == U_d)
709 return TK_protected;
710 if (p1[0] == U_t && p1[1] == U_r && p1[2] == U_a &&
711 p1[3] == U_n && p1[4] == U_s && p1[5] == U_i &&
712 p1[6] == U_e && p1[7] == U_n && p1[8] == U_t)
713 return TK_transient;
714 return TK_Identifier;
715 }
716
ScanKeyword10(const wchar_t * p1)717 int Scanner::ScanKeyword10(const wchar_t* p1)
718 {
719 if (p1[0] == U_i)
720 {
721 if (p1[1] == U_m && p1[2] == U_p && p1[3] == U_l &&
722 p1[4] == U_e && p1[5] == U_m && p1[6] == U_e &&
723 p1[7] == U_n && p1[8] == U_t && p1[9] == U_s)
724 return TK_implements;
725 if (p1[1] == U_n && p1[2] == U_s && p1[3] == U_t &&
726 p1[4] == U_a && p1[5] == U_n && p1[6] == U_c &&
727 p1[7] == U_e && p1[8] == U_o && p1[9] == U_f)
728 return TK_instanceof;
729 }
730 return TK_Identifier;
731 }
732
ScanKeyword12(const wchar_t * p1)733 int Scanner::ScanKeyword12(const wchar_t* p1)
734 {
735 if (p1[0] == U_s && p1[1] == U_y && p1[2] == U_n &&
736 p1[3] == U_c && p1[4] == U_h && p1[5] == U_r &&
737 p1[6] == U_o && p1[7] == U_n && p1[8] == U_i &&
738 p1[9] == U_z && p1[10] == U_e&& p1[11] == U_d)
739 return TK_synchronized;
740 return TK_Identifier;
741 }
742
743
744 //
745 // This procedure is invoked to scan a character literal. After the character
746 // literal has been scanned and classified, it is entered in the table with
747 // quotes intact.
748 //
ClassifyCharLiteral()749 void Scanner::ClassifyCharLiteral()
750 {
751 //
752 // We exploit the fact that the stream was doctored to end in
753 // U_CARRIAGE_RETURN, U_NULL; and that all internal CR were changed to LF.
754 //
755 current_token -> SetKind(TK_CharacterLiteral);
756 bool bad = false;
757 const wchar_t* ptr = cursor + 1;
758 switch (*ptr)
759 {
760 case U_SINGLE_QUOTE:
761 bad = true;
762 if (ptr[1] == U_SINGLE_QUOTE)
763 {
764 lex -> ReportMessage(StreamError::ESCAPE_EXPECTED,
765 current_token -> Location() + 1,
766 current_token -> Location() + 1);
767 }
768 else
769 {
770 lex -> ReportMessage(StreamError::EMPTY_CHARACTER_CONSTANT,
771 current_token -> Location(),
772 current_token -> Location() + 1);
773 ptr--;
774 }
775 break;
776 case U_BACKSLASH:
777 switch (*++ptr)
778 {
779 case U_b:
780 case U_f:
781 case U_n:
782 case U_r:
783 case U_t:
784 case U_DOUBLE_QUOTE:
785 case U_BACKSLASH:
786 break;
787 case U_SINGLE_QUOTE:
788 //
789 // The user may have forgotten to do '\\'.
790 //
791 if (ptr[1] != U_SINGLE_QUOTE)
792 {
793 lex -> ReportMessage(StreamError::ESCAPE_EXPECTED,
794 current_token -> Location() + 1,
795 current_token -> Location() + 1);
796 ptr--;
797 bad = true;
798 }
799 break;
800 case U_0:
801 case U_1:
802 case U_2:
803 case U_3:
804 if (! Code::IsOctalDigit(ptr[1]))
805 break;
806 ptr++;
807 // fallthrough
808 case U_4:
809 case U_5:
810 case U_6:
811 case U_7:
812 if (! Code::IsOctalDigit(ptr[1]))
813 break;
814 ptr++;
815 break;
816 case U_CARRIAGE_RETURN:
817 case U_LINE_FEED:
818 ptr--;
819 // fallthrough
820 case U_u:
821 //
822 // By now, Unicode escapes have already been flattened; and it is
823 // illegal to try it twice (such as '\u005cu0000').
824 //
825 default:
826 lex -> ReportMessage(StreamError::INVALID_ESCAPE_SEQUENCE,
827 current_token -> Location() + 1,
828 current_token -> Location() + ptr - cursor);
829 bad = true;
830 }
831 break;
832 case U_CARRIAGE_RETURN:
833 case U_LINE_FEED:
834 // Since the source is broken into lines before tokens (JLS 3.2), this
835 // is an unterminated quote. We complain after this switch.
836 ptr--;
837 break;
838 default:
839 break;
840 }
841
842 if (*++ptr != U_SINGLE_QUOTE)
843 {
844 //
845 // For generally better parsing and nicer error messages, see if the
846 // user tried to do a multiple character alpha-numeric string.
847 //
848 while (Code::IsAlnum(ptr))
849 ptr += Code::Codelength(ptr);
850 if (Code::IsNewline(*ptr))
851 ptr--;
852 if (! bad)
853 {
854 lex -> ReportMessage((*ptr != U_SINGLE_QUOTE || ptr == cursor
855 ? StreamError::UNTERMINATED_CHARACTER_CONSTANT
856 : StreamError::MULTI_CHARACTER_CONSTANT),
857 current_token -> Location(),
858 ptr - lex -> InputBuffer());
859 }
860 }
861
862 ptr++;
863 current_token ->
864 SetSymbol(control.char_table.FindOrInsertLiteral(cursor,
865 ptr - cursor));
866 cursor = ptr;
867 }
868
869
870 //
871 // This procedure is invoked to scan a string literal. After the string
872 // literal has been scanned and classified, it is entered in the table with
873 // quotes intact.
874 //
ClassifyStringLiteral()875 void Scanner::ClassifyStringLiteral()
876 {
877 //
878 // We exploit the fact that the stream was doctored to end in
879 // U_CARRIAGE_RETURN, U_NULL; and that all internal CR were changed to LF.
880 //
881 current_token -> SetKind(TK_StringLiteral);
882
883 const wchar_t* ptr = cursor + 1;
884
885 while (*ptr != U_DOUBLE_QUOTE && ! Code::IsNewline(*ptr))
886 {
887 if (*ptr++ == U_BACKSLASH)
888 {
889 switch (*ptr++)
890 {
891 case U_b:
892 case U_f:
893 case U_n:
894 case U_r:
895 case U_t:
896 case U_SINGLE_QUOTE:
897 case U_DOUBLE_QUOTE:
898 case U_BACKSLASH:
899 case U_0:
900 case U_1:
901 case U_2:
902 case U_3:
903 case U_4:
904 case U_5:
905 case U_6:
906 case U_7:
907 break;
908 case U_u:
909 //
910 // By now, Unicode escapes have already been flattened; and it
911 // is illegal to try it twice (such as "\u005cu0000").
912 //
913 default:
914 ptr--;
915 lex -> ReportMessage(StreamError::INVALID_ESCAPE_SEQUENCE,
916 ptr - lex -> InputBuffer() - 1,
917 (ptr - lex -> InputBuffer() -
918 (Code::IsNewline(*ptr) ? 1 : 0)));
919 }
920 }
921 }
922
923 if (Code::IsNewline(*ptr))
924 {
925 ptr--;
926 lex -> ReportMessage(StreamError::UNTERMINATED_STRING_CONSTANT,
927 current_token -> Location(),
928 ptr - lex -> InputBuffer());
929 }
930
931 ptr++;
932 current_token ->
933 SetSymbol(control.string_table.FindOrInsertLiteral(cursor,
934 ptr - cursor));
935 cursor = ptr;
936 }
937
938
939 //
940 // This procedure is invoked when CURSOR points to a letter which starts a
941 // keyword. It scans the identifier and checks whether or not it is a keyword.
942 // Note that the use of that check is a time-optimization that is not
943 // required for correctness.
944 //
ClassifyIdOrKeyword()945 void Scanner::ClassifyIdOrKeyword()
946 {
947 const wchar_t* ptr = cursor + 1;
948 bool has_dollar = false;
949
950 while (Code::IsAlnum(ptr))
951 {
952 has_dollar = has_dollar || (*ptr == U_DS);
953 ptr += Code::Codelength(ptr);
954 }
955 int len = ptr - cursor;
956
957 current_token -> SetKind(len < 13 ? (scan_keyword[len])(cursor)
958 : TK_Identifier);
959
960 if (current_token -> Kind() == TK_assert &&
961 control.option.source < JikesOption::SDK1_4)
962 {
963 lex -> ReportMessage(StreamError::DEPRECATED_IDENTIFIER_ASSERT,
964 current_token -> Location(),
965 current_token -> Location() + len - 1);
966 current_token -> SetKind(TK_Identifier);
967 }
968 if (current_token -> Kind() == TK_enum &&
969 control.option.source < JikesOption::SDK1_5)
970 {
971 lex -> ReportMessage(StreamError::DEPRECATED_IDENTIFIER_ENUM,
972 current_token -> Location(),
973 current_token -> Location() + len - 1);
974 current_token -> SetKind(TK_Identifier);
975 }
976 if (has_dollar && ! dollar_warning_given)
977 {
978 dollar_warning_given = true;
979 lex -> ReportMessage(StreamError::DOLLAR_IN_IDENTIFIER,
980 current_token -> Location(),
981 current_token -> Location() + len - 1);
982 }
983
984 if (current_token -> Kind() == TK_Identifier)
985 {
986 current_token -> SetSymbol(control.FindOrInsertName(cursor, len));
987 for (unsigned i = 0; i < control.option.keyword_map.Length(); i++)
988 {
989 if (control.option.keyword_map[i].length == len &&
990 wcsncmp(cursor, control.option.keyword_map[i].name, len) == 0)
991 {
992 current_token -> SetKind(control.option.keyword_map[i].key);
993 }
994 }
995 }
996 else if (current_token -> Kind() == TK_class ||
997 current_token -> Kind() == TK_enum ||
998 current_token -> Kind() == TK_interface)
999 {
1000 //
1001 // If this is a top-level type keyword (not in braces), we keep track
1002 // of it by adding it to a list.
1003 //
1004 if (brace_stack.Size() == 0)
1005 lex -> type_index.Next() = current_token_index;
1006 }
1007 else if (current_token -> Kind() == TK_package && ! lex -> package)
1008 lex -> package = current_token_index;
1009 cursor = ptr;
1010 }
1011
1012 //
1013 // This procedure is invoked when CURSOR points to an identifier start
1014 // which cannot start a keyword.
1015 //
ClassifyId()1016 void Scanner::ClassifyId()
1017 {
1018 const wchar_t* ptr = cursor;
1019 bool has_dollar = false;
1020
1021 while (Code::IsAlnum(ptr))
1022 {
1023 has_dollar = has_dollar || (*ptr == U_DS);
1024 ptr += Code::Codelength(ptr);
1025 }
1026
1027 int len = ptr - cursor;
1028
1029 if (has_dollar && ! dollar_warning_given)
1030 {
1031 dollar_warning_given = true;
1032 lex -> ReportMessage(StreamError::DOLLAR_IN_IDENTIFIER,
1033 current_token -> Location(),
1034 current_token -> Location() + len - 1);
1035 }
1036
1037 current_token -> SetKind(TK_Identifier);
1038 current_token -> SetSymbol(control.FindOrInsertName(cursor, len));
1039
1040 for (unsigned i = 0; i < control.option.keyword_map.Length(); i++)
1041 {
1042 if (control.option.keyword_map[i].length == len &&
1043 wcsncmp(cursor, control.option.keyword_map[i].name, len) == 0)
1044 {
1045 current_token -> SetKind(control.option.keyword_map[i].key);
1046 }
1047 }
1048 cursor = ptr;
1049 }
1050
1051
1052 //
1053 // This procedure is invoked when CURSOR points directly to '0' - '9' or '.'.
1054 // Such a token is classified as a numeric literal: TK_LongLiteral,
1055 // TK_IntegerLiteral, TK_DoubleLiteral, or TK_FloatLiteral.
1056 //
ClassifyNumericLiteral()1057 void Scanner::ClassifyNumericLiteral()
1058 {
1059 //
1060 // Scan the initial sequence of digits, if any.
1061 //
1062 const wchar_t* ptr = cursor - 1;
1063 const wchar_t* tmp;
1064 while (Code::IsDecimalDigit(*++ptr));
1065
1066 //
1067 // We now take an initial crack at classifying the numeric token.
1068 // We have three initial cases to consider, and stop parsing before any
1069 // exponent or type suffix:
1070 //
1071 // 1) If the initial (perhaps empty) sequence of digits is followed by
1072 // '.', we have a floating-point constant. We scan the sequence of
1073 // digits (if any) that follows the period. When '.' starts the number,
1074 // we already checked that a digit follows before calling this method.
1075 // 2) If the initial sequence is "0x" or "0X", we have a hexadecimal
1076 // literal, either integer or floating point. To be floating point,
1077 // the literal must contain an exponent with 'p' or 'P'; otherwise we
1078 // parse the largest int literal. There must be at least one hex
1079 // digit after the prefix, and before the (possible) exponent.
1080 // 3) Otherwise, we have an integer literal. If the initial (non-empty)
1081 // sequence of digits start with "0", we have an octal constant, and
1082 // for nicer parsing, we simply complain about non-octal digits rather
1083 // than strictly breaking 019 into the two tokens 01 and 9 (because
1084 // it would be a guaranteed syntax error later on). However, it is
1085 // still possible that 019 starts a valid floating point literal, which
1086 // is checked later.
1087 //
1088 if (*ptr == U_DOT)
1089 {
1090 current_token -> SetKind(TK_DoubleLiteral);
1091 while (Code::IsDecimalDigit(*++ptr));
1092 }
1093 else
1094 {
1095 current_token -> SetKind(TK_IntegerLiteral);
1096 if (*cursor == U_0)
1097 {
1098 if (*ptr == U_x || *ptr == U_X)
1099 {
1100 // Don't use isxdigit, it's not platform independent.
1101 while (Code::IsHexDigit(*++ptr)); // Skip the 'x'.
1102 if (*ptr == U_DOT)
1103 {
1104 current_token -> SetKind(TK_DoubleLiteral);
1105 while (Code::IsHexDigit(*++ptr));
1106 if (*ptr != U_p && *ptr != U_P)
1107 {
1108 // Missing required 'p' exponent.
1109 lex -> ReportMessage(StreamError::INVALID_FLOATING_HEX_EXPONENT,
1110 current_token -> Location(),
1111 ptr - 1 - lex -> InputBuffer());
1112 }
1113 else if (ptr == cursor + 3)
1114 {
1115 // Missing hex digits before exponent, with '.'.
1116 tmp = ptr;
1117 if (Code::IsSign(*++tmp)) // Skip the exponent letter.
1118 tmp++; // Skip the '+' or '-'.
1119 if (Code::IsHexDigit(*tmp))
1120 while (Code::IsHexDigit(*++tmp));
1121 if (*tmp != U_d && *tmp != U_D &&
1122 *tmp != U_f && *tmp != U_F)
1123 {
1124 tmp--;
1125 }
1126 lex -> ReportMessage(StreamError::INVALID_FLOATING_HEX_MANTISSA,
1127 current_token -> Location(),
1128 tmp - lex -> InputBuffer());
1129 }
1130 }
1131 else if (ptr == cursor + 2) // Found a runt "0x".
1132 {
1133 if (*ptr == U_p || *ptr == U_P)
1134 {
1135 // Missing hex digits before exponent, without '.'.
1136 tmp = ptr;
1137 if (Code::IsSign(*++tmp)) // Skip the exponent letter.
1138 tmp++; // Skip the '+' or '-'.
1139 if (Code::IsHexDigit(*tmp))
1140 while (Code::IsHexDigit(*++tmp));
1141 if (*tmp != U_d && *tmp != U_D &&
1142 *tmp != U_f && *tmp != U_F)
1143 {
1144 tmp--;
1145 }
1146 lex -> ReportMessage(StreamError::INVALID_FLOATING_HEX_MANTISSA,
1147 current_token -> Location(),
1148 tmp - lex -> InputBuffer());
1149 }
1150 else
1151 {
1152 tmp = (*ptr == U_l || *ptr == U_L) ? ptr : ptr - 1;
1153 lex -> ReportMessage(StreamError::INVALID_HEX_CONSTANT,
1154 current_token -> Location(),
1155 tmp - lex -> InputBuffer());
1156 }
1157 }
1158 }
1159 // Octal prefix. See if it will become floating point later.
1160 else if (*ptr != U_e && *ptr != U_E &&
1161 *ptr != U_d && *ptr != U_D &&
1162 *ptr != U_f && *ptr != U_F)
1163 {
1164 tmp = cursor;
1165 while (Code::IsOctalDigit(*++tmp)); // Skip leading '0'.
1166 if (tmp != ptr)
1167 {
1168 tmp = (*ptr == U_l || *ptr == U_L) ? ptr : ptr - 1;
1169 lex -> ReportMessage(StreamError::INVALID_OCTAL_CONSTANT,
1170 current_token -> Location(),
1171 tmp - lex -> InputBuffer());
1172 }
1173 }
1174 }
1175 }
1176
1177 //
1178 // If the initial numeric token is followed by an exponent, then it is a
1179 // floating-point constant. If that's the case, the literal is
1180 // reclassified and the exponent is scanned. Note that as 'E' and 'e' are
1181 // legitimate hexadecimal digits, we don't have to worry about a
1182 // hexadecimal constant being used as the prefix of a floating-point
1183 // constant. A hex floating point requires a hex prefix. An exponent
1184 // overrides an octal literal, as do the float and double suffixes. We
1185 // stop parsing before any type suffix.
1186 //
1187 // For example, 0x123e12 is tokenized as a single hexadecimal digit, while
1188 // the string 0x123e+12 gets broken down as the hex number 0x123e, the
1189 // operator '+', and the decimal constant 12. Meanwhile, 019e+0 and 019d
1190 // are both tokenized as a single floating-point constant 19.0. Note that
1191 // 1e should strictly be parsed as the int 1 followed by identifier e;
1192 // 1e+ should be the int 1, identifier e, and operator +; and 1p0d should
1193 // be the int 1 and identifier p0d; however all these cases are guaranteed
1194 // to be syntax errors later on, so we nicely consume them as a single
1195 // invalid floating point token now.
1196 //
1197 if (*ptr == U_e || *ptr == U_E || *ptr == U_p || *ptr == U_P)
1198 {
1199 current_token -> SetKind(TK_DoubleLiteral);
1200 if ((*ptr == U_p || *ptr == U_P) &&
1201 ! (cursor[1] == U_x || cursor[1] == U_X))
1202 {
1203 tmp = ptr;
1204 if (Code::IsSign(*++tmp)) // Skip the exponent letter.
1205 tmp++; // Skip the '+' or '-'.
1206 if (Code::IsDecimalDigit(*tmp))
1207 while (Code::IsDecimalDigit(*++tmp));
1208 if (*tmp != U_d && *tmp != U_D && *tmp != U_f && *tmp != U_F)
1209 tmp--;
1210 lex -> ReportMessage(StreamError::INVALID_FLOATING_HEX_PREFIX,
1211 current_token -> Location(),
1212 tmp - lex -> InputBuffer());
1213 }
1214 if (Code::IsSign(*++ptr)) // Skip the exponent letter.
1215 ptr++; // Skip the '+' or '-'.
1216 if (Code::IsDecimalDigit(*ptr))
1217 while (Code::IsDecimalDigit(*++ptr));
1218 else
1219 {
1220 tmp = (*ptr == U_d || *ptr == U_D || *ptr == U_f || *ptr == U_F)
1221 ? ptr : ptr - 1;
1222 lex -> ReportMessage(StreamError::INVALID_FLOATING_EXPONENT,
1223 current_token -> Location(),
1224 tmp - lex -> InputBuffer());
1225 }
1226 }
1227
1228 //
1229 // A numeric constant may be suffixed by a letter that further qualifies
1230 // what kind of a constant it is. We check for these suffixes here.
1231 //
1232 int len;
1233 if (*ptr == U_f || *ptr == U_F)
1234 {
1235 len = ++ptr - cursor;
1236 current_token ->
1237 SetSymbol(control.float_table.FindOrInsertLiteral(cursor, len));
1238 current_token -> SetKind(TK_FloatLiteral);
1239 }
1240 else if (*ptr == U_d || *ptr == U_D)
1241 {
1242 len = ++ptr - cursor;
1243 current_token ->
1244 SetSymbol(control.double_table.FindOrInsertLiteral(cursor, len));
1245 current_token -> SetKind(TK_DoubleLiteral);
1246 }
1247 else if (current_token -> Kind() == TK_IntegerLiteral)
1248 {
1249 if (*ptr == U_l || *ptr == U_L)
1250 {
1251 if (*ptr == U_l && control.option.pedantic)
1252 {
1253 lex -> ReportMessage(StreamError::FAVOR_CAPITAL_L_SUFFIX,
1254 current_token -> Location(),
1255 ptr - lex -> InputBuffer());
1256 }
1257
1258 len = ++ptr - cursor;
1259 current_token ->
1260 SetSymbol(control.long_table.FindOrInsertLiteral(cursor, len));
1261 current_token -> SetKind(TK_LongLiteral);
1262 }
1263 else
1264 {
1265 len = ptr - cursor;
1266 current_token ->
1267 SetSymbol(control.int_table.FindOrInsertLiteral(cursor, len));
1268 }
1269 }
1270 else
1271 {
1272 assert(current_token -> Kind() == TK_DoubleLiteral);
1273 len = ptr - cursor;
1274 current_token ->
1275 SetSymbol(control.double_table.FindOrInsertLiteral(cursor, len));
1276 }
1277 cursor = ptr;
1278 }
1279
1280
ClassifyColon()1281 void Scanner::ClassifyColon()
1282 {
1283 current_token -> SetKind(TK_COLON);
1284 cursor++;
1285 }
1286
1287
ClassifyPlus()1288 void Scanner::ClassifyPlus()
1289 {
1290 cursor++;
1291 if (*cursor == U_PLUS)
1292 {
1293 cursor++;
1294 current_token -> SetKind(TK_PLUS_PLUS);
1295 }
1296 else if (*cursor == U_EQUAL)
1297 {
1298 cursor++;
1299 current_token -> SetKind(TK_PLUS_EQUAL);
1300 }
1301 else current_token -> SetKind(TK_PLUS);
1302 }
1303
1304
ClassifyMinus()1305 void Scanner::ClassifyMinus()
1306 {
1307 cursor++;
1308 if (*cursor == U_MINUS)
1309 {
1310 cursor++;
1311 current_token -> SetKind(TK_MINUS_MINUS);
1312 }
1313 else if (*cursor == U_EQUAL)
1314 {
1315 cursor++;
1316 current_token -> SetKind(TK_MINUS_EQUAL);
1317 }
1318 else current_token -> SetKind(TK_MINUS);
1319 }
1320
1321
ClassifyStar()1322 void Scanner::ClassifyStar()
1323 {
1324 cursor++;
1325 if (*cursor == U_EQUAL)
1326 {
1327 cursor++;
1328 current_token -> SetKind(TK_MULTIPLY_EQUAL);
1329 }
1330 else current_token -> SetKind(TK_MULTIPLY);
1331 }
1332
1333
ClassifySlash()1334 void Scanner::ClassifySlash()
1335 {
1336 cursor++;
1337 if (*cursor == U_EQUAL)
1338 {
1339 cursor++;
1340 current_token -> SetKind(TK_DIVIDE_EQUAL);
1341 }
1342 else if (*cursor == U_SLASH)
1343 ScanSlashComment();
1344 else if (*cursor == U_STAR)
1345 ScanStarComment();
1346 else current_token -> SetKind(TK_DIVIDE);
1347 }
1348
1349
ClassifyLess()1350 void Scanner::ClassifyLess()
1351 {
1352 cursor++;
1353 if (*cursor == U_EQUAL)
1354 {
1355 cursor++;
1356 current_token -> SetKind(TK_LESS_EQUAL);
1357 }
1358 else if (*cursor == U_LESS)
1359 {
1360 cursor++;
1361 if (*cursor == U_EQUAL)
1362 {
1363 cursor++;
1364 current_token -> SetKind(TK_LEFT_SHIFT_EQUAL);
1365 }
1366 else current_token -> SetKind(TK_LEFT_SHIFT);
1367 }
1368 else current_token -> SetKind(TK_LESS);
1369 }
1370
1371
ClassifyGreater()1372 void Scanner::ClassifyGreater()
1373 {
1374 cursor++;
1375 current_token -> SetKind(TK_GREATER);
1376 if (*cursor == U_EQUAL)
1377 {
1378 cursor++;
1379 current_token -> SetKind(TK_GREATER_EQUAL);
1380 }
1381 else if (*cursor == U_GREATER)
1382 {
1383 cursor++;
1384 if (*cursor == U_EQUAL)
1385 {
1386 cursor++;
1387 current_token -> SetKind(TK_RIGHT_SHIFT_EQUAL);
1388 }
1389 else if (*cursor == U_GREATER)
1390 {
1391 cursor++;
1392 if (*cursor == U_EQUAL)
1393 {
1394 cursor++;
1395 current_token -> SetKind(TK_UNSIGNED_RIGHT_SHIFT_EQUAL);
1396 }
1397 else current_token -> SetKind(TK_UNSIGNED_RIGHT_SHIFT);
1398 }
1399 else current_token -> SetKind(TK_RIGHT_SHIFT);
1400 }
1401 }
1402
1403
ClassifyAnd()1404 void Scanner::ClassifyAnd()
1405 {
1406 cursor++;
1407 if (*cursor == U_AMPERSAND)
1408 {
1409 cursor++;
1410 current_token -> SetKind(TK_AND_AND);
1411 }
1412 else if (*cursor == U_EQUAL)
1413 {
1414 cursor++;
1415 current_token -> SetKind(TK_AND_EQUAL);
1416 }
1417 else current_token -> SetKind(TK_AND);
1418 }
1419
1420
ClassifyOr()1421 void Scanner::ClassifyOr()
1422 {
1423 cursor++;
1424 if (*cursor == U_BAR)
1425 {
1426 cursor++;
1427 current_token -> SetKind(TK_OR_OR);
1428 }
1429 else if (*cursor == U_EQUAL)
1430 {
1431 cursor++;
1432 current_token -> SetKind(TK_OR_EQUAL);
1433 }
1434 else current_token -> SetKind(TK_OR);
1435 }
1436
1437
ClassifyXor()1438 void Scanner::ClassifyXor()
1439 {
1440 cursor++;
1441 if (*cursor == U_EQUAL)
1442 {
1443 cursor++;
1444 current_token -> SetKind(TK_XOR_EQUAL);
1445 }
1446 else current_token -> SetKind(TK_XOR);
1447 }
1448
1449
ClassifyNot()1450 void Scanner::ClassifyNot()
1451 {
1452 cursor++;
1453 if (*cursor == U_EQUAL)
1454 {
1455 cursor++;
1456 current_token -> SetKind(TK_NOT_EQUAL);
1457 }
1458 else current_token -> SetKind(TK_NOT);
1459 }
1460
1461
ClassifyEqual()1462 void Scanner::ClassifyEqual()
1463 {
1464 cursor++;
1465 if (*cursor == U_EQUAL)
1466 {
1467 cursor++;
1468 current_token -> SetKind(TK_EQUAL_EQUAL);
1469 }
1470 else current_token -> SetKind(TK_EQUAL);
1471 }
1472
1473
ClassifyMod()1474 void Scanner::ClassifyMod()
1475 {
1476 cursor++;
1477 if (*cursor == U_EQUAL)
1478 {
1479 cursor++;
1480 current_token -> SetKind(TK_REMAINDER_EQUAL);
1481 }
1482 else current_token -> SetKind(TK_REMAINDER);
1483 }
1484
1485
ClassifyPeriod()1486 void Scanner::ClassifyPeriod()
1487 {
1488 if (Code::IsDecimalDigit(cursor[1])) // Is '.' followed by digit?
1489 ClassifyNumericLiteral();
1490 else if (cursor[1] == U_DOT && cursor[2] == U_DOT)
1491 {
1492 // Added for Java 1.5, varargs, by JSR 201.
1493 current_token -> SetKind(TK_ELLIPSIS);
1494 cursor += 3;
1495 }
1496 else
1497 {
1498 current_token -> SetKind(TK_DOT);
1499 cursor++;
1500 }
1501 }
1502
1503
ClassifySemicolon()1504 void Scanner::ClassifySemicolon()
1505 {
1506 current_token -> SetKind(TK_SEMICOLON);
1507 cursor++;
1508 }
1509
1510
ClassifyComma()1511 void Scanner::ClassifyComma()
1512 {
1513 current_token -> SetKind(TK_COMMA);
1514 cursor++;
1515 }
1516
1517
ClassifyLbrace()1518 void Scanner::ClassifyLbrace()
1519 {
1520 //
1521 // Instead of setting the symbol for a left brace, we keep track of it.
1522 // When we encounter its matching right brace, we use the symbol field
1523 // to identify its counterpart.
1524 //
1525 brace_stack.Push(current_token_index);
1526 current_token -> SetKind(TK_LBRACE);
1527 cursor++;
1528 }
1529
1530
ClassifyRbrace()1531 void Scanner::ClassifyRbrace()
1532 {
1533 //
1534 // When a left brace in encountered, it is pushed into the brace_stack.
1535 // When its matching right brace in encountered, we pop the left brace
1536 // and make it point to its matching right brace.
1537 //
1538 TokenIndex left_brace = brace_stack.Top();
1539 if (left_brace) // This right brace is matched by a left one
1540 {
1541 lex -> token_stream[left_brace].SetRightBrace(current_token_index);
1542 brace_stack.Pop();
1543 }
1544 current_token -> SetKind(TK_RBRACE);
1545 cursor++;
1546 }
1547
1548
ClassifyLparen()1549 void Scanner::ClassifyLparen()
1550 {
1551 current_token -> SetKind(TK_LPAREN);
1552 cursor++;
1553 }
1554
1555
ClassifyRparen()1556 void Scanner::ClassifyRparen()
1557 {
1558 current_token -> SetKind(TK_RPAREN);
1559 cursor++;
1560 }
1561
1562
ClassifyLbracket()1563 void Scanner::ClassifyLbracket()
1564 {
1565 current_token -> SetKind(TK_LBRACKET);
1566 cursor++;
1567 }
1568
1569
ClassifyRbracket()1570 void Scanner::ClassifyRbracket()
1571 {
1572 current_token -> SetKind(TK_RBRACKET);
1573 cursor++;
1574 }
1575
1576
ClassifyComplement()1577 void Scanner::ClassifyComplement()
1578 {
1579 current_token -> SetKind(TK_TWIDDLE);
1580 cursor++;
1581 }
1582
1583
ClassifyAt()1584 void Scanner::ClassifyAt()
1585 {
1586 // Added for Java 1.5, attributes, by JSR 175.
1587 current_token -> SetKind(TK_AT);
1588 cursor++;
1589 }
1590
1591
ClassifyQuestion()1592 void Scanner::ClassifyQuestion()
1593 {
1594 current_token -> SetKind(TK_QUESTION);
1595 cursor++;
1596 }
1597
1598
ClassifyNonAsciiUnicode()1599 void Scanner::ClassifyNonAsciiUnicode()
1600 {
1601 if (Code::IsAlpha(cursor)) // Some kind of non-ascii unicode letter
1602 ClassifyId();
1603 else ClassifyBadToken();
1604 }
1605
1606
1607 //
1608 // Anything that doesn't fit above. Note that the lex stream already stripped
1609 // any concluding ctrl-z, so we don't need to worry about seeing that as a
1610 // bad token. For fewer error messages, we scan until the next valid
1611 // character, issue the error message, then treat this token as whitespace.
1612 //
ClassifyBadToken()1613 void Scanner::ClassifyBadToken()
1614 {
1615 while (++cursor < input_buffer_tail)
1616 {
1617 if ((*cursor < 128 &&
1618 classify_token[*cursor] != &Scanner::ClassifyBadToken) ||
1619 Code::IsAlpha(cursor))
1620 {
1621 break;
1622 }
1623 }
1624 current_token -> SetKind(0);
1625 lex -> ReportMessage(StreamError::BAD_TOKEN, current_token -> Location(),
1626 cursor - lex -> InputBuffer() - 1);
1627 }
1628
1629 #ifdef HAVE_JIKES_NAMESPACE
1630 } // Close namespace Jikes block
1631 #endif
1632