1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
21
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
25
/* This file defines java_lex (yylex), which reads a Java source file
possibly containing Unicode escape sequences or UTF-8 encoded
characters and returns a token for everything found except comments,
white space and line terminators. When necessary, it also fills
the java_lval (yylval) union. It is implemented to be called by a
re-entrant parser generated by Bison.
32
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
36
37 #include "keyword.h"
38 #include "flags.h"
39 #include "chartables.h"
40
41 /* Function declarations. */
42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
44 static void java_lex_error PARAMS ((const char *, int));
45 #ifndef JC1_LITE
46 static int java_is_eol PARAMS ((FILE *, int));
47 static tree build_wfl_node PARAMS ((tree));
48 #endif
49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50 static int java_parse_escape_sequence PARAMS ((void));
51 static int java_start_char_p PARAMS ((unicode_t));
52 static int java_part_char_p PARAMS ((unicode_t));
53 static int java_parse_doc_section PARAMS ((int));
54 static void java_parse_end_comment PARAMS ((int));
55 static int java_get_unicode PARAMS ((void));
56 static int java_read_unicode PARAMS ((java_lexer *, int *));
57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
58 int *));
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static int java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
64 #ifndef JC1_LITE
65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
66 #endif
67
68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
69 #ifndef JC1_LITE
70 static void error_if_numeric_overflow PARAMS ((tree));
71 #endif
72
#ifdef HAVE_ICONV
/* Nonzero once `need_byteswap' has been computed.  Objects with
   static storage duration start out as zero, so no explicit
   initializer is needed.  */
static int byteswap_init;

/* Some versions of iconv() (e.g., glibc 2.1.3) return UCS-2 in
   big-endian order rather than native endian order.  A trial
   conversion is performed the first time a lexer is created, and
   this flag records whether converted output has to be
   byte-swapped.  */
static int need_byteswap;
#endif
83
84 void
java_init_lex(finput,encoding)85 java_init_lex (finput, encoding)
86 FILE *finput;
87 const char *encoding;
88 {
89 #ifndef JC1_LITE
90 int java_lang_imported = 0;
91
92 if (!java_lang_id)
93 java_lang_id = get_identifier ("java.lang");
94 if (!inst_id)
95 inst_id = get_identifier ("inst$");
96 if (!wpv_id)
97 wpv_id = get_identifier ("write_parm_value$");
98
99 if (!java_lang_imported)
100 {
101 tree node = build_tree_list
102 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
103 read_import_dir (TREE_PURPOSE (node));
104 TREE_CHAIN (node) = ctxp->import_demand_list;
105 ctxp->import_demand_list = node;
106 java_lang_imported = 1;
107 }
108
109 if (!wfl_operator)
110 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
111 if (!label_id)
112 label_id = get_identifier ("$L");
113 if (!wfl_append)
114 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
115 if (!wfl_string_buffer)
116 wfl_string_buffer =
117 build_expr_wfl (get_identifier (flag_emit_class_files
118 ? "java.lang.StringBuffer"
119 : "gnu.gcj.runtime.StringBuffer"),
120 NULL, 0, 0);
121 if (!wfl_to_string)
122 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
123
124 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
125 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
126
127 memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
128 memset (current_jcf, 0, sizeof (JCF));
129 ctxp->current_parsed_class = NULL;
130 ctxp->package = NULL_TREE;
131 #endif
132
133 ctxp->filename = input_filename;
134 ctxp->lineno = lineno = 0;
135 ctxp->p_line = NULL;
136 ctxp->c_line = NULL;
137 ctxp->java_error_flag = 0;
138 ctxp->lexer = java_new_lexer (finput, encoding);
139 }
140
141 static char *
java_sprint_unicode(line,i)142 java_sprint_unicode (line, i)
143 struct java_line *line;
144 int i;
145 {
146 static char buffer [10];
147 if (line->unicode_escape_p [i] || line->line [i] > 128)
148 sprintf (buffer, "\\u%04x", line->line [i]);
149 else
150 {
151 buffer [0] = line->line [i];
152 buffer [1] = '\0';
153 }
154 return buffer;
155 }
156
157 static unicode_t
java_sneak_unicode()158 java_sneak_unicode ()
159 {
160 return (ctxp->c_line->line [ctxp->c_line->current]);
161 }
162
163 static void
java_unget_unicode()164 java_unget_unicode ()
165 {
166 if (!ctxp->c_line->current)
167 /* Can't unget unicode. */
168 abort ();
169
170 ctxp->c_line->current--;
171 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
172 }
173
174 static void
java_allocate_new_line()175 java_allocate_new_line ()
176 {
177 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
178 char ahead_escape_p = (ctxp->c_line ?
179 ctxp->c_line->unicode_escape_ahead_p : 0);
180
181 if (ctxp->c_line && !ctxp->c_line->white_space_only)
182 {
183 if (ctxp->p_line)
184 {
185 free (ctxp->p_line->unicode_escape_p);
186 free (ctxp->p_line->line);
187 free (ctxp->p_line);
188 }
189 ctxp->p_line = ctxp->c_line;
190 ctxp->c_line = NULL; /* Reallocated. */
191 }
192
193 if (!ctxp->c_line)
194 {
195 ctxp->c_line = xmalloc (sizeof (struct java_line));
196 ctxp->c_line->max = JAVA_LINE_MAX;
197 ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
198 ctxp->c_line->unicode_escape_p =
199 xmalloc (sizeof (char)*ctxp->c_line->max);
200 ctxp->c_line->white_space_only = 0;
201 }
202
203 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
204 ctxp->c_line->char_col = ctxp->c_line->current = 0;
205 if (ahead)
206 {
207 ctxp->c_line->line [ctxp->c_line->size] = ahead;
208 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
209 ctxp->c_line->size++;
210 }
211 ctxp->c_line->ahead [0] = 0;
212 ctxp->c_line->unicode_escape_ahead_p = 0;
213 ctxp->c_line->lineno = ++lineno;
214 ctxp->c_line->white_space_only = 1;
215 }
216
217 /* Create a new lexer object. */
218
219 java_lexer *
java_new_lexer(finput,encoding)220 java_new_lexer (finput, encoding)
221 FILE *finput;
222 const char *encoding;
223 {
224 java_lexer *lex = xmalloc (sizeof (java_lexer));
225 int enc_error = 0;
226
227 lex->finput = finput;
228 lex->bs_count = 0;
229 lex->unget_value = 0;
230 lex->hit_eof = 0;
231
232 #ifdef HAVE_ICONV
233 lex->handle = iconv_open ("UCS-2", encoding);
234 if (lex->handle != (iconv_t) -1)
235 {
236 lex->first = -1;
237 lex->last = -1;
238 lex->out_first = -1;
239 lex->out_last = -1;
240 lex->read_anything = 0;
241 lex->use_fallback = 0;
242
243 /* Work around broken iconv() implementations by doing checking at
244 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
245 then all UCS-2 encoders will be broken. Perhaps not a valid
246 assumption. */
247 if (! byteswap_init)
248 {
249 iconv_t handle;
250
251 byteswap_init = 1;
252
253 handle = iconv_open ("UCS-2", "UTF-8");
254 if (handle != (iconv_t) -1)
255 {
256 unicode_t result;
257 unsigned char in[3];
258 char *inp, *outp;
259 size_t inc, outc, r;
260
261 /* This is the UTF-8 encoding of \ufeff. */
262 in[0] = 0xef;
263 in[1] = 0xbb;
264 in[2] = 0xbf;
265
266 inp = in;
267 inc = 3;
268 outp = (char *) &result;
269 outc = 2;
270
271 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
272 &outp, &outc);
273 iconv_close (handle);
274 /* Conversion must be complete for us to use the result. */
275 if (r != (size_t) -1 && inc == 0 && outc == 0)
276 need_byteswap = (result != 0xfeff);
277 }
278 }
279
280 lex->byte_swap = need_byteswap;
281 }
282 else
283 #endif /* HAVE_ICONV */
284 {
285 /* If iconv failed, use the internal decoder if the default
286 encoding was requested. This code is used on platforms where
287 iconv exists but is insufficient for our needs. For
288 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
289
290 On Solaris the default encoding, as returned by nl_langinfo(),
291 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
292 understand that. We work around that by pretending
293 `646' to be the same as UTF-8. */
294 if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
295 enc_error = 1;
296 #ifdef HAVE_ICONV
297 else
298 lex->use_fallback = 1;
299 #endif /* HAVE_ICONV */
300 }
301
302 if (enc_error)
303 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
304
305 return lex;
306 }
307
308 void
java_destroy_lexer(lex)309 java_destroy_lexer (lex)
310 java_lexer *lex;
311 {
312 #ifdef HAVE_ICONV
313 if (! lex->use_fallback)
314 iconv_close (lex->handle);
315 #endif
316 free (lex);
317 }
318
319 static int
java_read_char(lex)320 java_read_char (lex)
321 java_lexer *lex;
322 {
323 if (lex->unget_value)
324 {
325 unicode_t r = lex->unget_value;
326 lex->unget_value = 0;
327 return r;
328 }
329
330 #ifdef HAVE_ICONV
331 if (! lex->use_fallback)
332 {
333 size_t ir, inbytesleft, in_save, out_count, out_save;
334 char *inp, *outp;
335 unicode_t result;
336
337 /* If there is data which has already been converted, use it. */
338 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
339 {
340 lex->out_first = 0;
341 lex->out_last = 0;
342
343 while (1)
344 {
345 /* See if we need to read more data. If FIRST == 0 then
346 the previous conversion attempt ended in the middle of
347 a character at the end of the buffer. Otherwise we
348 only have to read if the buffer is empty. */
349 if (lex->first == 0 || lex->first >= lex->last)
350 {
351 int r;
352
353 if (lex->first >= lex->last)
354 {
355 lex->first = 0;
356 lex->last = 0;
357 }
358 if (feof (lex->finput))
359 return UEOF;
360 r = fread (&lex->buffer[lex->last], 1,
361 sizeof (lex->buffer) - lex->last,
362 lex->finput);
363 lex->last += r;
364 }
365
366 inbytesleft = lex->last - lex->first;
367 out_count = sizeof (lex->out_buffer) - lex->out_last;
368
369 if (inbytesleft == 0)
370 {
371 /* We've tried to read and there is nothing left. */
372 return UEOF;
373 }
374
375 in_save = inbytesleft;
376 out_save = out_count;
377 inp = &lex->buffer[lex->first];
378 outp = &lex->out_buffer[lex->out_last];
379 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
380 &inbytesleft, &outp, &out_count);
381
382 /* If we haven't read any bytes, then look to see if we
383 have read a BOM. */
384 if (! lex->read_anything && out_save - out_count >= 2)
385 {
386 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
387 if (uc == 0xfeff)
388 {
389 lex->byte_swap = 0;
390 lex->out_first += 2;
391 }
392 else if (uc == 0xfffe)
393 {
394 lex->byte_swap = 1;
395 lex->out_first += 2;
396 }
397 lex->read_anything = 1;
398 }
399
400 if (lex->byte_swap)
401 {
402 unsigned int i;
403 for (i = 0; i < out_save - out_count; i += 2)
404 {
405 char t = lex->out_buffer[lex->out_last + i];
406 lex->out_buffer[lex->out_last + i]
407 = lex->out_buffer[lex->out_last + i + 1];
408 lex->out_buffer[lex->out_last + i + 1] = t;
409 }
410 }
411
412 lex->first += in_save - inbytesleft;
413 lex->out_last += out_save - out_count;
414
415 /* If we converted anything at all, move along. */
416 if (out_count != out_save)
417 break;
418
419 if (ir == (size_t) -1)
420 {
421 if (errno == EINVAL)
422 {
423 /* This is ok. This means that the end of our buffer
424 is in the middle of a character sequence. We just
425 move the valid part of the buffer to the beginning
426 to force a read. */
427 memmove (&lex->buffer[0], &lex->buffer[lex->first],
428 lex->last - lex->first);
429 lex->last -= lex->first;
430 lex->first = 0;
431 }
432 else
433 {
434 /* A more serious error. */
435 java_lex_error ("unrecognized character in input stream",
436 0);
437 return UEOF;
438 }
439 }
440 }
441 }
442
443 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
444 {
445 /* Don't have any data. */
446 return UEOF;
447 }
448
449 /* Success. */
450 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
451 lex->out_first += 2;
452 return result;
453 }
454 else
455 #endif /* HAVE_ICONV */
456 {
457 int c, c1, c2;
458 c = getc (lex->finput);
459
460 if (c == EOF)
461 return UEOF;
462 if (c < 128)
463 return (unicode_t) c;
464 else
465 {
466 if ((c & 0xe0) == 0xc0)
467 {
468 c1 = getc (lex->finput);
469 if ((c1 & 0xc0) == 0x80)
470 {
471 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
472 /* Check for valid 2-byte characters. We explicitly
473 allow \0 because this encoding is common in the
474 Java world. */
475 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
476 return r;
477 }
478 }
479 else if ((c & 0xf0) == 0xe0)
480 {
481 c1 = getc (lex->finput);
482 if ((c1 & 0xc0) == 0x80)
483 {
484 c2 = getc (lex->finput);
485 if ((c2 & 0xc0) == 0x80)
486 {
487 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
488 (( c1 & 0x3f) << 6)
489 + (c2 & 0x3f));
490 /* Check for valid 3-byte characters.
491 Don't allow surrogate, \ufffe or \uffff. */
492 if (IN_RANGE (r, 0x800, 0xffff)
493 && ! IN_RANGE (r, 0xd800, 0xdfff)
494 && r != 0xfffe && r != 0xffff)
495 return r;
496 }
497 }
498 }
499
500 /* We simply don't support invalid characters. We also
501 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
502 cannot be valid Java characters. */
503 java_lex_error ("malformed UTF-8 character", 0);
504 }
505 }
506
507 /* We only get here on error. */
508 return UEOF;
509 }
510
511 static void
java_store_unicode(l,c,unicode_escape_p)512 java_store_unicode (l, c, unicode_escape_p)
513 struct java_line *l;
514 unicode_t c;
515 int unicode_escape_p;
516 {
517 if (l->size == l->max)
518 {
519 l->max += JAVA_LINE_MAX;
520 l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
521 l->unicode_escape_p = xrealloc (l->unicode_escape_p,
522 sizeof (char)*l->max);
523 }
524 l->line [l->size] = c;
525 l->unicode_escape_p [l->size++] = unicode_escape_p;
526 }
527
528 static int
java_read_unicode(lex,unicode_escape_p)529 java_read_unicode (lex, unicode_escape_p)
530 java_lexer *lex;
531 int *unicode_escape_p;
532 {
533 int c;
534
535 c = java_read_char (lex);
536 *unicode_escape_p = 0;
537
538 if (c != '\\')
539 {
540 lex->bs_count = 0;
541 return c;
542 }
543
544 ++lex->bs_count;
545 if ((lex->bs_count) % 2 == 1)
546 {
547 /* Odd number of \ seen. */
548 c = java_read_char (lex);
549 if (c == 'u')
550 {
551 unicode_t unicode = 0;
552 int shift = 12;
553
554 /* Recognize any number of `u's in \u. */
555 while ((c = java_read_char (lex)) == 'u')
556 ;
557
558 shift = 12;
559 do
560 {
561 if (c == UEOF)
562 {
563 java_lex_error ("prematurely terminated \\u sequence", 0);
564 return UEOF;
565 }
566
567 if (hex_p (c))
568 unicode |= (unicode_t)(hex_value (c) << shift);
569 else
570 {
571 java_lex_error ("non-hex digit in \\u sequence", 0);
572 break;
573 }
574
575 c = java_read_char (lex);
576 shift -= 4;
577 }
578 while (shift >= 0);
579
580 if (c != UEOF)
581 lex->unget_value = c;
582
583 lex->bs_count = 0;
584 *unicode_escape_p = 1;
585 return unicode;
586 }
587 lex->unget_value = c;
588 }
589 return (unicode_t) '\\';
590 }
591
592 static int
java_read_unicode_collapsing_terminators(lex,unicode_escape_p)593 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
594 java_lexer *lex;
595 int *unicode_escape_p;
596 {
597 int c = java_read_unicode (lex, unicode_escape_p);
598
599 if (c == '\r')
600 {
601 /* We have to read ahead to see if we got \r\n. In that case we
602 return a single line terminator. */
603 int dummy;
604 c = java_read_unicode (lex, &dummy);
605 if (c != '\n' && c != UEOF)
606 lex->unget_value = c;
607 /* In either case we must return a newline. */
608 c = '\n';
609 }
610
611 return c;
612 }
613
614 static int
java_get_unicode()615 java_get_unicode ()
616 {
617 /* It's time to read a line when... */
618 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
619 {
620 int c;
621 int found_chars = 0;
622
623 if (ctxp->lexer->hit_eof)
624 return UEOF;
625
626 java_allocate_new_line ();
627 if (ctxp->c_line->line[0] != '\n')
628 {
629 for (;;)
630 {
631 int unicode_escape_p;
632 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
633 &unicode_escape_p);
634 if (c != UEOF)
635 {
636 found_chars = 1;
637 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
638 if (ctxp->c_line->white_space_only
639 && !JAVA_WHITE_SPACE_P (c)
640 && c != '\n')
641 ctxp->c_line->white_space_only = 0;
642 }
643 if ((c == '\n') || (c == UEOF))
644 break;
645 }
646
647 if (c == UEOF && ! found_chars)
648 {
649 ctxp->lexer->hit_eof = 1;
650 return UEOF;
651 }
652 }
653 }
654 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
655 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
656 return ctxp->c_line->line [ctxp->c_line->current++];
657 }
658
659 /* Parse the end of a C style comment.
660 * C is the first character following the '/' and '*'. */
661 static void
java_parse_end_comment(c)662 java_parse_end_comment (c)
663 int c;
664 {
665 for ( ;; c = java_get_unicode ())
666 {
667 switch (c)
668 {
669 case UEOF:
670 java_lex_error ("Comment not terminated at end of input", 0);
671 return;
672 case '*':
673 switch (c = java_get_unicode ())
674 {
675 case UEOF:
676 java_lex_error ("Comment not terminated at end of input", 0);
677 return;
678 case '/':
679 return;
680 case '*': /* Reparse only '*'. */
681 java_unget_unicode ();
682 }
683 }
684 }
685 }
686
687 /* Parse the documentation section. Keywords must be at the beginning
688 of a documentation comment line (ignoring white space and any `*'
689 character). Parsed keyword(s): @DEPRECATED. */
690
691 static int
java_parse_doc_section(c)692 java_parse_doc_section (c)
693 int c;
694 {
695 int valid_tag = 0, seen_star = 0;
696
697 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
698 {
699 switch (c)
700 {
701 case '*':
702 seen_star = 1;
703 break;
704 case '\n': /* ULT */
705 valid_tag = 1;
706 default:
707 seen_star = 0;
708 }
709 c = java_get_unicode();
710 }
711
712 if (c == UEOF)
713 java_lex_error ("Comment not terminated at end of input", 0);
714
715 if (seen_star && (c == '/'))
716 return 1; /* Goto step1 in caller. */
717
718 /* We're parsing `@deprecated'. */
719 if (valid_tag && (c == '@'))
720 {
721 char tag [11];
722 int tag_index = 0;
723
724 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
725 {
726 c = java_get_unicode ();
727 tag [tag_index++] = c;
728 }
729
730 if (c == UEOF)
731 java_lex_error ("Comment not terminated at end of input", 0);
732 tag [tag_index] = '\0';
733
734 if (!strcmp (tag, "deprecated"))
735 ctxp->deprecated = 1;
736 }
737 java_unget_unicode ();
738 return 0;
739 }
740
741 /* Return true if C is a valid start character for a Java identifier.
742 This is only called if C >= 128 -- smaller values are handled
743 inline. However, this function handles all values anyway. */
744 static int
java_start_char_p(c)745 java_start_char_p (c)
746 unicode_t c;
747 {
748 unsigned int hi = c / 256;
749 const char *const page = type_table[hi];
750 unsigned long val = (unsigned long) page;
751 int flags;
752
753 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
754 flags = page[c & 255];
755 else
756 flags = val;
757
758 return flags & LETTER_START;
759 }
760
761 /* Return true if C is a valid part character for a Java identifier.
762 This is only called if C >= 128 -- smaller values are handled
763 inline. However, this function handles all values anyway. */
764 static int
java_part_char_p(c)765 java_part_char_p (c)
766 unicode_t c;
767 {
768 unsigned int hi = c / 256;
769 const char *const page = type_table[hi];
770 unsigned long val = (unsigned long) page;
771 int flags;
772
773 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
774 flags = page[c & 255];
775 else
776 flags = val;
777
778 return flags & LETTER_PART;
779 }
780
781 static int
java_parse_escape_sequence()782 java_parse_escape_sequence ()
783 {
784 unicode_t char_lit;
785 int c;
786
787 switch (c = java_get_unicode ())
788 {
789 case 'b':
790 return (unicode_t)0x8;
791 case 't':
792 return (unicode_t)0x9;
793 case 'n':
794 return (unicode_t)0xa;
795 case 'f':
796 return (unicode_t)0xc;
797 case 'r':
798 return (unicode_t)0xd;
799 case '"':
800 return (unicode_t)0x22;
801 case '\'':
802 return (unicode_t)0x27;
803 case '\\':
804 return (unicode_t)0x5c;
805 case '0': case '1': case '2': case '3': case '4':
806 case '5': case '6': case '7':
807 {
808 int octal_escape[3];
809 int octal_escape_index = 0;
810 int max = 3;
811 int i, shift;
812
813 for (; octal_escape_index < max && RANGE (c, '0', '7');
814 c = java_get_unicode ())
815 {
816 if (octal_escape_index == 0 && c > '3')
817 {
818 /* According to the grammar, `\477' has a well-defined
819 meaning -- it is `\47' followed by `7'. */
820 --max;
821 }
822 octal_escape [octal_escape_index++] = c;
823 }
824
825 java_unget_unicode ();
826
827 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
828 i < octal_escape_index; i++, shift -= 3)
829 char_lit |= (octal_escape [i] - '0') << shift;
830
831 return char_lit;
832 }
833 default:
834 java_lex_error ("Invalid character in escape sequence", 0);
835 return JAVA_CHAR_ERROR;
836 }
837 }
838
839 #ifndef JC1_LITE
840 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
841
842 /* Subroutine of java_lex: converts floating-point literals to tree
843 nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
844 store the result. FFLAG indicates whether the literal was tagged
845 with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
846 is the line number on which to report any error. */
847
848 static void java_perform_atof PARAMS ((YYSTYPE *, char *, int, int));
849
850 static void
java_perform_atof(java_lval,literal_token,fflag,number_beginning)851 java_perform_atof (java_lval, literal_token, fflag, number_beginning)
852 YYSTYPE *java_lval;
853 char *literal_token;
854 int fflag;
855 int number_beginning;
856 {
857 REAL_VALUE_TYPE value;
858 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
859
860 SET_REAL_VALUE_ATOF (value,
861 REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
862
863 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
864 {
865 JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
866 value = DCONST0;
867 }
868 else if (IS_ZERO (value))
869 {
870 /* We check to see if the value is really 0 or if we've found an
871 underflow. We do this in the most primitive imaginable way. */
872 int really_zero = 1;
873 char *p = literal_token;
874 if (*p == '-')
875 ++p;
876 while (*p && *p != 'e' && *p != 'E')
877 {
878 if (*p != '0' && *p != '.')
879 {
880 really_zero = 0;
881 break;
882 }
883 ++p;
884 }
885 if (! really_zero)
886 {
887 int i = ctxp->c_line->current;
888 ctxp->c_line->current = number_beginning;
889 java_lex_error ("Floating point literal underflow", 0);
890 ctxp->c_line->current = i;
891 }
892 }
893
894 SET_LVAL_NODE_TYPE (build_real (type, value), type);
895 }
896 #endif
897
898 static int yylex PARAMS ((YYSTYPE *));
899
900 static int
901 #ifdef JC1_LITE
yylex(java_lval)902 yylex (java_lval)
903 #else
904 java_lex (java_lval)
905 #endif
906 YYSTYPE *java_lval;
907 {
908 int c;
909 unicode_t first_unicode;
910 int ascii_index, all_ascii;
911 char *string;
912
913 /* Translation of the Unicode escape in the raw stream of Unicode
914 characters. Takes care of line terminator. */
915 step1:
916 /* Skip white spaces: SP, TAB and FF or ULT. */
917 for (c = java_get_unicode ();
918 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
919 if (c == '\n')
920 {
921 ctxp->elc.line = ctxp->c_line->lineno;
922 ctxp->elc.col = ctxp->c_line->char_col-2;
923 }
924
925 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
926
927 if (c == 0x1a) /* CTRL-Z. */
928 {
929 if ((c = java_get_unicode ()) == UEOF)
930 return 0; /* Ok here. */
931 else
932 java_unget_unicode (); /* Caught later, at the end of the
933 function. */
934 }
935 /* Handle EOF here. */
936 if (c == UEOF) /* Should probably do something here... */
937 return 0;
938
939 /* Take care of eventual comments. */
940 if (c == '/')
941 {
942 switch (c = java_get_unicode ())
943 {
944 case '/':
945 for (;;)
946 {
947 c = java_get_unicode ();
948 if (c == UEOF)
949 {
950 /* It is ok to end a `//' comment with EOF, unless
951 we're being pedantic. */
952 if (pedantic)
953 java_lex_error ("Comment not terminated at end of input",
954 0);
955 return 0;
956 }
957 if (c == '\n') /* ULT */
958 goto step1;
959 }
960 break;
961
962 case '*':
963 if ((c = java_get_unicode ()) == '*')
964 {
965 if ((c = java_get_unicode ()) == '/')
966 goto step1; /* Empty documentation comment. */
967 else if (java_parse_doc_section (c))
968 goto step1;
969 }
970
971 java_parse_end_comment ((c = java_get_unicode ()));
972 goto step1;
973 break;
974 default:
975 java_unget_unicode ();
976 c = '/';
977 break;
978 }
979 }
980
981 ctxp->elc.line = ctxp->c_line->lineno;
982 ctxp->elc.prev_col = ctxp->elc.col;
983 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
984 if (ctxp->elc.col < 0)
985 abort ();
986
987 /* Numeric literals. */
988 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
989 {
990 /* This section of code is borrowed from gcc/c-lex.c. */
991 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
992 int parts[TOTAL_PARTS];
993 HOST_WIDE_INT high, low;
994 /* End borrowed section. */
995 char literal_token [256];
996 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
997 int found_hex_digits = 0, found_non_octal_digits = 0;
998 int i;
999 #ifndef JC1_LITE
1000 int number_beginning = ctxp->c_line->current;
1001 tree value;
1002 #endif
1003
1004 /* We might have a . separator instead of a FP like .[0-9]*. */
1005 if (c == '.')
1006 {
1007 unicode_t peep = java_sneak_unicode ();
1008
1009 if (!JAVA_ASCII_DIGIT (peep))
1010 {
1011 JAVA_LEX_SEP('.');
1012 BUILD_OPERATOR (DOT_TK);
1013 }
1014 }
1015
1016 for (i = 0; i < TOTAL_PARTS; i++)
1017 parts [i] = 0;
1018
1019 if (c == '0')
1020 {
1021 c = java_get_unicode ();
1022 if (c == 'x' || c == 'X')
1023 {
1024 radix = 16;
1025 c = java_get_unicode ();
1026 }
1027 else if (JAVA_ASCII_DIGIT (c))
1028 radix = 8;
1029 else if (c == '.' || c == 'e' || c =='E')
1030 {
1031 /* Push the '.', 'e', or 'E' back and prepare for a FP
1032 parsing... */
1033 java_unget_unicode ();
1034 c = '0';
1035 }
1036 else
1037 {
1038 /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
1039 JAVA_LEX_LIT ("0", 10);
1040 switch (c)
1041 {
1042 case 'L': case 'l':
1043 SET_LVAL_NODE (long_zero_node);
1044 return (INT_LIT_TK);
1045 case 'f': case 'F':
1046 SET_LVAL_NODE (float_zero_node);
1047 return (FP_LIT_TK);
1048 case 'd': case 'D':
1049 SET_LVAL_NODE (double_zero_node);
1050 return (FP_LIT_TK);
1051 default:
1052 java_unget_unicode ();
1053 SET_LVAL_NODE (integer_zero_node);
1054 return (INT_LIT_TK);
1055 }
1056 }
1057 }
1058 /* Parse the first part of the literal, until we find something
1059 which is not a number. */
1060 while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1061 JAVA_ASCII_DIGIT (c))
1062 {
1063 /* We store in a string (in case it turns out to be a FP) and in
1064 PARTS if we have to process a integer literal. */
1065 int numeric = hex_value (c);
1066 int count;
1067
1068 /* Remember when we find a valid hexadecimal digit. */
1069 if (radix == 16)
1070 found_hex_digits = 1;
1071 /* Remember when we find an invalid octal digit. */
1072 else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1073 found_non_octal_digits = 1;
1074
1075 literal_token [literal_index++] = c;
1076 /* This section of code if borrowed from gcc/c-lex.c. */
1077 for (count = 0; count < TOTAL_PARTS; count++)
1078 {
1079 parts[count] *= radix;
1080 if (count)
1081 {
1082 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1083 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1084 }
1085 else
1086 parts[0] += numeric;
1087 }
1088 if (parts [TOTAL_PARTS-1] != 0)
1089 overflow = 1;
1090 /* End borrowed section. */
1091 c = java_get_unicode ();
1092 }
1093
1094 /* If we have something from the FP char set but not a digit, parse
1095 a FP literal. */
1096 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1097 {
1098 int stage = 0;
1099 int seen_digit = (literal_index ? 1 : 0);
1100 int seen_exponent = 0;
1101 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1102 double unless specified. */
1103
1104 /* It is ok if the radix is 8 because this just means we've
1105 seen a leading `0'. However, radix==16 is invalid. */
1106 if (radix == 16)
1107 java_lex_error ("Can't express non-decimal FP literal", 0);
1108 radix = 10;
1109
1110 for (;;)
1111 {
1112 if (c == '.')
1113 {
1114 if (stage < 1)
1115 {
1116 stage = 1;
1117 literal_token [literal_index++ ] = c;
1118 c = java_get_unicode ();
1119 }
1120 else
1121 java_lex_error ("Invalid character in FP literal", 0);
1122 }
1123
1124 if (c == 'e' || c == 'E')
1125 {
1126 if (stage < 2)
1127 {
1128 /* {E,e} must have seen at least a digit. */
1129 if (!seen_digit)
1130 java_lex_error
1131 ("Invalid FP literal, mantissa must have digit", 0);
1132 seen_digit = 0;
1133 seen_exponent = 1;
1134 stage = 2;
1135 literal_token [literal_index++] = c;
1136 c = java_get_unicode ();
1137 }
1138 else
1139 java_lex_error ("Invalid character in FP literal", 0);
1140 }
1141 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1142 {
1143 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1144 stage = 4; /* So we fall through. */
1145 }
1146
1147 if ((c=='-' || c =='+') && stage == 2)
1148 {
1149 stage = 3;
1150 literal_token [literal_index++] = c;
1151 c = java_get_unicode ();
1152 }
1153
1154 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1155 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1156 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1157 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1158 {
1159 if (JAVA_ASCII_DIGIT (c))
1160 seen_digit = 1;
1161 if (stage == 2)
1162 stage = 3;
1163 literal_token [literal_index++ ] = c;
1164 c = java_get_unicode ();
1165 }
1166 else
1167 {
1168 if (stage != 4) /* Don't push back fF/dD. */
1169 java_unget_unicode ();
1170
1171 /* An exponent (if any) must have seen a digit. */
1172 if (seen_exponent && !seen_digit)
1173 java_lex_error
1174 ("Invalid FP literal, exponent must have digit", 0);
1175
1176 literal_token [literal_index] = '\0';
1177 JAVA_LEX_LIT (literal_token, radix);
1178
1179 #ifndef JC1_LITE
1180 java_perform_atof (java_lval, literal_token,
1181 fflag, number_beginning);
1182 #endif
1183 return FP_LIT_TK;
1184 }
1185 }
1186 } /* JAVA_ASCII_FPCHAR (c) */
1187
1188 /* Here we get back to converting the integral literal. */
1189 if (radix == 16 && ! found_hex_digits)
1190 java_lex_error
1191 ("0x must be followed by at least one hexadecimal digit", 0);
1192 else if (radix == 8 && found_non_octal_digits)
1193 java_lex_error ("Octal literal contains digit out of range", 0);
1194 else if (c == 'L' || c == 'l')
1195 long_suffix = 1;
1196 else
1197 java_unget_unicode ();
1198
1199 #ifdef JAVA_LEX_DEBUG
1200 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1201 JAVA_LEX_LIT (literal_token, radix);
1202 #endif
1203 /* This section of code is borrowed from gcc/c-lex.c. */
1204 if (!overflow)
1205 {
1206 bytes = GET_TYPE_PRECISION (long_type_node);
1207 for (i = bytes; i < TOTAL_PARTS; i++)
1208 if (parts [i])
1209 {
1210 overflow = 1;
1211 break;
1212 }
1213 }
1214 high = low = 0;
1215 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1216 {
1217 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1218 / HOST_BITS_PER_CHAR)]
1219 << (i * HOST_BITS_PER_CHAR));
1220 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1221 }
1222 /* End borrowed section. */
1223
1224 #ifndef JC1_LITE
1225 /* Range checking. */
1226 value = build_int_2 (low, high);
1227 /* Temporarily set type to unsigned. */
1228 SET_LVAL_NODE_TYPE (value, (long_suffix
1229 ? unsigned_long_type_node
1230 : unsigned_int_type_node));
1231
1232 /* For base 10 numbers, only values up to the highest value
1233 (plus one) can be written. For instance, only ints up to
1234 2147483648 can be written. The special case of the largest
1235 negative value is handled elsewhere. For other bases, any
1236 number can be represented. */
1237 if (overflow || (radix == 10
1238 && tree_int_cst_lt (long_suffix
1239 ? decimal_long_max
1240 : decimal_int_max,
1241 value)))
1242 {
1243 if (long_suffix)
1244 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1245 else
1246 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1247 }
1248
1249 /* Sign extend the value. */
1250 SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1251 force_fit_type (value, 0);
1252 JAVA_RADIX10_FLAG (value) = radix == 10;
1253 #else
1254 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1255 long_suffix ? long_type_node : int_type_node);
1256 #endif
1257 return INT_LIT_TK;
1258 }
1259
1260 /* Character literals. */
1261 if (c == '\'')
1262 {
1263 int char_lit;
1264 if ((c = java_get_unicode ()) == '\\')
1265 char_lit = java_parse_escape_sequence ();
1266 else
1267 {
1268 if (c == '\n' || c == '\'')
1269 java_lex_error ("Invalid character literal", 0);
1270 char_lit = c;
1271 }
1272
1273 c = java_get_unicode ();
1274
1275 if ((c == '\n') || (c == UEOF))
1276 java_lex_error ("Character literal not terminated at end of line", 0);
1277 if (c != '\'')
1278 java_lex_error ("Syntax error in character literal", 0);
1279
1280 if (char_lit == JAVA_CHAR_ERROR)
1281 char_lit = 0; /* We silently convert it to zero. */
1282
1283 JAVA_LEX_CHAR_LIT (char_lit);
1284 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1285 return CHAR_LIT_TK;
1286 }
1287
1288 /* String literals. */
1289 if (c == '"')
1290 {
1291 int no_error;
1292 char *string;
1293
1294 for (no_error = 1, c = java_get_unicode ();
1295 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1296 {
1297 if (c == '\\')
1298 c = java_parse_escape_sequence ();
1299 if (c == JAVA_CHAR_ERROR)
1300 {
1301 no_error = 0;
1302 c = 0; /* We silently convert it to zero. */
1303 }
1304 java_unicode_2_utf8 (c);
1305 }
1306 if (c == '\n' || c == UEOF) /* ULT. */
1307 {
1308 lineno--; /* Refer to the line where the terminator was seen. */
1309 java_lex_error ("String not terminated at end of line", 0);
1310 lineno++;
1311 }
1312
1313 obstack_1grow (&temporary_obstack, '\0');
1314 string = obstack_finish (&temporary_obstack);
1315 #ifndef JC1_LITE
1316 if (!no_error || (c != '"'))
1317 java_lval->node = error_mark_node; /* FIXME: Requires futher
1318 testing. */
1319 else
1320 java_lval->node = build_string (strlen (string), string);
1321 #endif
1322 obstack_free (&temporary_obstack, string);
1323 return STRING_LIT_TK;
1324 }
1325
1326 /* Separator. */
1327 switch (c)
1328 {
1329 case '(':
1330 JAVA_LEX_SEP (c);
1331 BUILD_OPERATOR (OP_TK);
1332 case ')':
1333 JAVA_LEX_SEP (c);
1334 return CP_TK;
1335 case '{':
1336 JAVA_LEX_SEP (c);
1337 if (ctxp->ccb_indent == 1)
1338 ctxp->first_ccb_indent1 = lineno;
1339 ctxp->ccb_indent++;
1340 BUILD_OPERATOR (OCB_TK);
1341 case '}':
1342 JAVA_LEX_SEP (c);
1343 ctxp->ccb_indent--;
1344 if (ctxp->ccb_indent == 1)
1345 ctxp->last_ccb_indent1 = lineno;
1346 BUILD_OPERATOR (CCB_TK);
1347 case '[':
1348 JAVA_LEX_SEP (c);
1349 BUILD_OPERATOR (OSB_TK);
1350 case ']':
1351 JAVA_LEX_SEP (c);
1352 return CSB_TK;
1353 case ';':
1354 JAVA_LEX_SEP (c);
1355 return SC_TK;
1356 case ',':
1357 JAVA_LEX_SEP (c);
1358 return C_TK;
1359 case '.':
1360 JAVA_LEX_SEP (c);
1361 BUILD_OPERATOR (DOT_TK);
1362 /* return DOT_TK; */
1363 }
1364
1365 /* Operators. */
1366 switch (c)
1367 {
1368 case '=':
1369 if ((c = java_get_unicode ()) == '=')
1370 {
1371 BUILD_OPERATOR (EQ_TK);
1372 }
1373 else
1374 {
1375 /* Equals is used in two different locations. In the
1376 variable_declarator: rule, it has to be seen as '=' as opposed
1377 to being seen as an ordinary assignment operator in
1378 assignment_operators: rule. */
1379 java_unget_unicode ();
1380 BUILD_OPERATOR (ASSIGN_TK);
1381 }
1382
1383 case '>':
1384 switch ((c = java_get_unicode ()))
1385 {
1386 case '=':
1387 BUILD_OPERATOR (GTE_TK);
1388 case '>':
1389 switch ((c = java_get_unicode ()))
1390 {
1391 case '>':
1392 if ((c = java_get_unicode ()) == '=')
1393 {
1394 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1395 }
1396 else
1397 {
1398 java_unget_unicode ();
1399 BUILD_OPERATOR (ZRS_TK);
1400 }
1401 case '=':
1402 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1403 default:
1404 java_unget_unicode ();
1405 BUILD_OPERATOR (SRS_TK);
1406 }
1407 default:
1408 java_unget_unicode ();
1409 BUILD_OPERATOR (GT_TK);
1410 }
1411
1412 case '<':
1413 switch ((c = java_get_unicode ()))
1414 {
1415 case '=':
1416 BUILD_OPERATOR (LTE_TK);
1417 case '<':
1418 if ((c = java_get_unicode ()) == '=')
1419 {
1420 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1421 }
1422 else
1423 {
1424 java_unget_unicode ();
1425 BUILD_OPERATOR (LS_TK);
1426 }
1427 default:
1428 java_unget_unicode ();
1429 BUILD_OPERATOR (LT_TK);
1430 }
1431
1432 case '&':
1433 switch ((c = java_get_unicode ()))
1434 {
1435 case '&':
1436 BUILD_OPERATOR (BOOL_AND_TK);
1437 case '=':
1438 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1439 default:
1440 java_unget_unicode ();
1441 BUILD_OPERATOR (AND_TK);
1442 }
1443
1444 case '|':
1445 switch ((c = java_get_unicode ()))
1446 {
1447 case '|':
1448 BUILD_OPERATOR (BOOL_OR_TK);
1449 case '=':
1450 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1451 default:
1452 java_unget_unicode ();
1453 BUILD_OPERATOR (OR_TK);
1454 }
1455
1456 case '+':
1457 switch ((c = java_get_unicode ()))
1458 {
1459 case '+':
1460 BUILD_OPERATOR (INCR_TK);
1461 case '=':
1462 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1463 default:
1464 java_unget_unicode ();
1465 BUILD_OPERATOR (PLUS_TK);
1466 }
1467
1468 case '-':
1469 switch ((c = java_get_unicode ()))
1470 {
1471 case '-':
1472 BUILD_OPERATOR (DECR_TK);
1473 case '=':
1474 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1475 default:
1476 java_unget_unicode ();
1477 BUILD_OPERATOR (MINUS_TK);
1478 }
1479
1480 case '*':
1481 if ((c = java_get_unicode ()) == '=')
1482 {
1483 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1484 }
1485 else
1486 {
1487 java_unget_unicode ();
1488 BUILD_OPERATOR (MULT_TK);
1489 }
1490
1491 case '/':
1492 if ((c = java_get_unicode ()) == '=')
1493 {
1494 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1495 }
1496 else
1497 {
1498 java_unget_unicode ();
1499 BUILD_OPERATOR (DIV_TK);
1500 }
1501
1502 case '^':
1503 if ((c = java_get_unicode ()) == '=')
1504 {
1505 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1506 }
1507 else
1508 {
1509 java_unget_unicode ();
1510 BUILD_OPERATOR (XOR_TK);
1511 }
1512
1513 case '%':
1514 if ((c = java_get_unicode ()) == '=')
1515 {
1516 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1517 }
1518 else
1519 {
1520 java_unget_unicode ();
1521 BUILD_OPERATOR (REM_TK);
1522 }
1523
1524 case '!':
1525 if ((c = java_get_unicode()) == '=')
1526 {
1527 BUILD_OPERATOR (NEQ_TK);
1528 }
1529 else
1530 {
1531 java_unget_unicode ();
1532 BUILD_OPERATOR (NEG_TK);
1533 }
1534
1535 case '?':
1536 JAVA_LEX_OP ("?");
1537 BUILD_OPERATOR (REL_QM_TK);
1538 case ':':
1539 JAVA_LEX_OP (":");
1540 BUILD_OPERATOR (REL_CL_TK);
1541 case '~':
1542 BUILD_OPERATOR (NOT_TK);
1543 }
1544
1545 /* Keyword, boolean literal or null literal. */
1546 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1547 c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1548 {
1549 java_unicode_2_utf8 (c);
1550 if (all_ascii && c >= 128)
1551 all_ascii = 0;
1552 ascii_index++;
1553 }
1554
1555 obstack_1grow (&temporary_obstack, '\0');
1556 string = obstack_finish (&temporary_obstack);
1557 if (c != UEOF)
1558 java_unget_unicode ();
1559
1560 /* If we have something all ascii, we consider a keyword, a boolean
1561 literal, a null literal or an all ASCII identifier. Otherwise,
1562 this is an identifier (possibly not respecting formation rule). */
1563 if (all_ascii)
1564 {
1565 const struct java_keyword *kw;
1566 if ((kw=java_keyword (string, ascii_index)))
1567 {
1568 JAVA_LEX_KW (string);
1569 switch (kw->token)
1570 {
1571 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1572 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1573 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1574 case PRIVATE_TK: case STRICT_TK:
1575 SET_MODIFIER_CTX (kw->token);
1576 return MODIFIER_TK;
1577 case FLOAT_TK:
1578 SET_LVAL_NODE (float_type_node);
1579 return FP_TK;
1580 case DOUBLE_TK:
1581 SET_LVAL_NODE (double_type_node);
1582 return FP_TK;
1583 case BOOLEAN_TK:
1584 SET_LVAL_NODE (boolean_type_node);
1585 return BOOLEAN_TK;
1586 case BYTE_TK:
1587 SET_LVAL_NODE (byte_type_node);
1588 return INTEGRAL_TK;
1589 case SHORT_TK:
1590 SET_LVAL_NODE (short_type_node);
1591 return INTEGRAL_TK;
1592 case INT_TK:
1593 SET_LVAL_NODE (int_type_node);
1594 return INTEGRAL_TK;
1595 case LONG_TK:
1596 SET_LVAL_NODE (long_type_node);
1597 return INTEGRAL_TK;
1598 case CHAR_TK:
1599 SET_LVAL_NODE (char_type_node);
1600 return INTEGRAL_TK;
1601
1602 /* Keyword based literals. */
1603 case TRUE_TK:
1604 case FALSE_TK:
1605 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1606 boolean_true_node : boolean_false_node));
1607 return BOOL_LIT_TK;
1608 case NULL_TK:
1609 SET_LVAL_NODE (null_pointer_node);
1610 return NULL_TK;
1611
1612 case ASSERT_TK:
1613 if (flag_assert)
1614 {
1615 BUILD_OPERATOR (kw->token);
1616 return kw->token;
1617 }
1618 else
1619 break;
1620
	  /* For some keywords we want to retain information about the
	     location where they were found.  */
1623 case CASE_TK:
1624 case DEFAULT_TK:
1625 case SUPER_TK:
1626 case THIS_TK:
1627 case RETURN_TK:
1628 case BREAK_TK:
1629 case CONTINUE_TK:
1630 case TRY_TK:
1631 case CATCH_TK:
1632 case THROW_TK:
1633 case INSTANCEOF_TK:
1634 BUILD_OPERATOR (kw->token);
1635
1636 default:
1637 return kw->token;
1638 }
1639 }
1640 }
1641
1642 /* We may have an ID here. */
1643 if (JAVA_START_CHAR_P (first_unicode))
1644 {
1645 JAVA_LEX_ID (string);
1646 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1647 return ID_TK;
1648 }
1649
1650 /* Everything else is an invalid character in the input. */
1651 {
1652 char lex_error_buffer [128];
1653 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1654 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1655 java_lex_error (lex_error_buffer, 1);
1656 }
1657 return 0;
1658 }
1659
1660 #ifndef JC1_LITE
1661 /* This is called by the parser to see if an error should be generated
1662 due to numeric overflow. This function only handles the particular
1663 case of the largest negative value, and is only called in the case
1664 where this value is not preceded by `-'. */
1665 static void
error_if_numeric_overflow(value)1666 error_if_numeric_overflow (value)
1667 tree value;
1668 {
1669 if (TREE_CODE (value) == INTEGER_CST
1670 && JAVA_RADIX10_FLAG (value)
1671 && tree_int_cst_sgn (value) < 0)
1672 {
1673 if (TREE_TYPE (value) == long_type_node)
1674 java_lex_error ("Numeric overflow for `long' literal", 0);
1675 else
1676 java_lex_error ("Numeric overflow for `int' literal", 0);
1677 }
1678 }
1679 #endif /* JC1_LITE */
1680
1681 static void
java_unicode_2_utf8(unicode)1682 java_unicode_2_utf8 (unicode)
1683 unicode_t unicode;
1684 {
1685 if (RANGE (unicode, 0x01, 0x7f))
1686 obstack_1grow (&temporary_obstack, (char)unicode);
1687 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1688 {
1689 obstack_1grow (&temporary_obstack,
1690 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1691 obstack_1grow (&temporary_obstack,
1692 (unsigned char)(0x80 | (unicode & 0x3f)));
1693 }
1694 else /* Range 0x800-0xffff. */
1695 {
1696 obstack_1grow (&temporary_obstack,
1697 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1698 obstack_1grow (&temporary_obstack,
1699 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1700 obstack_1grow (&temporary_obstack,
1701 (unsigned char)(0x80 | (unicode & 0x003f)));
1702 }
1703 }
1704
1705 #ifndef JC1_LITE
1706 static tree
build_wfl_node(node)1707 build_wfl_node (node)
1708 tree node;
1709 {
1710 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1711 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1712 TREE_TYPE (node) = NULL_TREE;
1713 return node;
1714 }
1715 #endif
1716
1717 static void
java_lex_error(msg,forward)1718 java_lex_error (msg, forward)
1719 const char *msg ATTRIBUTE_UNUSED;
1720 int forward ATTRIBUTE_UNUSED;
1721 {
1722 #ifndef JC1_LITE
1723 ctxp->elc.line = ctxp->c_line->lineno;
1724 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1725
1726 /* Might be caught in the middle of some error report. */
1727 ctxp->java_error_flag = 0;
1728 java_error (NULL);
1729 java_error (msg);
1730 #endif
1731 }
1732
1733 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, terminates a line and
   0 otherwise.  A line ends with `\n', `\r' or `\r\n'.  For `\r' the
   next character is read from FP: a `\n' is consumed as part of the
   terminator, end of file is left alone, and anything else is pushed
   back.  */
static int
java_is_eol (fp, c)
     FILE *fp;
     int c;
{
  int following;

  if (c == '\n')
    return 1;
  if (c != '\r')
    return 0;

  /* Swallow the `\n' of a `\r\n' pair, but nothing else.  */
  following = getc (fp);
  if (following != EOF && following != '\n')
    ungetc (following, fp);
  return 1;
}
1753 #endif
1754
1755 char *
java_get_line_col(filename,line,col)1756 java_get_line_col (filename, line, col)
1757 const char *filename ATTRIBUTE_UNUSED;
1758 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1759 {
1760 #ifdef JC1_LITE
1761 return 0;
1762 #else
1763 /* Dumb implementation. Doesn't try to cache or optimize things. */
1764 /* First line of the file is line 1, first column is 1. */
1765
1766 /* COL == -1 means, at the CR/LF in LINE. */
1767 /* COL == -2 means, at the first non space char in LINE. */
1768
1769 FILE *fp;
1770 int c, ccol, cline = 1;
1771 int current_line_col = 0;
1772 int first_non_space = 0;
1773 char *base;
1774
1775 if (!(fp = fopen (filename, "r")))
1776 fatal_io_error ("can't open %s", filename);
1777
1778 while (cline != line)
1779 {
1780 c = getc (fp);
1781 if (c == EOF)
1782 {
1783 static const char msg[] = "<<file too short - unexpected EOF>>";
1784 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1785 goto have_line;
1786 }
1787 if (java_is_eol (fp, c))
1788 cline++;
1789 }
1790
1791 /* Gather the chars of the current line in a buffer. */
1792 for (;;)
1793 {
1794 c = getc (fp);
1795 if (c < 0 || java_is_eol (fp, c))
1796 break;
1797 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1798 first_non_space = current_line_col;
1799 obstack_1grow (&temporary_obstack, c);
1800 current_line_col++;
1801 }
1802 have_line:
1803
1804 obstack_1grow (&temporary_obstack, '\n');
1805
1806 if (col == -1)
1807 {
1808 col = current_line_col;
1809 first_non_space = 0;
1810 }
1811 else if (col == -2)
1812 col = first_non_space;
1813 else
1814 first_non_space = 0;
1815
1816 /* Place the '^' a the right position. */
1817 base = obstack_base (&temporary_obstack);
1818 for (ccol = 1; ccol <= col+3; ccol++)
1819 {
1820 /* Compute \t when reaching first_non_space. */
1821 char c = (first_non_space ?
1822 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1823 obstack_1grow (&temporary_obstack, c);
1824 }
1825 obstack_grow0 (&temporary_obstack, "^", 1);
1826
1827 fclose (fp);
1828 return obstack_finish (&temporary_obstack);
1829 #endif
1830 }
1831
1832 #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text at STR with the
   NUL-terminated string NAME.  Characters are fetched from STR with
   UTF8_GET and compared one by one with the characters of NAME; at
   the first mismatch the difference between the two is returned.  If
   all of NAME matched, return 0 when STR was used up exactly and 1
   when part of STR is left over.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *end = str + length;
  const char *expected;

  for (expected = name; *expected; ++expected)
    {
      int actual = UTF8_GET (str, end);
      if (actual != *expected)
	return actual - *expected;
    }

  /* NAME is exhausted; any unread input makes STR the longer one.  */
  if (str == end)
    return 0;
  return 1;
}
1851
/* The identifiers that cxx_keyword_p treats as C++ keywords.  The
   table is sorted in ascending byte order, and it must stay that way:
   cxx_keyword_p locates entries with a binary search, so a new keyword
   has to be inserted at its sorted position.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof",
  "__alignof__",
  "__asm",
  "__asm__",
  "__attribute",
  "__attribute__",
  "__builtin_va_arg",
  "__complex",
  "__complex__",
  "__const",
  "__const__",
  "__extension__",
  "__imag",
  "__imag__",
  "__inline",
  "__inline__",
  "__label__",
  "__null",
  "__real",
  "__real__",
  "__restrict",
  "__restrict__",
  "__signed",
  "__signed__",
  "__typeof",
  "__typeof__",
  "__volatile",
  "__volatile__",
  "and",
  "and_eq",
  "asm",
  "auto",
  "bitand",
  "bitor",
  "bool",
  "break",
  "case",
  "catch",
  "char",
  "class",
  "compl",
  "const",
  "const_cast",
  "continue",
  "default",
  "delete",
  "do",
  "double",
  "dynamic_cast",
  "else",
  "enum",
  "explicit",
  "export",
  "extern",
  "false",
  "float",
  "for",
  "friend",
  "goto",
  "if",
  "inline",
  "int",
  "long",
  "mutable",
  "namespace",
  "new",
  "not",
  "not_eq",
  "operator",
  "or",
  "or_eq",
  "private",
  "protected",
  "public",
  "register",
  "reinterpret_cast",
  "return",
  "short",
  "signed",
  "sizeof",
  "static",
  "static_cast",
  "struct",
  "switch",
  "template",
  "this",
  "throw",
  "true",
  "try",
  "typedef",
  "typeid",
  "typename",
  "typeof",
  "union",
  "unsigned",
  "using",
  "virtual",
  "void",
  "volatile",
  "wchar_t",
  "while",
  "xor",
  "xor_eq"
};
1961
1962 /* Return true if NAME is a C++ keyword. */
1963
1964 int
cxx_keyword_p(name,length)1965 cxx_keyword_p (name, length)
1966 const char *name;
1967 int length;
1968 {
1969 int last = ARRAY_SIZE (cxx_keywords);
1970 int first = 0;
1971 int mid = (last + first) / 2;
1972 int old = -1;
1973
1974 for (mid = (last + first) / 2;
1975 mid != old;
1976 old = mid, mid = (last + first) / 2)
1977 {
1978 int kwl = strlen (cxx_keywords[mid]);
1979 int min_length = kwl > length ? length : kwl;
1980 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1981
1982 if (r == 0)
1983 {
1984 int i;
1985 /* We've found a match if all the remaining characters are `$'. */
1986 for (i = min_length; i < length && name[i] == '$'; ++i)
1987 ;
1988 if (i == length)
1989 return 1;
1990 r = 1;
1991 }
1992
1993 if (r < 0)
1994 last = mid;
1995 else
1996 first = mid;
1997 }
1998 return 0;
1999 }
2000 #endif /* JC1_LITE */
2001