xref: /openbsd/gnu/usr.bin/gcc/gcc/java/lex.c (revision c87b03e5)
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
4 
5 This file is part of GNU CC.
6 
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11 
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING.  If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
21 
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
25 
/* This file defines java_lex (yylex), which reads a Java ASCII source
   file possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found except comments,
   white space and line terminators.  When necessary, it also fills
   the java_lval (yylval) union.  It is implemented to be called by a
   re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
36 
37 #include "keyword.h"
38 #include "flags.h"
39 #include "chartables.h"
40 
41 /* Function declarations.  */
42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
44 static void java_lex_error PARAMS ((const char *, int));
45 #ifndef JC1_LITE
46 static int java_is_eol PARAMS ((FILE *, int));
47 static tree build_wfl_node PARAMS ((tree));
48 #endif
49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50 static int java_parse_escape_sequence PARAMS ((void));
51 static int java_start_char_p PARAMS ((unicode_t));
52 static int java_part_char_p PARAMS ((unicode_t));
53 static int java_parse_doc_section PARAMS ((int));
54 static void java_parse_end_comment PARAMS ((int));
55 static int java_get_unicode PARAMS ((void));
56 static int java_read_unicode PARAMS ((java_lexer *, int *));
57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
58 							     int *));
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static int java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
64 #ifndef JC1_LITE
65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
66 #endif
67 
68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
69 #ifndef JC1_LITE
70 static void error_if_numeric_overflow PARAMS ((tree));
71 #endif
72 
73 #ifdef HAVE_ICONV
74 /* This is nonzero if we have initialized `need_byteswap'.  */
75 static int byteswap_init = 0;
76 
77 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
78    big-endian order -- not native endian order.  We handle this by
79    doing a conversion once at startup and seeing what happens.  This
80    flag holds the results of this determination.  */
81 static int need_byteswap = 0;
82 #endif
83 
84 void
java_init_lex(finput,encoding)85 java_init_lex (finput, encoding)
86      FILE *finput;
87      const char *encoding;
88 {
89 #ifndef JC1_LITE
90   int java_lang_imported = 0;
91 
92   if (!java_lang_id)
93     java_lang_id = get_identifier ("java.lang");
94   if (!inst_id)
95     inst_id = get_identifier ("inst$");
96   if (!wpv_id)
97     wpv_id = get_identifier ("write_parm_value$");
98 
99   if (!java_lang_imported)
100     {
101       tree node = build_tree_list
102 	(build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
103       read_import_dir (TREE_PURPOSE (node));
104       TREE_CHAIN (node) = ctxp->import_demand_list;
105       ctxp->import_demand_list = node;
106       java_lang_imported = 1;
107     }
108 
109   if (!wfl_operator)
110     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
111   if (!label_id)
112     label_id = get_identifier ("$L");
113   if (!wfl_append)
114     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
115   if (!wfl_string_buffer)
116     wfl_string_buffer =
117       build_expr_wfl (get_identifier (flag_emit_class_files
118 				      ? "java.lang.StringBuffer"
119 				      : "gnu.gcj.runtime.StringBuffer"),
120 		      NULL, 0, 0);
121   if (!wfl_to_string)
122     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
123 
124   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
125     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
126 
127   memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
128   memset (current_jcf, 0, sizeof (JCF));
129   ctxp->current_parsed_class = NULL;
130   ctxp->package = NULL_TREE;
131 #endif
132 
133   ctxp->filename = input_filename;
134   ctxp->lineno = lineno = 0;
135   ctxp->p_line = NULL;
136   ctxp->c_line = NULL;
137   ctxp->java_error_flag = 0;
138   ctxp->lexer = java_new_lexer (finput, encoding);
139 }
140 
141 static char *
java_sprint_unicode(line,i)142 java_sprint_unicode (line, i)
143     struct java_line *line;
144     int i;
145 {
146   static char buffer [10];
147   if (line->unicode_escape_p [i] || line->line [i] > 128)
148     sprintf (buffer, "\\u%04x", line->line [i]);
149   else
150     {
151       buffer [0] = line->line [i];
152       buffer [1] = '\0';
153     }
154   return buffer;
155 }
156 
157 static unicode_t
java_sneak_unicode()158 java_sneak_unicode ()
159 {
160   return (ctxp->c_line->line [ctxp->c_line->current]);
161 }
162 
163 static void
java_unget_unicode()164 java_unget_unicode ()
165 {
166   if (!ctxp->c_line->current)
167     /* Can't unget unicode.  */
168     abort ();
169 
170   ctxp->c_line->current--;
171   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
172 }
173 
174 static void
java_allocate_new_line()175 java_allocate_new_line ()
176 {
177   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
178   char ahead_escape_p = (ctxp->c_line ?
179 			 ctxp->c_line->unicode_escape_ahead_p : 0);
180 
181   if (ctxp->c_line && !ctxp->c_line->white_space_only)
182     {
183       if (ctxp->p_line)
184 	{
185 	  free (ctxp->p_line->unicode_escape_p);
186 	  free (ctxp->p_line->line);
187 	  free (ctxp->p_line);
188 	}
189       ctxp->p_line = ctxp->c_line;
190       ctxp->c_line = NULL;		/* Reallocated.  */
191     }
192 
193   if (!ctxp->c_line)
194     {
195       ctxp->c_line = xmalloc (sizeof (struct java_line));
196       ctxp->c_line->max = JAVA_LINE_MAX;
197       ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
198       ctxp->c_line->unicode_escape_p =
199 	xmalloc (sizeof (char)*ctxp->c_line->max);
200       ctxp->c_line->white_space_only = 0;
201     }
202 
203   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
204   ctxp->c_line->char_col = ctxp->c_line->current = 0;
205   if (ahead)
206     {
207       ctxp->c_line->line [ctxp->c_line->size] = ahead;
208       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
209       ctxp->c_line->size++;
210     }
211   ctxp->c_line->ahead [0] = 0;
212   ctxp->c_line->unicode_escape_ahead_p = 0;
213   ctxp->c_line->lineno = ++lineno;
214   ctxp->c_line->white_space_only = 1;
215 }
216 
217 /* Create a new lexer object.  */
218 
219 java_lexer *
java_new_lexer(finput,encoding)220 java_new_lexer (finput, encoding)
221      FILE *finput;
222      const char *encoding;
223 {
224   java_lexer *lex = xmalloc (sizeof (java_lexer));
225   int enc_error = 0;
226 
227   lex->finput = finput;
228   lex->bs_count = 0;
229   lex->unget_value = 0;
230   lex->hit_eof = 0;
231 
232 #ifdef HAVE_ICONV
233   lex->handle = iconv_open ("UCS-2", encoding);
234   if (lex->handle != (iconv_t) -1)
235     {
236       lex->first = -1;
237       lex->last = -1;
238       lex->out_first = -1;
239       lex->out_last = -1;
240       lex->read_anything = 0;
241       lex->use_fallback = 0;
242 
243       /* Work around broken iconv() implementations by doing checking at
244 	 runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
245 	 then all UCS-2 encoders will be broken.  Perhaps not a valid
246 	 assumption.  */
247       if (! byteswap_init)
248 	{
249 	  iconv_t handle;
250 
251 	  byteswap_init = 1;
252 
253 	  handle = iconv_open ("UCS-2", "UTF-8");
254 	  if (handle != (iconv_t) -1)
255 	    {
256 	      unicode_t result;
257 	      unsigned char in[3];
258 	      char *inp, *outp;
259 	      size_t inc, outc, r;
260 
261 	      /* This is the UTF-8 encoding of \ufeff.  */
262 	      in[0] = 0xef;
263 	      in[1] = 0xbb;
264 	      in[2] = 0xbf;
265 
266 	      inp = in;
267 	      inc = 3;
268 	      outp = (char *) &result;
269 	      outc = 2;
270 
271 	      r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
272 			 &outp, &outc);
273 	      iconv_close (handle);
274 	      /* Conversion must be complete for us to use the result.  */
275 	      if (r != (size_t) -1 && inc == 0 && outc == 0)
276 		need_byteswap = (result != 0xfeff);
277 	    }
278 	}
279 
280       lex->byte_swap = need_byteswap;
281     }
282   else
283 #endif /* HAVE_ICONV */
284     {
285       /* If iconv failed, use the internal decoder if the default
286 	 encoding was requested.  This code is used on platforms where
287 	 iconv exists but is insufficient for our needs.  For
288 	 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
289 
290 	 On Solaris the default encoding, as returned by nl_langinfo(),
291 	 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
292 	 understand that.  We work around that by pretending
293 	 `646' to be the same as UTF-8.   */
294       if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
295 	enc_error = 1;
296 #ifdef HAVE_ICONV
297       else
298 	lex->use_fallback = 1;
299 #endif /* HAVE_ICONV */
300     }
301 
302   if (enc_error)
303     fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
304 
305   return lex;
306 }
307 
308 void
java_destroy_lexer(lex)309 java_destroy_lexer (lex)
310      java_lexer *lex;
311 {
312 #ifdef HAVE_ICONV
313   if (! lex->use_fallback)
314     iconv_close (lex->handle);
315 #endif
316   free (lex);
317 }
318 
319 static int
java_read_char(lex)320 java_read_char (lex)
321      java_lexer *lex;
322 {
323   if (lex->unget_value)
324     {
325       unicode_t r = lex->unget_value;
326       lex->unget_value = 0;
327       return r;
328     }
329 
330 #ifdef HAVE_ICONV
331   if (! lex->use_fallback)
332     {
333       size_t ir, inbytesleft, in_save, out_count, out_save;
334       char *inp, *outp;
335       unicode_t result;
336 
337       /* If there is data which has already been converted, use it.  */
338       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
339 	{
340 	  lex->out_first = 0;
341 	  lex->out_last = 0;
342 
343 	  while (1)
344 	    {
345 	      /* See if we need to read more data.  If FIRST == 0 then
346 		 the previous conversion attempt ended in the middle of
347 		 a character at the end of the buffer.  Otherwise we
348 		 only have to read if the buffer is empty.  */
349 	      if (lex->first == 0 || lex->first >= lex->last)
350 		{
351 		  int r;
352 
353 		  if (lex->first >= lex->last)
354 		    {
355 		      lex->first = 0;
356 		      lex->last = 0;
357 		    }
358 		  if (feof (lex->finput))
359 		    return UEOF;
360 		  r = fread (&lex->buffer[lex->last], 1,
361 			     sizeof (lex->buffer) - lex->last,
362 			     lex->finput);
363 		  lex->last += r;
364 		}
365 
366 	      inbytesleft = lex->last - lex->first;
367 	      out_count = sizeof (lex->out_buffer) - lex->out_last;
368 
369 	      if (inbytesleft == 0)
370 		{
371 		  /* We've tried to read and there is nothing left.  */
372 		  return UEOF;
373 		}
374 
375 	      in_save = inbytesleft;
376 	      out_save = out_count;
377 	      inp = &lex->buffer[lex->first];
378 	      outp = &lex->out_buffer[lex->out_last];
379 	      ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
380 			  &inbytesleft, &outp, &out_count);
381 
382 	      /* If we haven't read any bytes, then look to see if we
383 		 have read a BOM.  */
384 	      if (! lex->read_anything && out_save - out_count >= 2)
385 		{
386 		  unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
387 		  if (uc == 0xfeff)
388 		    {
389 		      lex->byte_swap = 0;
390 		      lex->out_first += 2;
391 		    }
392 		  else if (uc == 0xfffe)
393 		    {
394 		      lex->byte_swap = 1;
395 		      lex->out_first += 2;
396 		    }
397 		  lex->read_anything = 1;
398 		}
399 
400 	      if (lex->byte_swap)
401 		{
402 		  unsigned int i;
403 		  for (i = 0; i < out_save - out_count; i += 2)
404 		    {
405 		      char t = lex->out_buffer[lex->out_last + i];
406 		      lex->out_buffer[lex->out_last + i]
407 			= lex->out_buffer[lex->out_last + i + 1];
408 		      lex->out_buffer[lex->out_last + i + 1] = t;
409 		    }
410 		}
411 
412 	      lex->first += in_save - inbytesleft;
413 	      lex->out_last += out_save - out_count;
414 
415 	      /* If we converted anything at all, move along.  */
416 	      if (out_count != out_save)
417 		break;
418 
419 	      if (ir == (size_t) -1)
420 		{
421 		  if (errno == EINVAL)
422 		    {
423 		      /* This is ok.  This means that the end of our buffer
424 			 is in the middle of a character sequence.  We just
425 			 move the valid part of the buffer to the beginning
426 			 to force a read.  */
427 		      memmove (&lex->buffer[0], &lex->buffer[lex->first],
428 			       lex->last - lex->first);
429 		      lex->last -= lex->first;
430 		      lex->first = 0;
431 		    }
432 		  else
433 		    {
434 		      /* A more serious error.  */
435 		      java_lex_error ("unrecognized character in input stream",
436 				      0);
437 		      return UEOF;
438 		    }
439 		}
440 	    }
441 	}
442 
443       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
444 	{
445 	  /* Don't have any data.  */
446 	  return UEOF;
447 	}
448 
449       /* Success.  */
450       result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
451       lex->out_first += 2;
452       return result;
453     }
454   else
455 #endif /* HAVE_ICONV */
456     {
457       int c, c1, c2;
458       c = getc (lex->finput);
459 
460       if (c == EOF)
461 	return UEOF;
462       if (c < 128)
463 	return (unicode_t) c;
464       else
465 	{
466 	  if ((c & 0xe0) == 0xc0)
467 	    {
468 	      c1 = getc (lex->finput);
469 	      if ((c1 & 0xc0) == 0x80)
470 		{
471 		  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
472 		  /* Check for valid 2-byte characters.  We explicitly
473 		     allow \0 because this encoding is common in the
474 		     Java world.  */
475 		  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
476 		    return r;
477 		}
478 	    }
479 	  else if ((c & 0xf0) == 0xe0)
480 	    {
481 	      c1 = getc (lex->finput);
482 	      if ((c1 & 0xc0) == 0x80)
483 		{
484 		  c2 = getc (lex->finput);
485 		  if ((c2 & 0xc0) == 0x80)
486 		    {
487 		      unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
488 						 (( c1 & 0x3f) << 6)
489 						 + (c2 & 0x3f));
490 		      /* Check for valid 3-byte characters.
491 			 Don't allow surrogate, \ufffe or \uffff.  */
492 		      if (IN_RANGE (r, 0x800, 0xffff)
493 			  && ! IN_RANGE (r, 0xd800, 0xdfff)
494 			  && r != 0xfffe && r != 0xffff)
495 			return r;
496 		    }
497 		}
498 	    }
499 
500 	  /* We simply don't support invalid characters.  We also
501 	     don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
502 	     cannot be valid Java characters.  */
503 	  java_lex_error ("malformed UTF-8 character", 0);
504 	}
505     }
506 
507   /* We only get here on error.  */
508   return UEOF;
509 }
510 
511 static void
java_store_unicode(l,c,unicode_escape_p)512 java_store_unicode (l, c, unicode_escape_p)
513     struct java_line *l;
514     unicode_t c;
515     int unicode_escape_p;
516 {
517   if (l->size == l->max)
518     {
519       l->max += JAVA_LINE_MAX;
520       l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
521       l->unicode_escape_p = xrealloc (l->unicode_escape_p,
522 				      sizeof (char)*l->max);
523     }
524   l->line [l->size] = c;
525   l->unicode_escape_p [l->size++] = unicode_escape_p;
526 }
527 
528 static int
java_read_unicode(lex,unicode_escape_p)529 java_read_unicode (lex, unicode_escape_p)
530      java_lexer *lex;
531      int *unicode_escape_p;
532 {
533   int c;
534 
535   c = java_read_char (lex);
536   *unicode_escape_p = 0;
537 
538   if (c != '\\')
539     {
540       lex->bs_count = 0;
541       return c;
542     }
543 
544   ++lex->bs_count;
545   if ((lex->bs_count) % 2 == 1)
546     {
547       /* Odd number of \ seen.  */
548       c = java_read_char (lex);
549       if (c == 'u')
550         {
551 	  unicode_t unicode = 0;
552 	  int shift = 12;
553 
554 	  /* Recognize any number of `u's in \u.  */
555 	  while ((c = java_read_char (lex)) == 'u')
556 	    ;
557 
558 	  shift = 12;
559 	  do
560 	    {
561 	      if (c == UEOF)
562 		{
563 		  java_lex_error ("prematurely terminated \\u sequence", 0);
564 		  return UEOF;
565 		}
566 
567 	      if (hex_p (c))
568 		unicode |= (unicode_t)(hex_value (c) << shift);
569 	      else
570 		{
571 		  java_lex_error ("non-hex digit in \\u sequence", 0);
572 		  break;
573 		}
574 
575 	      c = java_read_char (lex);
576 	      shift -= 4;
577 	    }
578 	  while (shift >= 0);
579 
580 	  if (c != UEOF)
581 	    lex->unget_value = c;
582 
583 	  lex->bs_count = 0;
584 	  *unicode_escape_p = 1;
585 	  return unicode;
586 	}
587       lex->unget_value = c;
588     }
589   return (unicode_t) '\\';
590 }
591 
592 static int
java_read_unicode_collapsing_terminators(lex,unicode_escape_p)593 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
594      java_lexer *lex;
595      int *unicode_escape_p;
596 {
597   int c = java_read_unicode (lex, unicode_escape_p);
598 
599   if (c == '\r')
600     {
601       /* We have to read ahead to see if we got \r\n.  In that case we
602 	 return a single line terminator.  */
603       int dummy;
604       c = java_read_unicode (lex, &dummy);
605       if (c != '\n' && c != UEOF)
606 	lex->unget_value = c;
607       /* In either case we must return a newline.  */
608       c = '\n';
609     }
610 
611   return c;
612 }
613 
614 static int
java_get_unicode()615 java_get_unicode ()
616 {
617   /* It's time to read a line when...  */
618   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
619     {
620       int c;
621       int found_chars = 0;
622 
623       if (ctxp->lexer->hit_eof)
624 	return UEOF;
625 
626       java_allocate_new_line ();
627       if (ctxp->c_line->line[0] != '\n')
628 	{
629 	  for (;;)
630 	    {
631 	      int unicode_escape_p;
632 	      c = java_read_unicode_collapsing_terminators (ctxp->lexer,
633 							    &unicode_escape_p);
634 	      if (c != UEOF)
635 		{
636 		  found_chars = 1;
637 		  java_store_unicode (ctxp->c_line, c, unicode_escape_p);
638 		  if (ctxp->c_line->white_space_only
639 		      && !JAVA_WHITE_SPACE_P (c)
640 		      && c != '\n')
641 		    ctxp->c_line->white_space_only = 0;
642 		}
643 	      if ((c == '\n') || (c == UEOF))
644 		break;
645 	    }
646 
647 	  if (c == UEOF && ! found_chars)
648 	    {
649 	      ctxp->lexer->hit_eof = 1;
650 	      return UEOF;
651 	    }
652 	}
653     }
654   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
655   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
656   return ctxp->c_line->line [ctxp->c_line->current++];
657 }
658 
659 /* Parse the end of a C style comment.
660  * C is the first character following the '/' and '*'.  */
661 static void
java_parse_end_comment(c)662 java_parse_end_comment (c)
663      int c;
664 {
665   for ( ;; c = java_get_unicode ())
666     {
667       switch (c)
668 	{
669 	case UEOF:
670 	  java_lex_error ("Comment not terminated at end of input", 0);
671 	  return;
672 	case '*':
673 	  switch (c = java_get_unicode ())
674 	    {
675 	    case UEOF:
676 	      java_lex_error ("Comment not terminated at end of input", 0);
677 	      return;
678 	    case '/':
679 	      return;
680 	    case '*':	/* Reparse only '*'.  */
681 	      java_unget_unicode ();
682 	    }
683 	}
684     }
685 }
686 
687 /* Parse the documentation section. Keywords must be at the beginning
688    of a documentation comment line (ignoring white space and any `*'
689    character). Parsed keyword(s): @DEPRECATED.  */
690 
691 static int
java_parse_doc_section(c)692 java_parse_doc_section (c)
693      int c;
694 {
695   int valid_tag = 0, seen_star = 0;
696 
697   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
698     {
699       switch (c)
700 	{
701 	case '*':
702 	  seen_star = 1;
703 	  break;
704 	case '\n': /* ULT */
705 	  valid_tag = 1;
706 	default:
707 	  seen_star = 0;
708 	}
709       c = java_get_unicode();
710     }
711 
712   if (c == UEOF)
713     java_lex_error ("Comment not terminated at end of input", 0);
714 
715   if (seen_star && (c == '/'))
716     return 1;			/* Goto step1 in caller.  */
717 
718   /* We're parsing `@deprecated'.  */
719   if (valid_tag && (c == '@'))
720     {
721       char tag [11];
722       int  tag_index = 0;
723 
724       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
725 	{
726 	  c = java_get_unicode ();
727 	  tag [tag_index++] = c;
728 	}
729 
730       if (c == UEOF)
731 	java_lex_error ("Comment not terminated at end of input", 0);
732       tag [tag_index] = '\0';
733 
734       if (!strcmp (tag, "deprecated"))
735 	ctxp->deprecated = 1;
736     }
737   java_unget_unicode ();
738   return 0;
739 }
740 
741 /* Return true if C is a valid start character for a Java identifier.
742    This is only called if C >= 128 -- smaller values are handled
743    inline.  However, this function handles all values anyway.  */
744 static int
java_start_char_p(c)745 java_start_char_p (c)
746      unicode_t c;
747 {
748   unsigned int hi = c / 256;
749   const char *const page = type_table[hi];
750   unsigned long val = (unsigned long) page;
751   int flags;
752 
753   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
754     flags = page[c & 255];
755   else
756     flags = val;
757 
758   return flags & LETTER_START;
759 }
760 
761 /* Return true if C is a valid part character for a Java identifier.
762    This is only called if C >= 128 -- smaller values are handled
763    inline.  However, this function handles all values anyway.  */
764 static int
java_part_char_p(c)765 java_part_char_p (c)
766      unicode_t c;
767 {
768   unsigned int hi = c / 256;
769   const char *const page = type_table[hi];
770   unsigned long val = (unsigned long) page;
771   int flags;
772 
773   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
774     flags = page[c & 255];
775   else
776     flags = val;
777 
778   return flags & LETTER_PART;
779 }
780 
781 static int
java_parse_escape_sequence()782 java_parse_escape_sequence ()
783 {
784   unicode_t char_lit;
785   int c;
786 
787   switch (c = java_get_unicode ())
788     {
789     case 'b':
790       return (unicode_t)0x8;
791     case 't':
792       return (unicode_t)0x9;
793     case 'n':
794       return (unicode_t)0xa;
795     case 'f':
796       return (unicode_t)0xc;
797     case 'r':
798       return (unicode_t)0xd;
799     case '"':
800       return (unicode_t)0x22;
801     case '\'':
802       return (unicode_t)0x27;
803     case '\\':
804       return (unicode_t)0x5c;
805     case '0': case '1': case '2': case '3': case '4':
806     case '5': case '6': case '7':
807       {
808 	int octal_escape[3];
809 	int octal_escape_index = 0;
810 	int max = 3;
811 	int i, shift;
812 
813 	for (; octal_escape_index < max && RANGE (c, '0', '7');
814 	     c = java_get_unicode ())
815 	  {
816 	    if (octal_escape_index == 0 && c > '3')
817 	      {
818 		/* According to the grammar, `\477' has a well-defined
819 		   meaning -- it is `\47' followed by `7'.  */
820 		--max;
821 	      }
822 	    octal_escape [octal_escape_index++] = c;
823 	  }
824 
825 	java_unget_unicode ();
826 
827 	for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
828 	     i < octal_escape_index; i++, shift -= 3)
829 	  char_lit |= (octal_escape [i] - '0') << shift;
830 
831 	return char_lit;
832       }
833     default:
834       java_lex_error ("Invalid character in escape sequence", 0);
835       return JAVA_CHAR_ERROR;
836     }
837 }
838 
839 #ifndef JC1_LITE
840 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
841 
842 /* Subroutine of java_lex: converts floating-point literals to tree
843    nodes.  LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
844    store the result.  FFLAG indicates whether the literal was tagged
845    with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
846    is the line number on which to report any error.  */
847 
848 static void java_perform_atof	PARAMS ((YYSTYPE *, char *, int, int));
849 
850 static void
java_perform_atof(java_lval,literal_token,fflag,number_beginning)851 java_perform_atof (java_lval, literal_token, fflag, number_beginning)
852      YYSTYPE *java_lval;
853      char *literal_token;
854      int fflag;
855      int number_beginning;
856 {
857   REAL_VALUE_TYPE value;
858   tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
859 
860   SET_REAL_VALUE_ATOF (value,
861 		       REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
862 
863   if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
864     {
865       JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
866       value = DCONST0;
867     }
868   else if (IS_ZERO (value))
869     {
870       /* We check to see if the value is really 0 or if we've found an
871 	 underflow.  We do this in the most primitive imaginable way.  */
872       int really_zero = 1;
873       char *p = literal_token;
874       if (*p == '-')
875 	++p;
876       while (*p && *p != 'e' && *p != 'E')
877 	{
878 	  if (*p != '0' && *p != '.')
879 	    {
880 	      really_zero = 0;
881 	      break;
882 	    }
883 	  ++p;
884 	}
885       if (! really_zero)
886 	{
887 	  int i = ctxp->c_line->current;
888 	  ctxp->c_line->current = number_beginning;
889 	  java_lex_error ("Floating point literal underflow", 0);
890 	  ctxp->c_line->current = i;
891 	}
892     }
893 
894   SET_LVAL_NODE_TYPE (build_real (type, value), type);
895 }
896 #endif
897 
898 static int yylex		PARAMS ((YYSTYPE *));
899 
900 static int
901 #ifdef JC1_LITE
yylex(java_lval)902 yylex (java_lval)
903 #else
904 java_lex (java_lval)
905 #endif
906      YYSTYPE *java_lval;
907 {
908   int c;
909   unicode_t first_unicode;
910   int ascii_index, all_ascii;
911   char *string;
912 
913   /* Translation of the Unicode escape in the raw stream of Unicode
914      characters. Takes care of line terminator.  */
915  step1:
916   /* Skip white spaces: SP, TAB and FF or ULT.  */
917   for (c = java_get_unicode ();
918        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
919     if (c == '\n')
920       {
921 	ctxp->elc.line = ctxp->c_line->lineno;
922 	ctxp->elc.col  = ctxp->c_line->char_col-2;
923       }
924 
925   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
926 
927   if (c == 0x1a)		/* CTRL-Z.  */
928     {
929       if ((c = java_get_unicode ()) == UEOF)
930 	return 0;		/* Ok here.  */
931       else
932 	java_unget_unicode ();	/* Caught later, at the end of the
933                                    function.  */
934     }
935   /* Handle EOF here.  */
936   if (c == UEOF)	/* Should probably do something here...  */
937     return 0;
938 
939   /* Take care of eventual comments.  */
940   if (c == '/')
941     {
942       switch (c = java_get_unicode ())
943 	{
944 	case '/':
945 	  for (;;)
946 	    {
947 	      c = java_get_unicode ();
948 	      if (c == UEOF)
949 		{
950 		  /* It is ok to end a `//' comment with EOF, unless
951 		     we're being pedantic.  */
952 		  if (pedantic)
953 		    java_lex_error ("Comment not terminated at end of input",
954 				    0);
955 		  return 0;
956 		}
957 	      if (c == '\n')	/* ULT */
958 		goto step1;
959 	    }
960 	  break;
961 
962 	case '*':
963 	  if ((c = java_get_unicode ()) == '*')
964 	    {
965 	      if ((c = java_get_unicode ()) == '/')
966 		goto step1;	/* Empty documentation comment.  */
967 	      else if (java_parse_doc_section (c))
968 		goto step1;
969 	    }
970 
971 	  java_parse_end_comment ((c = java_get_unicode ()));
972 	  goto step1;
973 	  break;
974 	default:
975 	  java_unget_unicode ();
976 	  c = '/';
977 	  break;
978 	}
979     }
980 
981   ctxp->elc.line = ctxp->c_line->lineno;
982   ctxp->elc.prev_col = ctxp->elc.col;
983   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
984   if (ctxp->elc.col < 0)
985     abort ();
986 
987   /* Numeric literals.  */
988   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
989     {
990       /* This section of code is borrowed from gcc/c-lex.c.  */
991 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
992       int parts[TOTAL_PARTS];
993       HOST_WIDE_INT high, low;
994       /* End borrowed section.  */
995       char literal_token [256];
996       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
997       int  found_hex_digits = 0, found_non_octal_digits = 0;
998       int  i;
999 #ifndef JC1_LITE
1000       int  number_beginning = ctxp->c_line->current;
1001       tree value;
1002 #endif
1003 
1004       /* We might have a . separator instead of a FP like .[0-9]*.  */
1005       if (c == '.')
1006 	{
1007 	  unicode_t peep = java_sneak_unicode ();
1008 
1009 	  if (!JAVA_ASCII_DIGIT (peep))
1010 	    {
1011 	      JAVA_LEX_SEP('.');
1012 	      BUILD_OPERATOR (DOT_TK);
1013 	    }
1014 	}
1015 
1016       for (i = 0; i < TOTAL_PARTS; i++)
1017 	parts [i] = 0;
1018 
1019       if (c == '0')
1020 	{
1021 	  c = java_get_unicode ();
1022 	  if (c == 'x' || c == 'X')
1023 	    {
1024 	      radix = 16;
1025 	      c = java_get_unicode ();
1026 	    }
1027 	  else if (JAVA_ASCII_DIGIT (c))
1028 	    radix = 8;
1029 	  else if (c == '.' || c == 'e' || c =='E')
1030 	    {
1031 	      /* Push the '.', 'e', or 'E' back and prepare for a FP
1032 		 parsing...  */
1033 	      java_unget_unicode ();
1034 	      c = '0';
1035 	    }
1036 	  else
1037 	    {
1038 	      /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}.  */
1039 	      JAVA_LEX_LIT ("0", 10);
1040               switch (c)
1041 		{
1042 		case 'L': case 'l':
1043 		  SET_LVAL_NODE (long_zero_node);
1044 		  return (INT_LIT_TK);
1045 		case 'f': case 'F':
1046 		  SET_LVAL_NODE (float_zero_node);
1047 		  return (FP_LIT_TK);
1048 		case 'd': case 'D':
1049 		  SET_LVAL_NODE (double_zero_node);
1050 		  return (FP_LIT_TK);
1051 		default:
1052 		  java_unget_unicode ();
1053 		  SET_LVAL_NODE (integer_zero_node);
1054 		  return (INT_LIT_TK);
1055 		}
1056 	    }
1057 	}
1058       /* Parse the first part of the literal, until we find something
1059 	 which is not a number.  */
1060       while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1061 	     JAVA_ASCII_DIGIT (c))
1062 	{
1063 	  /* We store in a string (in case it turns out to be a FP) and in
1064 	     PARTS if we have to process a integer literal.  */
1065 	  int numeric = hex_value (c);
1066 	  int count;
1067 
1068 	  /* Remember when we find a valid hexadecimal digit.  */
1069 	  if (radix == 16)
1070 	    found_hex_digits = 1;
1071           /* Remember when we find an invalid octal digit.  */
1072           else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1073             found_non_octal_digits = 1;
1074 
1075 	  literal_token [literal_index++] = c;
1076 	  /* This section of code if borrowed from gcc/c-lex.c.  */
1077 	  for (count = 0; count < TOTAL_PARTS; count++)
1078 	    {
1079 	      parts[count] *= radix;
1080 	      if (count)
1081 		{
1082 		  parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
1083 		  parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1084 		}
1085 	      else
1086 		parts[0] += numeric;
1087 	    }
1088 	  if (parts [TOTAL_PARTS-1] != 0)
1089 	    overflow = 1;
1090 	  /* End borrowed section.  */
1091 	  c = java_get_unicode ();
1092 	}
1093 
1094       /* If we have something from the FP char set but not a digit, parse
1095 	 a FP literal.  */
1096       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1097 	{
1098 	  int stage = 0;
1099 	  int seen_digit = (literal_index ? 1 : 0);
1100 	  int seen_exponent = 0;
1101 	  int fflag = 0;	/* 1 for {f,F}, 0 for {d,D}. FP literal are
1102 				   double unless specified.  */
1103 
1104 	  /* It is ok if the radix is 8 because this just means we've
1105 	     seen a leading `0'.  However, radix==16 is invalid.  */
1106 	  if (radix == 16)
1107 	    java_lex_error ("Can't express non-decimal FP literal", 0);
1108 	  radix = 10;
1109 
1110 	  for (;;)
1111 	    {
1112 	      if (c == '.')
1113 		{
1114 		  if (stage < 1)
1115 		    {
1116 		      stage = 1;
1117 		      literal_token [literal_index++ ] = c;
1118 		      c = java_get_unicode ();
1119 		    }
1120 		  else
1121 		    java_lex_error ("Invalid character in FP literal", 0);
1122 		}
1123 
1124 	      if (c == 'e' || c == 'E')
1125 		{
1126 		  if (stage < 2)
1127 		    {
1128 		      /* {E,e} must have seen at least a digit.  */
1129 		      if (!seen_digit)
1130 			java_lex_error
1131                           ("Invalid FP literal, mantissa must have digit", 0);
1132 		      seen_digit = 0;
1133 		      seen_exponent = 1;
1134 		      stage = 2;
1135 		      literal_token [literal_index++] = c;
1136 		      c = java_get_unicode ();
1137 		    }
1138 		  else
1139 		    java_lex_error ("Invalid character in FP literal", 0);
1140 		}
1141 	      if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1142 		{
1143 		  fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1144 		  stage = 4;	/* So we fall through.  */
1145 		}
1146 
1147 	      if ((c=='-' || c =='+') && stage == 2)
1148 		{
1149 		  stage = 3;
1150 		  literal_token [literal_index++] = c;
1151 		  c = java_get_unicode ();
1152 		}
1153 
1154 	      if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1155 		  (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1156 		  (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1157 		  (stage == 3 && JAVA_ASCII_DIGIT (c)))
1158 		{
1159 		  if (JAVA_ASCII_DIGIT (c))
1160 		    seen_digit = 1;
1161                   if (stage == 2)
1162                     stage = 3;
1163 		  literal_token [literal_index++ ] = c;
1164 		  c = java_get_unicode ();
1165 		}
1166 	      else
1167 		{
1168 		  if (stage != 4) /* Don't push back fF/dD.  */
1169 		    java_unget_unicode ();
1170 
1171 		  /* An exponent (if any) must have seen a digit.  */
1172 		  if (seen_exponent && !seen_digit)
1173 		    java_lex_error
1174                       ("Invalid FP literal, exponent must have digit", 0);
1175 
1176 		  literal_token [literal_index] = '\0';
1177 		  JAVA_LEX_LIT (literal_token, radix);
1178 
1179 #ifndef JC1_LITE
1180 		  java_perform_atof (java_lval, literal_token,
1181 				     fflag, number_beginning);
1182 #endif
1183 		  return FP_LIT_TK;
1184 		}
1185 	    }
1186 	} /* JAVA_ASCII_FPCHAR (c) */
1187 
1188       /* Here we get back to converting the integral literal.  */
1189       if (radix == 16 && ! found_hex_digits)
1190 	java_lex_error
1191 	  ("0x must be followed by at least one hexadecimal digit", 0);
1192       else if (radix == 8 && found_non_octal_digits)
1193 	java_lex_error ("Octal literal contains digit out of range", 0);
1194       else if (c == 'L' || c == 'l')
1195 	long_suffix = 1;
1196       else
1197 	java_unget_unicode ();
1198 
1199 #ifdef JAVA_LEX_DEBUG
1200       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe.  */
1201       JAVA_LEX_LIT (literal_token, radix);
1202 #endif
1203       /* This section of code is borrowed from gcc/c-lex.c.  */
1204       if (!overflow)
1205 	{
1206 	  bytes = GET_TYPE_PRECISION (long_type_node);
1207 	  for (i = bytes; i < TOTAL_PARTS; i++)
1208 	    if (parts [i])
1209 	      {
1210 	        overflow = 1;
1211 		break;
1212 	      }
1213 	}
1214       high = low = 0;
1215       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1216 	{
1217 	  high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1218 					      / HOST_BITS_PER_CHAR)]
1219 		   << (i * HOST_BITS_PER_CHAR));
1220 	  low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1221 	}
1222       /* End borrowed section.  */
1223 
1224 #ifndef JC1_LITE
1225       /* Range checking.  */
1226       value = build_int_2 (low, high);
1227       /* Temporarily set type to unsigned.  */
1228       SET_LVAL_NODE_TYPE (value, (long_suffix
1229 				  ? unsigned_long_type_node
1230 				  : unsigned_int_type_node));
1231 
1232       /* For base 10 numbers, only values up to the highest value
1233 	 (plus one) can be written.  For instance, only ints up to
1234 	 2147483648 can be written.  The special case of the largest
1235 	 negative value is handled elsewhere.  For other bases, any
1236 	 number can be represented.  */
1237       if (overflow || (radix == 10
1238 		       && tree_int_cst_lt (long_suffix
1239 					   ? decimal_long_max
1240 					   : decimal_int_max,
1241 					   value)))
1242 	{
1243 	  if (long_suffix)
1244 	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1245 	  else
1246 	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1247 	}
1248 
1249       /* Sign extend the value.  */
1250       SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1251       force_fit_type (value, 0);
1252       JAVA_RADIX10_FLAG (value) = radix == 10;
1253 #else
1254       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1255 			  long_suffix ? long_type_node : int_type_node);
1256 #endif
1257       return INT_LIT_TK;
1258     }
1259 
1260   /* Character literals.  */
1261   if (c == '\'')
1262     {
1263       int char_lit;
1264       if ((c = java_get_unicode ()) == '\\')
1265 	char_lit = java_parse_escape_sequence ();
1266       else
1267 	{
1268 	  if (c == '\n' || c == '\'')
1269 	    java_lex_error ("Invalid character literal", 0);
1270 	  char_lit = c;
1271 	}
1272 
1273       c = java_get_unicode ();
1274 
1275       if ((c == '\n') || (c == UEOF))
1276 	java_lex_error ("Character literal not terminated at end of line", 0);
1277       if (c != '\'')
1278 	java_lex_error ("Syntax error in character literal", 0);
1279 
1280       if (char_lit == JAVA_CHAR_ERROR)
1281         char_lit = 0;		/* We silently convert it to zero.  */
1282 
1283       JAVA_LEX_CHAR_LIT (char_lit);
1284       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1285       return CHAR_LIT_TK;
1286     }
1287 
1288   /* String literals.  */
1289   if (c == '"')
1290     {
1291       int no_error;
1292       char *string;
1293 
1294       for (no_error = 1, c = java_get_unicode ();
1295 	   c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1296 	{
1297 	  if (c == '\\')
1298 	    c = java_parse_escape_sequence ();
1299 	  if (c == JAVA_CHAR_ERROR)
1300 	    {
1301 	      no_error = 0;
1302 	      c = 0;		/* We silently convert it to zero.  */
1303 	    }
1304 	  java_unicode_2_utf8 (c);
1305 	}
1306       if (c == '\n' || c == UEOF) /* ULT.  */
1307 	{
1308 	  lineno--;	/* Refer to the line where the terminator was seen.  */
1309 	  java_lex_error ("String not terminated at end of line", 0);
1310 	  lineno++;
1311 	}
1312 
1313       obstack_1grow (&temporary_obstack, '\0');
1314       string = obstack_finish (&temporary_obstack);
1315 #ifndef JC1_LITE
1316       if (!no_error || (c != '"'))
1317 	java_lval->node = error_mark_node; /* FIXME: Requires futher
1318                                               testing.  */
1319       else
1320 	java_lval->node = build_string (strlen (string), string);
1321 #endif
1322       obstack_free (&temporary_obstack, string);
1323       return STRING_LIT_TK;
1324     }
1325 
1326   /* Separator.  */
1327   switch (c)
1328     {
1329     case '(':
1330       JAVA_LEX_SEP (c);
1331       BUILD_OPERATOR (OP_TK);
1332     case ')':
1333       JAVA_LEX_SEP (c);
1334       return CP_TK;
1335     case '{':
1336       JAVA_LEX_SEP (c);
1337       if (ctxp->ccb_indent == 1)
1338 	ctxp->first_ccb_indent1 = lineno;
1339       ctxp->ccb_indent++;
1340       BUILD_OPERATOR (OCB_TK);
1341     case '}':
1342       JAVA_LEX_SEP (c);
1343       ctxp->ccb_indent--;
1344       if (ctxp->ccb_indent == 1)
1345         ctxp->last_ccb_indent1 = lineno;
1346       BUILD_OPERATOR (CCB_TK);
1347     case '[':
1348       JAVA_LEX_SEP (c);
1349       BUILD_OPERATOR (OSB_TK);
1350     case ']':
1351       JAVA_LEX_SEP (c);
1352       return CSB_TK;
1353     case ';':
1354       JAVA_LEX_SEP (c);
1355       return SC_TK;
1356     case ',':
1357       JAVA_LEX_SEP (c);
1358       return C_TK;
1359     case '.':
1360       JAVA_LEX_SEP (c);
1361       BUILD_OPERATOR (DOT_TK);
1362       /*      return DOT_TK; */
1363     }
1364 
1365   /* Operators.  */
1366   switch (c)
1367     {
1368     case '=':
1369       if ((c = java_get_unicode ()) == '=')
1370 	{
1371 	  BUILD_OPERATOR (EQ_TK);
1372 	}
1373       else
1374 	{
1375 	  /* Equals is used in two different locations. In the
1376 	     variable_declarator: rule, it has to be seen as '=' as opposed
1377 	     to being seen as an ordinary assignment operator in
1378 	     assignment_operators: rule.  */
1379 	  java_unget_unicode ();
1380 	  BUILD_OPERATOR (ASSIGN_TK);
1381 	}
1382 
1383     case '>':
1384       switch ((c = java_get_unicode ()))
1385 	{
1386 	case '=':
1387 	  BUILD_OPERATOR (GTE_TK);
1388 	case '>':
1389 	  switch ((c = java_get_unicode ()))
1390 	    {
1391 	    case '>':
1392 	      if ((c = java_get_unicode ()) == '=')
1393 		{
1394 		  BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1395 		}
1396 	      else
1397 		{
1398 		  java_unget_unicode ();
1399 		  BUILD_OPERATOR (ZRS_TK);
1400 		}
1401 	    case '=':
1402 	      BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1403 	    default:
1404 	      java_unget_unicode ();
1405 	      BUILD_OPERATOR (SRS_TK);
1406 	    }
1407 	default:
1408 	  java_unget_unicode ();
1409 	  BUILD_OPERATOR (GT_TK);
1410 	}
1411 
1412     case '<':
1413       switch ((c = java_get_unicode ()))
1414 	{
1415 	case '=':
1416 	  BUILD_OPERATOR (LTE_TK);
1417 	case '<':
1418 	  if ((c = java_get_unicode ()) == '=')
1419 	    {
1420 	      BUILD_OPERATOR2 (LS_ASSIGN_TK);
1421 	    }
1422 	  else
1423 	    {
1424 	      java_unget_unicode ();
1425 	      BUILD_OPERATOR (LS_TK);
1426 	    }
1427 	default:
1428 	  java_unget_unicode ();
1429 	  BUILD_OPERATOR (LT_TK);
1430 	}
1431 
1432     case '&':
1433       switch ((c = java_get_unicode ()))
1434 	{
1435 	case '&':
1436 	  BUILD_OPERATOR (BOOL_AND_TK);
1437 	case '=':
1438 	  BUILD_OPERATOR2 (AND_ASSIGN_TK);
1439 	default:
1440 	  java_unget_unicode ();
1441 	  BUILD_OPERATOR (AND_TK);
1442 	}
1443 
1444     case '|':
1445       switch ((c = java_get_unicode ()))
1446 	{
1447 	case '|':
1448 	  BUILD_OPERATOR (BOOL_OR_TK);
1449 	case '=':
1450 	  BUILD_OPERATOR2 (OR_ASSIGN_TK);
1451 	default:
1452 	  java_unget_unicode ();
1453 	  BUILD_OPERATOR (OR_TK);
1454 	}
1455 
1456     case '+':
1457       switch ((c = java_get_unicode ()))
1458 	{
1459 	case '+':
1460 	  BUILD_OPERATOR (INCR_TK);
1461 	case '=':
1462 	  BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1463 	default:
1464 	  java_unget_unicode ();
1465 	  BUILD_OPERATOR (PLUS_TK);
1466 	}
1467 
1468     case '-':
1469       switch ((c = java_get_unicode ()))
1470 	{
1471 	case '-':
1472 	  BUILD_OPERATOR (DECR_TK);
1473 	case '=':
1474 	  BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1475 	default:
1476 	  java_unget_unicode ();
1477 	  BUILD_OPERATOR (MINUS_TK);
1478 	}
1479 
1480     case '*':
1481       if ((c = java_get_unicode ()) == '=')
1482 	{
1483 	  BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1484 	}
1485       else
1486 	{
1487 	  java_unget_unicode ();
1488 	  BUILD_OPERATOR (MULT_TK);
1489 	}
1490 
1491     case '/':
1492       if ((c = java_get_unicode ()) == '=')
1493 	{
1494 	  BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1495 	}
1496       else
1497 	{
1498 	  java_unget_unicode ();
1499 	  BUILD_OPERATOR (DIV_TK);
1500 	}
1501 
1502     case '^':
1503       if ((c = java_get_unicode ()) == '=')
1504 	{
1505 	  BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1506 	}
1507       else
1508 	{
1509 	  java_unget_unicode ();
1510 	  BUILD_OPERATOR (XOR_TK);
1511 	}
1512 
1513     case '%':
1514       if ((c = java_get_unicode ()) == '=')
1515 	{
1516 	  BUILD_OPERATOR2 (REM_ASSIGN_TK);
1517 	}
1518       else
1519 	{
1520 	  java_unget_unicode ();
1521 	  BUILD_OPERATOR (REM_TK);
1522 	}
1523 
1524     case '!':
1525       if ((c = java_get_unicode()) == '=')
1526 	{
1527 	  BUILD_OPERATOR (NEQ_TK);
1528 	}
1529       else
1530 	{
1531 	  java_unget_unicode ();
1532 	  BUILD_OPERATOR (NEG_TK);
1533 	}
1534 
1535     case '?':
1536       JAVA_LEX_OP ("?");
1537       BUILD_OPERATOR (REL_QM_TK);
1538     case ':':
1539       JAVA_LEX_OP (":");
1540       BUILD_OPERATOR (REL_CL_TK);
1541     case '~':
1542       BUILD_OPERATOR (NOT_TK);
1543     }
1544 
1545   /* Keyword, boolean literal or null literal.  */
1546   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1547        c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1548     {
1549       java_unicode_2_utf8 (c);
1550       if (all_ascii && c >= 128)
1551         all_ascii = 0;
1552       ascii_index++;
1553     }
1554 
1555   obstack_1grow (&temporary_obstack, '\0');
1556   string = obstack_finish (&temporary_obstack);
1557   if (c != UEOF)
1558     java_unget_unicode ();
1559 
1560   /* If we have something all ascii, we consider a keyword, a boolean
1561      literal, a null literal or an all ASCII identifier.  Otherwise,
1562      this is an identifier (possibly not respecting formation rule).  */
1563   if (all_ascii)
1564     {
1565       const struct java_keyword *kw;
1566       if ((kw=java_keyword (string, ascii_index)))
1567 	{
1568 	  JAVA_LEX_KW (string);
1569 	  switch (kw->token)
1570 	    {
1571 	    case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1572 	    case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1573 	    case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1574 	    case PRIVATE_TK:      case STRICT_TK:
1575 	      SET_MODIFIER_CTX (kw->token);
1576 	      return MODIFIER_TK;
1577 	    case FLOAT_TK:
1578 	      SET_LVAL_NODE (float_type_node);
1579 	      return FP_TK;
1580 	    case DOUBLE_TK:
1581 	      SET_LVAL_NODE (double_type_node);
1582 	      return FP_TK;
1583 	    case BOOLEAN_TK:
1584 	      SET_LVAL_NODE (boolean_type_node);
1585 	      return BOOLEAN_TK;
1586 	    case BYTE_TK:
1587 	      SET_LVAL_NODE (byte_type_node);
1588 	      return INTEGRAL_TK;
1589 	    case SHORT_TK:
1590 	      SET_LVAL_NODE (short_type_node);
1591 	      return INTEGRAL_TK;
1592 	    case INT_TK:
1593 	      SET_LVAL_NODE (int_type_node);
1594 	      return INTEGRAL_TK;
1595 	    case LONG_TK:
1596 	      SET_LVAL_NODE (long_type_node);
1597 	      return INTEGRAL_TK;
1598 	    case CHAR_TK:
1599 	      SET_LVAL_NODE (char_type_node);
1600 	      return INTEGRAL_TK;
1601 
1602 	      /* Keyword based literals.  */
1603 	    case TRUE_TK:
1604 	    case FALSE_TK:
1605 	      SET_LVAL_NODE ((kw->token == TRUE_TK ?
1606 			      boolean_true_node : boolean_false_node));
1607 	      return BOOL_LIT_TK;
1608 	    case NULL_TK:
1609 	      SET_LVAL_NODE (null_pointer_node);
1610 	      return NULL_TK;
1611 
1612 	    case ASSERT_TK:
1613 	      if (flag_assert)
1614 		{
1615 		  BUILD_OPERATOR (kw->token);
1616 		  return kw->token;
1617 		}
1618 	      else
1619 		break;
1620 
1621 	      /* Some keyword we want to retain information on the location
1622 		 they where found.  */
1623 	    case CASE_TK:
1624 	    case DEFAULT_TK:
1625 	    case SUPER_TK:
1626 	    case THIS_TK:
1627 	    case RETURN_TK:
1628 	    case BREAK_TK:
1629 	    case CONTINUE_TK:
1630 	    case TRY_TK:
1631 	    case CATCH_TK:
1632 	    case THROW_TK:
1633 	    case INSTANCEOF_TK:
1634 	      BUILD_OPERATOR (kw->token);
1635 
1636 	    default:
1637 	      return kw->token;
1638 	    }
1639 	}
1640     }
1641 
1642   /* We may have an ID here.  */
1643   if (JAVA_START_CHAR_P (first_unicode))
1644     {
1645       JAVA_LEX_ID (string);
1646       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1647       return ID_TK;
1648     }
1649 
1650   /* Everything else is an invalid character in the input.  */
1651   {
1652     char lex_error_buffer [128];
1653     sprintf (lex_error_buffer, "Invalid character `%s' in input",
1654 	     java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1655     java_lex_error (lex_error_buffer, 1);
1656   }
1657   return 0;
1658 }
1659 
1660 #ifndef JC1_LITE
1661 /* This is called by the parser to see if an error should be generated
1662    due to numeric overflow.  This function only handles the particular
1663    case of the largest negative value, and is only called in the case
1664    where this value is not preceded by `-'.  */
1665 static void
error_if_numeric_overflow(value)1666 error_if_numeric_overflow (value)
1667      tree value;
1668 {
1669   if (TREE_CODE (value) == INTEGER_CST
1670       && JAVA_RADIX10_FLAG (value)
1671       && tree_int_cst_sgn (value) < 0)
1672     {
1673       if (TREE_TYPE (value) == long_type_node)
1674 	java_lex_error ("Numeric overflow for `long' literal", 0);
1675       else
1676 	java_lex_error ("Numeric overflow for `int' literal", 0);
1677     }
1678 }
1679 #endif /* JC1_LITE */
1680 
1681 static void
java_unicode_2_utf8(unicode)1682 java_unicode_2_utf8 (unicode)
1683     unicode_t unicode;
1684 {
1685   if (RANGE (unicode, 0x01, 0x7f))
1686     obstack_1grow (&temporary_obstack, (char)unicode);
1687   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1688     {
1689       obstack_1grow (&temporary_obstack,
1690 		     (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1691       obstack_1grow (&temporary_obstack,
1692 		     (unsigned char)(0x80 | (unicode & 0x3f)));
1693     }
1694   else				/* Range 0x800-0xffff.  */
1695     {
1696       obstack_1grow (&temporary_obstack,
1697 		     (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1698       obstack_1grow (&temporary_obstack,
1699 		     (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1700       obstack_1grow (&temporary_obstack,
1701 		     (unsigned char)(0x80 | (unicode & 0x003f)));
1702     }
1703 }
1704 
1705 #ifndef JC1_LITE
1706 static tree
build_wfl_node(node)1707 build_wfl_node (node)
1708      tree node;
1709 {
1710   node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1711   /* Prevent java_complete_lhs from short-circuiting node (if constant).  */
1712   TREE_TYPE (node) = NULL_TREE;
1713   return node;
1714 }
1715 #endif
1716 
1717 static void
java_lex_error(msg,forward)1718 java_lex_error (msg, forward)
1719      const char *msg ATTRIBUTE_UNUSED;
1720      int forward ATTRIBUTE_UNUSED;
1721 {
1722 #ifndef JC1_LITE
1723   ctxp->elc.line = ctxp->c_line->lineno;
1724   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1725 
1726   /* Might be caught in the middle of some error report.  */
1727   ctxp->java_error_flag = 0;
1728   java_error (NULL);
1729   java_error (msg);
1730 #endif
1731 }
1732 
1733 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, ends a line, and 0
   otherwise.  Both `\n' and `\r' end a line.  After a `\r' one more
   character is read from FP: a `\n' is swallowed so that CR LF counts
   as one terminator, EOF is dropped, and anything else is pushed back
   onto FP.  */
static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  int following;

  if (c == '\n')
    return 1;
  if (c != '\r')
    return 0;

  /* Carriage return: look one character ahead for a line feed.  */
  following = getc (fp);
  if (!(following == '\n' || following == EOF))
    ungetc (following, fp);
  return 1;
}
1753 #endif
1754 
1755 char *
java_get_line_col(filename,line,col)1756 java_get_line_col (filename, line, col)
1757      const char *filename ATTRIBUTE_UNUSED;
1758      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1759 {
1760 #ifdef JC1_LITE
1761   return 0;
1762 #else
1763   /* Dumb implementation. Doesn't try to cache or optimize things.  */
1764   /* First line of the file is line 1, first column is 1.  */
1765 
1766   /* COL == -1 means, at the CR/LF in LINE.  */
1767   /* COL == -2 means, at the first non space char in LINE.  */
1768 
1769   FILE *fp;
1770   int c, ccol, cline = 1;
1771   int current_line_col = 0;
1772   int first_non_space = 0;
1773   char *base;
1774 
1775   if (!(fp = fopen (filename, "r")))
1776     fatal_io_error ("can't open %s", filename);
1777 
1778   while (cline != line)
1779     {
1780       c = getc (fp);
1781       if (c == EOF)
1782 	{
1783 	  static const char msg[] = "<<file too short - unexpected EOF>>";
1784 	  obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1785 	  goto have_line;
1786 	}
1787       if (java_is_eol (fp, c))
1788 	cline++;
1789     }
1790 
1791   /* Gather the chars of the current line in a buffer.  */
1792   for (;;)
1793     {
1794       c = getc (fp);
1795       if (c < 0 || java_is_eol (fp, c))
1796 	break;
1797       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1798 	first_non_space = current_line_col;
1799       obstack_1grow (&temporary_obstack, c);
1800       current_line_col++;
1801     }
1802  have_line:
1803 
1804   obstack_1grow (&temporary_obstack, '\n');
1805 
1806   if (col == -1)
1807     {
1808       col = current_line_col;
1809       first_non_space = 0;
1810     }
1811   else if (col == -2)
1812     col = first_non_space;
1813   else
1814     first_non_space = 0;
1815 
1816   /* Place the '^' a the right position.  */
1817   base = obstack_base (&temporary_obstack);
1818   for (ccol = 1; ccol <= col+3; ccol++)
1819     {
1820       /* Compute \t when reaching first_non_space.  */
1821       char c = (first_non_space ?
1822 		(base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1823       obstack_1grow (&temporary_obstack, c);
1824     }
1825   obstack_grow0 (&temporary_obstack, "^", 1);
1826 
1827   fclose (fp);
1828   return obstack_finish (&temporary_obstack);
1829 #endif
1830 }
1831 
1832 #ifndef JC1_LITE
/* Compare the LENGTH bytes starting at STR, decoded one character at
   a time with UTF8_GET, against the NUL terminated string NAME.

   At the first character that differs, return the decoded character
   minus the corresponding character of NAME.  If every character of
   NAME was matched, return 0 when STR was consumed exactly up to its
   end and 1 when some of it is left over.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *end = str + length;
  const char *expected;

  for (expected = name; *expected; ++expected)
    {
      int ch = UTF8_GET (str, end);
      int diff = ch - *expected;

      if (diff != 0)
	return diff;
    }

  return str != end;
}
1851 
/* Every C++ keyword, double underscore spellings included, in
   ascending byte order.  cxx_keyword_p runs a binary search over this
   table, so new entries must keep it sorted.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__", "__imag", "__imag__", "__inline", "__inline__",
  "__label__", "__null", "__real", "__real__",
  "__restrict", "__restrict__", "__signed", "__signed__",
  "__typeof", "__typeof__", "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class",
  "compl", "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static",
  "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
1961 
1962 /* Return true if NAME is a C++ keyword.  */
1963 
1964 int
cxx_keyword_p(name,length)1965 cxx_keyword_p (name, length)
1966      const char *name;
1967      int length;
1968 {
1969   int last = ARRAY_SIZE (cxx_keywords);
1970   int first = 0;
1971   int mid = (last + first) / 2;
1972   int old = -1;
1973 
1974   for (mid = (last + first) / 2;
1975        mid != old;
1976        old = mid, mid = (last + first) / 2)
1977     {
1978       int kwl = strlen (cxx_keywords[mid]);
1979       int min_length = kwl > length ? length : kwl;
1980       int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1981 
1982       if (r == 0)
1983 	{
1984 	  int i;
1985 	  /* We've found a match if all the remaining characters are `$'.  */
1986 	  for (i = min_length; i < length && name[i] == '$'; ++i)
1987 	    ;
1988 	  if (i == length)
1989 	    return 1;
1990 	  r = 1;
1991 	}
1992 
1993       if (r < 0)
1994 	last = mid;
1995       else
1996 	first = mid;
1997     }
1998   return 0;
1999 }
2000 #endif /* JC1_LITE */
2001