xref: /openbsd/gnu/usr.bin/gcc/gcc/java/lex.c (revision c87b03e5)
1*c87b03e5Sespie /* Language lexer for the GNU compiler for the Java(TM) language.
2*c87b03e5Sespie    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3*c87b03e5Sespie    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
4*c87b03e5Sespie 
5*c87b03e5Sespie This file is part of GNU CC.
6*c87b03e5Sespie 
7*c87b03e5Sespie GNU CC is free software; you can redistribute it and/or modify
8*c87b03e5Sespie it under the terms of the GNU General Public License as published by
9*c87b03e5Sespie the Free Software Foundation; either version 2, or (at your option)
10*c87b03e5Sespie any later version.
11*c87b03e5Sespie 
12*c87b03e5Sespie GNU CC is distributed in the hope that it will be useful,
13*c87b03e5Sespie but WITHOUT ANY WARRANTY; without even the implied warranty of
14*c87b03e5Sespie MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15*c87b03e5Sespie GNU General Public License for more details.
16*c87b03e5Sespie 
17*c87b03e5Sespie You should have received a copy of the GNU General Public License
18*c87b03e5Sespie along with GNU CC; see the file COPYING.  If not, write to
19*c87b03e5Sespie the Free Software Foundation, 59 Temple Place - Suite 330,
20*c87b03e5Sespie Boston, MA 02111-1307, USA.
21*c87b03e5Sespie 
22*c87b03e5Sespie Java and all Java-based marks are trademarks or registered trademarks
23*c87b03e5Sespie of Sun Microsystems, Inc. in the United States and other countries.
24*c87b03e5Sespie The Free Software Foundation is independent of Sun Microsystems, Inc.  */
25*c87b03e5Sespie 
26*c87b03e5Sespie /* This file defines java_lex (yylex), which reads a Java ASCII source
27*c87b03e5Sespie    file possibly containing Unicode escape sequences or UTF-8 encoded
28*c87b03e5Sespie    characters and returns a token for everything found except comments,
29*c87b03e5Sespie    white space and line terminators.  When necessary, it also fills
30*c87b03e5Sespie    the java_lval (yylval) union.  It is implemented to be called by a
31*c87b03e5Sespie    re-entrant parser generated by Bison.
32*c87b03e5Sespie 
33*c87b03e5Sespie    The lexical analysis conforms to the Java grammar described in "The
34*c87b03e5Sespie    Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35*c87b03e5Sespie    Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
36*c87b03e5Sespie 
37*c87b03e5Sespie #include "keyword.h"
38*c87b03e5Sespie #include "flags.h"
39*c87b03e5Sespie #include "chartables.h"
40*c87b03e5Sespie 
41*c87b03e5Sespie /* Function declarations.  */
42*c87b03e5Sespie static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43*c87b03e5Sespie static void java_unicode_2_utf8 PARAMS ((unicode_t));
44*c87b03e5Sespie static void java_lex_error PARAMS ((const char *, int));
45*c87b03e5Sespie #ifndef JC1_LITE
46*c87b03e5Sespie static int java_is_eol PARAMS ((FILE *, int));
47*c87b03e5Sespie static tree build_wfl_node PARAMS ((tree));
48*c87b03e5Sespie #endif
49*c87b03e5Sespie static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50*c87b03e5Sespie static int java_parse_escape_sequence PARAMS ((void));
51*c87b03e5Sespie static int java_start_char_p PARAMS ((unicode_t));
52*c87b03e5Sespie static int java_part_char_p PARAMS ((unicode_t));
53*c87b03e5Sespie static int java_parse_doc_section PARAMS ((int));
54*c87b03e5Sespie static void java_parse_end_comment PARAMS ((int));
55*c87b03e5Sespie static int java_get_unicode PARAMS ((void));
56*c87b03e5Sespie static int java_read_unicode PARAMS ((java_lexer *, int *));
57*c87b03e5Sespie static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
58*c87b03e5Sespie 							     int *));
59*c87b03e5Sespie static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60*c87b03e5Sespie static int java_read_char PARAMS ((java_lexer *));
61*c87b03e5Sespie static void java_allocate_new_line PARAMS ((void));
62*c87b03e5Sespie static void java_unget_unicode PARAMS ((void));
63*c87b03e5Sespie static unicode_t java_sneak_unicode PARAMS ((void));
64*c87b03e5Sespie #ifndef JC1_LITE
65*c87b03e5Sespie static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
66*c87b03e5Sespie #endif
67*c87b03e5Sespie 
68*c87b03e5Sespie java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
69*c87b03e5Sespie #ifndef JC1_LITE
70*c87b03e5Sespie static void error_if_numeric_overflow PARAMS ((tree));
71*c87b03e5Sespie #endif
72*c87b03e5Sespie 
73*c87b03e5Sespie #ifdef HAVE_ICONV
74*c87b03e5Sespie /* This is nonzero if we have initialized `need_byteswap'.  */
75*c87b03e5Sespie static int byteswap_init = 0;
76*c87b03e5Sespie 
77*c87b03e5Sespie /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
78*c87b03e5Sespie    big-endian order -- not native endian order.  We handle this by
79*c87b03e5Sespie    doing a conversion once at startup and seeing what happens.  This
80*c87b03e5Sespie    flag holds the results of this determination.  */
81*c87b03e5Sespie static int need_byteswap = 0;
82*c87b03e5Sespie #endif
83*c87b03e5Sespie 
84*c87b03e5Sespie void
java_init_lex(finput,encoding)85*c87b03e5Sespie java_init_lex (finput, encoding)
86*c87b03e5Sespie      FILE *finput;
87*c87b03e5Sespie      const char *encoding;
88*c87b03e5Sespie {
89*c87b03e5Sespie #ifndef JC1_LITE
90*c87b03e5Sespie   int java_lang_imported = 0;
91*c87b03e5Sespie 
92*c87b03e5Sespie   if (!java_lang_id)
93*c87b03e5Sespie     java_lang_id = get_identifier ("java.lang");
94*c87b03e5Sespie   if (!inst_id)
95*c87b03e5Sespie     inst_id = get_identifier ("inst$");
96*c87b03e5Sespie   if (!wpv_id)
97*c87b03e5Sespie     wpv_id = get_identifier ("write_parm_value$");
98*c87b03e5Sespie 
99*c87b03e5Sespie   if (!java_lang_imported)
100*c87b03e5Sespie     {
101*c87b03e5Sespie       tree node = build_tree_list
102*c87b03e5Sespie 	(build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
103*c87b03e5Sespie       read_import_dir (TREE_PURPOSE (node));
104*c87b03e5Sespie       TREE_CHAIN (node) = ctxp->import_demand_list;
105*c87b03e5Sespie       ctxp->import_demand_list = node;
106*c87b03e5Sespie       java_lang_imported = 1;
107*c87b03e5Sespie     }
108*c87b03e5Sespie 
109*c87b03e5Sespie   if (!wfl_operator)
110*c87b03e5Sespie     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
111*c87b03e5Sespie   if (!label_id)
112*c87b03e5Sespie     label_id = get_identifier ("$L");
113*c87b03e5Sespie   if (!wfl_append)
114*c87b03e5Sespie     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
115*c87b03e5Sespie   if (!wfl_string_buffer)
116*c87b03e5Sespie     wfl_string_buffer =
117*c87b03e5Sespie       build_expr_wfl (get_identifier (flag_emit_class_files
118*c87b03e5Sespie 				      ? "java.lang.StringBuffer"
119*c87b03e5Sespie 				      : "gnu.gcj.runtime.StringBuffer"),
120*c87b03e5Sespie 		      NULL, 0, 0);
121*c87b03e5Sespie   if (!wfl_to_string)
122*c87b03e5Sespie     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
123*c87b03e5Sespie 
124*c87b03e5Sespie   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
125*c87b03e5Sespie     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
126*c87b03e5Sespie 
127*c87b03e5Sespie   memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
128*c87b03e5Sespie   memset (current_jcf, 0, sizeof (JCF));
129*c87b03e5Sespie   ctxp->current_parsed_class = NULL;
130*c87b03e5Sespie   ctxp->package = NULL_TREE;
131*c87b03e5Sespie #endif
132*c87b03e5Sespie 
133*c87b03e5Sespie   ctxp->filename = input_filename;
134*c87b03e5Sespie   ctxp->lineno = lineno = 0;
135*c87b03e5Sespie   ctxp->p_line = NULL;
136*c87b03e5Sespie   ctxp->c_line = NULL;
137*c87b03e5Sespie   ctxp->java_error_flag = 0;
138*c87b03e5Sespie   ctxp->lexer = java_new_lexer (finput, encoding);
139*c87b03e5Sespie }
140*c87b03e5Sespie 
141*c87b03e5Sespie static char *
java_sprint_unicode(line,i)142*c87b03e5Sespie java_sprint_unicode (line, i)
143*c87b03e5Sespie     struct java_line *line;
144*c87b03e5Sespie     int i;
145*c87b03e5Sespie {
146*c87b03e5Sespie   static char buffer [10];
147*c87b03e5Sespie   if (line->unicode_escape_p [i] || line->line [i] > 128)
148*c87b03e5Sespie     sprintf (buffer, "\\u%04x", line->line [i]);
149*c87b03e5Sespie   else
150*c87b03e5Sespie     {
151*c87b03e5Sespie       buffer [0] = line->line [i];
152*c87b03e5Sespie       buffer [1] = '\0';
153*c87b03e5Sespie     }
154*c87b03e5Sespie   return buffer;
155*c87b03e5Sespie }
156*c87b03e5Sespie 
157*c87b03e5Sespie static unicode_t
java_sneak_unicode()158*c87b03e5Sespie java_sneak_unicode ()
159*c87b03e5Sespie {
160*c87b03e5Sespie   return (ctxp->c_line->line [ctxp->c_line->current]);
161*c87b03e5Sespie }
162*c87b03e5Sespie 
163*c87b03e5Sespie static void
java_unget_unicode()164*c87b03e5Sespie java_unget_unicode ()
165*c87b03e5Sespie {
166*c87b03e5Sespie   if (!ctxp->c_line->current)
167*c87b03e5Sespie     /* Can't unget unicode.  */
168*c87b03e5Sespie     abort ();
169*c87b03e5Sespie 
170*c87b03e5Sespie   ctxp->c_line->current--;
171*c87b03e5Sespie   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
172*c87b03e5Sespie }
173*c87b03e5Sespie 
174*c87b03e5Sespie static void
java_allocate_new_line()175*c87b03e5Sespie java_allocate_new_line ()
176*c87b03e5Sespie {
177*c87b03e5Sespie   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
178*c87b03e5Sespie   char ahead_escape_p = (ctxp->c_line ?
179*c87b03e5Sespie 			 ctxp->c_line->unicode_escape_ahead_p : 0);
180*c87b03e5Sespie 
181*c87b03e5Sespie   if (ctxp->c_line && !ctxp->c_line->white_space_only)
182*c87b03e5Sespie     {
183*c87b03e5Sespie       if (ctxp->p_line)
184*c87b03e5Sespie 	{
185*c87b03e5Sespie 	  free (ctxp->p_line->unicode_escape_p);
186*c87b03e5Sespie 	  free (ctxp->p_line->line);
187*c87b03e5Sespie 	  free (ctxp->p_line);
188*c87b03e5Sespie 	}
189*c87b03e5Sespie       ctxp->p_line = ctxp->c_line;
190*c87b03e5Sespie       ctxp->c_line = NULL;		/* Reallocated.  */
191*c87b03e5Sespie     }
192*c87b03e5Sespie 
193*c87b03e5Sespie   if (!ctxp->c_line)
194*c87b03e5Sespie     {
195*c87b03e5Sespie       ctxp->c_line = xmalloc (sizeof (struct java_line));
196*c87b03e5Sespie       ctxp->c_line->max = JAVA_LINE_MAX;
197*c87b03e5Sespie       ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
198*c87b03e5Sespie       ctxp->c_line->unicode_escape_p =
199*c87b03e5Sespie 	xmalloc (sizeof (char)*ctxp->c_line->max);
200*c87b03e5Sespie       ctxp->c_line->white_space_only = 0;
201*c87b03e5Sespie     }
202*c87b03e5Sespie 
203*c87b03e5Sespie   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
204*c87b03e5Sespie   ctxp->c_line->char_col = ctxp->c_line->current = 0;
205*c87b03e5Sespie   if (ahead)
206*c87b03e5Sespie     {
207*c87b03e5Sespie       ctxp->c_line->line [ctxp->c_line->size] = ahead;
208*c87b03e5Sespie       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
209*c87b03e5Sespie       ctxp->c_line->size++;
210*c87b03e5Sespie     }
211*c87b03e5Sespie   ctxp->c_line->ahead [0] = 0;
212*c87b03e5Sespie   ctxp->c_line->unicode_escape_ahead_p = 0;
213*c87b03e5Sespie   ctxp->c_line->lineno = ++lineno;
214*c87b03e5Sespie   ctxp->c_line->white_space_only = 1;
215*c87b03e5Sespie }
216*c87b03e5Sespie 
217*c87b03e5Sespie /* Create a new lexer object.  */
218*c87b03e5Sespie 
219*c87b03e5Sespie java_lexer *
java_new_lexer(finput,encoding)220*c87b03e5Sespie java_new_lexer (finput, encoding)
221*c87b03e5Sespie      FILE *finput;
222*c87b03e5Sespie      const char *encoding;
223*c87b03e5Sespie {
224*c87b03e5Sespie   java_lexer *lex = xmalloc (sizeof (java_lexer));
225*c87b03e5Sespie   int enc_error = 0;
226*c87b03e5Sespie 
227*c87b03e5Sespie   lex->finput = finput;
228*c87b03e5Sespie   lex->bs_count = 0;
229*c87b03e5Sespie   lex->unget_value = 0;
230*c87b03e5Sespie   lex->hit_eof = 0;
231*c87b03e5Sespie 
232*c87b03e5Sespie #ifdef HAVE_ICONV
233*c87b03e5Sespie   lex->handle = iconv_open ("UCS-2", encoding);
234*c87b03e5Sespie   if (lex->handle != (iconv_t) -1)
235*c87b03e5Sespie     {
236*c87b03e5Sespie       lex->first = -1;
237*c87b03e5Sespie       lex->last = -1;
238*c87b03e5Sespie       lex->out_first = -1;
239*c87b03e5Sespie       lex->out_last = -1;
240*c87b03e5Sespie       lex->read_anything = 0;
241*c87b03e5Sespie       lex->use_fallback = 0;
242*c87b03e5Sespie 
243*c87b03e5Sespie       /* Work around broken iconv() implementations by doing checking at
244*c87b03e5Sespie 	 runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
245*c87b03e5Sespie 	 then all UCS-2 encoders will be broken.  Perhaps not a valid
246*c87b03e5Sespie 	 assumption.  */
247*c87b03e5Sespie       if (! byteswap_init)
248*c87b03e5Sespie 	{
249*c87b03e5Sespie 	  iconv_t handle;
250*c87b03e5Sespie 
251*c87b03e5Sespie 	  byteswap_init = 1;
252*c87b03e5Sespie 
253*c87b03e5Sespie 	  handle = iconv_open ("UCS-2", "UTF-8");
254*c87b03e5Sespie 	  if (handle != (iconv_t) -1)
255*c87b03e5Sespie 	    {
256*c87b03e5Sespie 	      unicode_t result;
257*c87b03e5Sespie 	      unsigned char in[3];
258*c87b03e5Sespie 	      char *inp, *outp;
259*c87b03e5Sespie 	      size_t inc, outc, r;
260*c87b03e5Sespie 
261*c87b03e5Sespie 	      /* This is the UTF-8 encoding of \ufeff.  */
262*c87b03e5Sespie 	      in[0] = 0xef;
263*c87b03e5Sespie 	      in[1] = 0xbb;
264*c87b03e5Sespie 	      in[2] = 0xbf;
265*c87b03e5Sespie 
266*c87b03e5Sespie 	      inp = in;
267*c87b03e5Sespie 	      inc = 3;
268*c87b03e5Sespie 	      outp = (char *) &result;
269*c87b03e5Sespie 	      outc = 2;
270*c87b03e5Sespie 
271*c87b03e5Sespie 	      r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
272*c87b03e5Sespie 			 &outp, &outc);
273*c87b03e5Sespie 	      iconv_close (handle);
274*c87b03e5Sespie 	      /* Conversion must be complete for us to use the result.  */
275*c87b03e5Sespie 	      if (r != (size_t) -1 && inc == 0 && outc == 0)
276*c87b03e5Sespie 		need_byteswap = (result != 0xfeff);
277*c87b03e5Sespie 	    }
278*c87b03e5Sespie 	}
279*c87b03e5Sespie 
280*c87b03e5Sespie       lex->byte_swap = need_byteswap;
281*c87b03e5Sespie     }
282*c87b03e5Sespie   else
283*c87b03e5Sespie #endif /* HAVE_ICONV */
284*c87b03e5Sespie     {
285*c87b03e5Sespie       /* If iconv failed, use the internal decoder if the default
286*c87b03e5Sespie 	 encoding was requested.  This code is used on platforms where
287*c87b03e5Sespie 	 iconv exists but is insufficient for our needs.  For
288*c87b03e5Sespie 	 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
289*c87b03e5Sespie 
290*c87b03e5Sespie 	 On Solaris the default encoding, as returned by nl_langinfo(),
291*c87b03e5Sespie 	 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
292*c87b03e5Sespie 	 understand that.  We work around that by pretending
293*c87b03e5Sespie 	 `646' to be the same as UTF-8.   */
294*c87b03e5Sespie       if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
295*c87b03e5Sespie 	enc_error = 1;
296*c87b03e5Sespie #ifdef HAVE_ICONV
297*c87b03e5Sespie       else
298*c87b03e5Sespie 	lex->use_fallback = 1;
299*c87b03e5Sespie #endif /* HAVE_ICONV */
300*c87b03e5Sespie     }
301*c87b03e5Sespie 
302*c87b03e5Sespie   if (enc_error)
303*c87b03e5Sespie     fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
304*c87b03e5Sespie 
305*c87b03e5Sespie   return lex;
306*c87b03e5Sespie }
307*c87b03e5Sespie 
308*c87b03e5Sespie void
java_destroy_lexer(lex)309*c87b03e5Sespie java_destroy_lexer (lex)
310*c87b03e5Sespie      java_lexer *lex;
311*c87b03e5Sespie {
312*c87b03e5Sespie #ifdef HAVE_ICONV
313*c87b03e5Sespie   if (! lex->use_fallback)
314*c87b03e5Sespie     iconv_close (lex->handle);
315*c87b03e5Sespie #endif
316*c87b03e5Sespie   free (lex);
317*c87b03e5Sespie }
318*c87b03e5Sespie 
/* Return the next character of LEX's input, or UEOF at end of input
   or when the input cannot be decoded.

   A nonzero lex->unget_value is returned first and then cleared.
   Otherwise, when HAVE_ICONV is defined and the lexer is not using
   the fallback decoder, bytes read from lex->finput into lex->buffer
   are converted by iconv into lex->out_buffer and handed out two
   bytes at a time.  In the remaining case the input is decoded here
   as UTF-8 sequences of one to three bytes.  */
319*c87b03e5Sespie static int
java_read_char(lex)320*c87b03e5Sespie java_read_char (lex)
321*c87b03e5Sespie      java_lexer *lex;
322*c87b03e5Sespie {
/* A pushed-back value of zero is indistinguishable from "nothing
   pushed back" in this test.  */
323*c87b03e5Sespie   if (lex->unget_value)
324*c87b03e5Sespie     {
325*c87b03e5Sespie       unicode_t r = lex->unget_value;
326*c87b03e5Sespie       lex->unget_value = 0;
327*c87b03e5Sespie       return r;
328*c87b03e5Sespie     }
329*c87b03e5Sespie 
330*c87b03e5Sespie #ifdef HAVE_ICONV
331*c87b03e5Sespie   if (! lex->use_fallback)
332*c87b03e5Sespie     {
333*c87b03e5Sespie       size_t ir, inbytesleft, in_save, out_count, out_save;
334*c87b03e5Sespie       char *inp, *outp;
335*c87b03e5Sespie       unicode_t result;
336*c87b03e5Sespie 
337*c87b03e5Sespie       /* If there is data which has already been converted, use it.  */
/* Otherwise (out_first is -1 or has caught up with out_last) the
   converted buffer is empty and is refilled from its start.  */
338*c87b03e5Sespie       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
339*c87b03e5Sespie 	{
340*c87b03e5Sespie 	  lex->out_first = 0;
341*c87b03e5Sespie 	  lex->out_last = 0;
342*c87b03e5Sespie 
/* Loop until iconv produces at least one byte of output, the input
   is exhausted, or a conversion error other than EINVAL occurs.  */
343*c87b03e5Sespie 	  while (1)
344*c87b03e5Sespie 	    {
345*c87b03e5Sespie 	      /* See if we need to read more data.  If FIRST == 0 then
346*c87b03e5Sespie 		 the previous conversion attempt ended in the middle of
347*c87b03e5Sespie 		 a character at the end of the buffer.  Otherwise we
348*c87b03e5Sespie 		 only have to read if the buffer is empty.  */
349*c87b03e5Sespie 	      if (lex->first == 0 || lex->first >= lex->last)
350*c87b03e5Sespie 		{
351*c87b03e5Sespie 		  int r;
352*c87b03e5Sespie 
353*c87b03e5Sespie 		  if (lex->first >= lex->last)
354*c87b03e5Sespie 		    {
355*c87b03e5Sespie 		      lex->first = 0;
356*c87b03e5Sespie 		      lex->last = 0;
357*c87b03e5Sespie 		    }
358*c87b03e5Sespie 		  if (feof (lex->finput))
359*c87b03e5Sespie 		    return UEOF;
/* Append to whatever is already buffered, filling the rest of
   lex->buffer.  */
360*c87b03e5Sespie 		  r = fread (&lex->buffer[lex->last], 1,
361*c87b03e5Sespie 			     sizeof (lex->buffer) - lex->last,
362*c87b03e5Sespie 			     lex->finput);
363*c87b03e5Sespie 		  lex->last += r;
364*c87b03e5Sespie 		}
365*c87b03e5Sespie 
366*c87b03e5Sespie 	      inbytesleft = lex->last - lex->first;
367*c87b03e5Sespie 	      out_count = sizeof (lex->out_buffer) - lex->out_last;
368*c87b03e5Sespie 
369*c87b03e5Sespie 	      if (inbytesleft == 0)
370*c87b03e5Sespie 		{
371*c87b03e5Sespie 		  /* We've tried to read and there is nothing left.  */
372*c87b03e5Sespie 		  return UEOF;
373*c87b03e5Sespie 		}
374*c87b03e5Sespie 
/* iconv decrements both counters; the saved copies let us compute
   how many bytes were consumed and produced.  */
375*c87b03e5Sespie 	      in_save = inbytesleft;
376*c87b03e5Sespie 	      out_save = out_count;
377*c87b03e5Sespie 	      inp = &lex->buffer[lex->first];
378*c87b03e5Sespie 	      outp = &lex->out_buffer[lex->out_last];
379*c87b03e5Sespie 	      ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
380*c87b03e5Sespie 			  &inbytesleft, &outp, &out_count);
381*c87b03e5Sespie 
382*c87b03e5Sespie 	      /* If we haven't read any bytes, then look to see if we
383*c87b03e5Sespie 		 have read a BOM.  */
/* 0xfeff means the output is already in the order we read it in;
   0xfffe means every pair must be swapped.  Either way the BOM
   itself is skipped by advancing out_first past it.  */
384*c87b03e5Sespie 	      if (! lex->read_anything && out_save - out_count >= 2)
385*c87b03e5Sespie 		{
386*c87b03e5Sespie 		  unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
387*c87b03e5Sespie 		  if (uc == 0xfeff)
388*c87b03e5Sespie 		    {
389*c87b03e5Sespie 		      lex->byte_swap = 0;
390*c87b03e5Sespie 		      lex->out_first += 2;
391*c87b03e5Sespie 		    }
392*c87b03e5Sespie 		  else if (uc == 0xfffe)
393*c87b03e5Sespie 		    {
394*c87b03e5Sespie 		      lex->byte_swap = 1;
395*c87b03e5Sespie 		      lex->out_first += 2;
396*c87b03e5Sespie 		    }
397*c87b03e5Sespie 		  lex->read_anything = 1;
398*c87b03e5Sespie 		}
399*c87b03e5Sespie 
/* Swap the two bytes of every unit just written by iconv.  */
400*c87b03e5Sespie 	      if (lex->byte_swap)
401*c87b03e5Sespie 		{
402*c87b03e5Sespie 		  unsigned int i;
403*c87b03e5Sespie 		  for (i = 0; i < out_save - out_count; i += 2)
404*c87b03e5Sespie 		    {
405*c87b03e5Sespie 		      char t = lex->out_buffer[lex->out_last + i];
406*c87b03e5Sespie 		      lex->out_buffer[lex->out_last + i]
407*c87b03e5Sespie 			= lex->out_buffer[lex->out_last + i + 1];
408*c87b03e5Sespie 		      lex->out_buffer[lex->out_last + i + 1] = t;
409*c87b03e5Sespie 		    }
410*c87b03e5Sespie 		}
411*c87b03e5Sespie 
/* Account for the input consumed and the output produced.  */
412*c87b03e5Sespie 	      lex->first += in_save - inbytesleft;
413*c87b03e5Sespie 	      lex->out_last += out_save - out_count;
414*c87b03e5Sespie 
415*c87b03e5Sespie 	      /* If we converted anything at all, move along.  */
416*c87b03e5Sespie 	      if (out_count != out_save)
417*c87b03e5Sespie 		break;
418*c87b03e5Sespie 
419*c87b03e5Sespie 	      if (ir == (size_t) -1)
420*c87b03e5Sespie 		{
421*c87b03e5Sespie 		  if (errno == EINVAL)
422*c87b03e5Sespie 		    {
423*c87b03e5Sespie 		      /* This is ok.  This means that the end of our buffer
424*c87b03e5Sespie 			 is in the middle of a character sequence.  We just
425*c87b03e5Sespie 			 move the valid part of the buffer to the beginning
426*c87b03e5Sespie 			 to force a read.  */
427*c87b03e5Sespie 		      memmove (&lex->buffer[0], &lex->buffer[lex->first],
428*c87b03e5Sespie 			       lex->last - lex->first);
429*c87b03e5Sespie 		      lex->last -= lex->first;
430*c87b03e5Sespie 		      lex->first = 0;
431*c87b03e5Sespie 		    }
432*c87b03e5Sespie 		  else
433*c87b03e5Sespie 		    {
434*c87b03e5Sespie 		      /* A more serious error.  */
435*c87b03e5Sespie 		      java_lex_error ("unrecognized character in input stream",
436*c87b03e5Sespie 				      0);
437*c87b03e5Sespie 		      return UEOF;
438*c87b03e5Sespie 		    }
439*c87b03e5Sespie 		}
440*c87b03e5Sespie 	    }
441*c87b03e5Sespie 	}
442*c87b03e5Sespie 
/* This can still be true after a refill when the only output was a
   BOM that out_first has already skipped.  */
443*c87b03e5Sespie       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
444*c87b03e5Sespie 	{
445*c87b03e5Sespie 	  /* Don't have any data.  */
446*c87b03e5Sespie 	  return UEOF;
447*c87b03e5Sespie 	}
448*c87b03e5Sespie 
449*c87b03e5Sespie       /* Success.  */
/* Hand out one two-byte unit from the converted buffer.  */
450*c87b03e5Sespie       result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
451*c87b03e5Sespie       lex->out_first += 2;
452*c87b03e5Sespie       return result;
453*c87b03e5Sespie     }
454*c87b03e5Sespie   else
455*c87b03e5Sespie #endif /* HAVE_ICONV */
456*c87b03e5Sespie     {
/* Built-in UTF-8 decoder: C is the lead byte, C1 and C2 are the
   continuation bytes, each of which must match 10xxxxxx.  */
457*c87b03e5Sespie       int c, c1, c2;
458*c87b03e5Sespie       c = getc (lex->finput);
459*c87b03e5Sespie 
460*c87b03e5Sespie       if (c == EOF)
461*c87b03e5Sespie 	return UEOF;
462*c87b03e5Sespie       if (c < 128)
463*c87b03e5Sespie 	return (unicode_t) c;
464*c87b03e5Sespie       else
465*c87b03e5Sespie 	{
/* Lead byte 110xxxxx: two-byte sequence.  */
466*c87b03e5Sespie 	  if ((c & 0xe0) == 0xc0)
467*c87b03e5Sespie 	    {
/* NOTE(review): an EOF from getc here is not tested explicitly; it
   is presumably rejected by the continuation-byte mask -- confirm.  */
468*c87b03e5Sespie 	      c1 = getc (lex->finput);
469*c87b03e5Sespie 	      if ((c1 & 0xc0) == 0x80)
470*c87b03e5Sespie 		{
471*c87b03e5Sespie 		  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
472*c87b03e5Sespie 		  /* Check for valid 2-byte characters.  We explicitly
473*c87b03e5Sespie 		     allow \0 because this encoding is common in the
474*c87b03e5Sespie 		     Java world.  */
475*c87b03e5Sespie 		  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
476*c87b03e5Sespie 		    return r;
477*c87b03e5Sespie 		}
478*c87b03e5Sespie 	    }
/* Lead byte 1110xxxx: three-byte sequence.  */
479*c87b03e5Sespie 	  else if ((c & 0xf0) == 0xe0)
480*c87b03e5Sespie 	    {
481*c87b03e5Sespie 	      c1 = getc (lex->finput);
482*c87b03e5Sespie 	      if ((c1 & 0xc0) == 0x80)
483*c87b03e5Sespie 		{
484*c87b03e5Sespie 		  c2 = getc (lex->finput);
485*c87b03e5Sespie 		  if ((c2 & 0xc0) == 0x80)
486*c87b03e5Sespie 		    {
487*c87b03e5Sespie 		      unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
488*c87b03e5Sespie 						 (( c1 & 0x3f) << 6)
489*c87b03e5Sespie 						 + (c2 & 0x3f));
490*c87b03e5Sespie 		      /* Check for valid 3-byte characters.
491*c87b03e5Sespie 			 Don't allow surrogate, \ufffe or \uffff.  */
492*c87b03e5Sespie 		      if (IN_RANGE (r, 0x800, 0xffff)
493*c87b03e5Sespie 			  && ! IN_RANGE (r, 0xd800, 0xdfff)
494*c87b03e5Sespie 			  && r != 0xfffe && r != 0xffff)
495*c87b03e5Sespie 			return r;
496*c87b03e5Sespie 		    }
497*c87b03e5Sespie 		}
498*c87b03e5Sespie 	    }
499*c87b03e5Sespie 
500*c87b03e5Sespie 	  /* We simply don't support invalid characters.  We also
501*c87b03e5Sespie 	     don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
502*c87b03e5Sespie 	     cannot be valid Java characters.  */
503*c87b03e5Sespie 	  java_lex_error ("malformed UTF-8 character", 0);
504*c87b03e5Sespie 	}
505*c87b03e5Sespie     }
506*c87b03e5Sespie 
507*c87b03e5Sespie   /* We only get here on error.  */
508*c87b03e5Sespie   return UEOF;
509*c87b03e5Sespie }
510*c87b03e5Sespie 
511*c87b03e5Sespie static void
java_store_unicode(l,c,unicode_escape_p)512*c87b03e5Sespie java_store_unicode (l, c, unicode_escape_p)
513*c87b03e5Sespie     struct java_line *l;
514*c87b03e5Sespie     unicode_t c;
515*c87b03e5Sespie     int unicode_escape_p;
516*c87b03e5Sespie {
517*c87b03e5Sespie   if (l->size == l->max)
518*c87b03e5Sespie     {
519*c87b03e5Sespie       l->max += JAVA_LINE_MAX;
520*c87b03e5Sespie       l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
521*c87b03e5Sespie       l->unicode_escape_p = xrealloc (l->unicode_escape_p,
522*c87b03e5Sespie 				      sizeof (char)*l->max);
523*c87b03e5Sespie     }
524*c87b03e5Sespie   l->line [l->size] = c;
525*c87b03e5Sespie   l->unicode_escape_p [l->size++] = unicode_escape_p;
526*c87b03e5Sespie }
527*c87b03e5Sespie 
528*c87b03e5Sespie static int
java_read_unicode(lex,unicode_escape_p)529*c87b03e5Sespie java_read_unicode (lex, unicode_escape_p)
530*c87b03e5Sespie      java_lexer *lex;
531*c87b03e5Sespie      int *unicode_escape_p;
532*c87b03e5Sespie {
533*c87b03e5Sespie   int c;
534*c87b03e5Sespie 
535*c87b03e5Sespie   c = java_read_char (lex);
536*c87b03e5Sespie   *unicode_escape_p = 0;
537*c87b03e5Sespie 
538*c87b03e5Sespie   if (c != '\\')
539*c87b03e5Sespie     {
540*c87b03e5Sespie       lex->bs_count = 0;
541*c87b03e5Sespie       return c;
542*c87b03e5Sespie     }
543*c87b03e5Sespie 
544*c87b03e5Sespie   ++lex->bs_count;
545*c87b03e5Sespie   if ((lex->bs_count) % 2 == 1)
546*c87b03e5Sespie     {
547*c87b03e5Sespie       /* Odd number of \ seen.  */
548*c87b03e5Sespie       c = java_read_char (lex);
549*c87b03e5Sespie       if (c == 'u')
550*c87b03e5Sespie         {
551*c87b03e5Sespie 	  unicode_t unicode = 0;
552*c87b03e5Sespie 	  int shift = 12;
553*c87b03e5Sespie 
554*c87b03e5Sespie 	  /* Recognize any number of `u's in \u.  */
555*c87b03e5Sespie 	  while ((c = java_read_char (lex)) == 'u')
556*c87b03e5Sespie 	    ;
557*c87b03e5Sespie 
558*c87b03e5Sespie 	  shift = 12;
559*c87b03e5Sespie 	  do
560*c87b03e5Sespie 	    {
561*c87b03e5Sespie 	      if (c == UEOF)
562*c87b03e5Sespie 		{
563*c87b03e5Sespie 		  java_lex_error ("prematurely terminated \\u sequence", 0);
564*c87b03e5Sespie 		  return UEOF;
565*c87b03e5Sespie 		}
566*c87b03e5Sespie 
567*c87b03e5Sespie 	      if (hex_p (c))
568*c87b03e5Sespie 		unicode |= (unicode_t)(hex_value (c) << shift);
569*c87b03e5Sespie 	      else
570*c87b03e5Sespie 		{
571*c87b03e5Sespie 		  java_lex_error ("non-hex digit in \\u sequence", 0);
572*c87b03e5Sespie 		  break;
573*c87b03e5Sespie 		}
574*c87b03e5Sespie 
575*c87b03e5Sespie 	      c = java_read_char (lex);
576*c87b03e5Sespie 	      shift -= 4;
577*c87b03e5Sespie 	    }
578*c87b03e5Sespie 	  while (shift >= 0);
579*c87b03e5Sespie 
580*c87b03e5Sespie 	  if (c != UEOF)
581*c87b03e5Sespie 	    lex->unget_value = c;
582*c87b03e5Sespie 
583*c87b03e5Sespie 	  lex->bs_count = 0;
584*c87b03e5Sespie 	  *unicode_escape_p = 1;
585*c87b03e5Sespie 	  return unicode;
586*c87b03e5Sespie 	}
587*c87b03e5Sespie       lex->unget_value = c;
588*c87b03e5Sespie     }
589*c87b03e5Sespie   return (unicode_t) '\\';
590*c87b03e5Sespie }
591*c87b03e5Sespie 
592*c87b03e5Sespie static int
java_read_unicode_collapsing_terminators(lex,unicode_escape_p)593*c87b03e5Sespie java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
594*c87b03e5Sespie      java_lexer *lex;
595*c87b03e5Sespie      int *unicode_escape_p;
596*c87b03e5Sespie {
597*c87b03e5Sespie   int c = java_read_unicode (lex, unicode_escape_p);
598*c87b03e5Sespie 
599*c87b03e5Sespie   if (c == '\r')
600*c87b03e5Sespie     {
601*c87b03e5Sespie       /* We have to read ahead to see if we got \r\n.  In that case we
602*c87b03e5Sespie 	 return a single line terminator.  */
603*c87b03e5Sespie       int dummy;
604*c87b03e5Sespie       c = java_read_unicode (lex, &dummy);
605*c87b03e5Sespie       if (c != '\n' && c != UEOF)
606*c87b03e5Sespie 	lex->unget_value = c;
607*c87b03e5Sespie       /* In either case we must return a newline.  */
608*c87b03e5Sespie       c = '\n';
609*c87b03e5Sespie     }
610*c87b03e5Sespie 
611*c87b03e5Sespie   return c;
612*c87b03e5Sespie }
613*c87b03e5Sespie 
614*c87b03e5Sespie static int
java_get_unicode()615*c87b03e5Sespie java_get_unicode ()
616*c87b03e5Sespie {
617*c87b03e5Sespie   /* It's time to read a line when...  */
618*c87b03e5Sespie   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
619*c87b03e5Sespie     {
620*c87b03e5Sespie       int c;
621*c87b03e5Sespie       int found_chars = 0;
622*c87b03e5Sespie 
623*c87b03e5Sespie       if (ctxp->lexer->hit_eof)
624*c87b03e5Sespie 	return UEOF;
625*c87b03e5Sespie 
626*c87b03e5Sespie       java_allocate_new_line ();
627*c87b03e5Sespie       if (ctxp->c_line->line[0] != '\n')
628*c87b03e5Sespie 	{
629*c87b03e5Sespie 	  for (;;)
630*c87b03e5Sespie 	    {
631*c87b03e5Sespie 	      int unicode_escape_p;
632*c87b03e5Sespie 	      c = java_read_unicode_collapsing_terminators (ctxp->lexer,
633*c87b03e5Sespie 							    &unicode_escape_p);
634*c87b03e5Sespie 	      if (c != UEOF)
635*c87b03e5Sespie 		{
636*c87b03e5Sespie 		  found_chars = 1;
637*c87b03e5Sespie 		  java_store_unicode (ctxp->c_line, c, unicode_escape_p);
638*c87b03e5Sespie 		  if (ctxp->c_line->white_space_only
639*c87b03e5Sespie 		      && !JAVA_WHITE_SPACE_P (c)
640*c87b03e5Sespie 		      && c != '\n')
641*c87b03e5Sespie 		    ctxp->c_line->white_space_only = 0;
642*c87b03e5Sespie 		}
643*c87b03e5Sespie 	      if ((c == '\n') || (c == UEOF))
644*c87b03e5Sespie 		break;
645*c87b03e5Sespie 	    }
646*c87b03e5Sespie 
647*c87b03e5Sespie 	  if (c == UEOF && ! found_chars)
648*c87b03e5Sespie 	    {
649*c87b03e5Sespie 	      ctxp->lexer->hit_eof = 1;
650*c87b03e5Sespie 	      return UEOF;
651*c87b03e5Sespie 	    }
652*c87b03e5Sespie 	}
653*c87b03e5Sespie     }
654*c87b03e5Sespie   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
655*c87b03e5Sespie   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
656*c87b03e5Sespie   return ctxp->c_line->line [ctxp->c_line->current++];
657*c87b03e5Sespie }
658*c87b03e5Sespie 
659*c87b03e5Sespie /* Parse the end of a C style comment.
660*c87b03e5Sespie  * C is the first character following the '/' and '*'.  */
661*c87b03e5Sespie static void
java_parse_end_comment(c)662*c87b03e5Sespie java_parse_end_comment (c)
663*c87b03e5Sespie      int c;
664*c87b03e5Sespie {
665*c87b03e5Sespie   for ( ;; c = java_get_unicode ())
666*c87b03e5Sespie     {
667*c87b03e5Sespie       switch (c)
668*c87b03e5Sespie 	{
669*c87b03e5Sespie 	case UEOF:
670*c87b03e5Sespie 	  java_lex_error ("Comment not terminated at end of input", 0);
671*c87b03e5Sespie 	  return;
672*c87b03e5Sespie 	case '*':
673*c87b03e5Sespie 	  switch (c = java_get_unicode ())
674*c87b03e5Sespie 	    {
675*c87b03e5Sespie 	    case UEOF:
676*c87b03e5Sespie 	      java_lex_error ("Comment not terminated at end of input", 0);
677*c87b03e5Sespie 	      return;
678*c87b03e5Sespie 	    case '/':
679*c87b03e5Sespie 	      return;
680*c87b03e5Sespie 	    case '*':	/* Reparse only '*'.  */
681*c87b03e5Sespie 	      java_unget_unicode ();
682*c87b03e5Sespie 	    }
683*c87b03e5Sespie 	}
684*c87b03e5Sespie     }
685*c87b03e5Sespie }
686*c87b03e5Sespie 
687*c87b03e5Sespie /* Parse the documentation section. Keywords must be at the beginning
688*c87b03e5Sespie    of a documentation comment line (ignoring white space and any `*'
689*c87b03e5Sespie    character). Parsed keyword(s): @DEPRECATED.  */
690*c87b03e5Sespie 
691*c87b03e5Sespie static int
java_parse_doc_section(c)692*c87b03e5Sespie java_parse_doc_section (c)
693*c87b03e5Sespie      int c;
694*c87b03e5Sespie {
695*c87b03e5Sespie   int valid_tag = 0, seen_star = 0;
696*c87b03e5Sespie 
697*c87b03e5Sespie   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
698*c87b03e5Sespie     {
699*c87b03e5Sespie       switch (c)
700*c87b03e5Sespie 	{
701*c87b03e5Sespie 	case '*':
702*c87b03e5Sespie 	  seen_star = 1;
703*c87b03e5Sespie 	  break;
704*c87b03e5Sespie 	case '\n': /* ULT */
705*c87b03e5Sespie 	  valid_tag = 1;
706*c87b03e5Sespie 	default:
707*c87b03e5Sespie 	  seen_star = 0;
708*c87b03e5Sespie 	}
709*c87b03e5Sespie       c = java_get_unicode();
710*c87b03e5Sespie     }
711*c87b03e5Sespie 
712*c87b03e5Sespie   if (c == UEOF)
713*c87b03e5Sespie     java_lex_error ("Comment not terminated at end of input", 0);
714*c87b03e5Sespie 
715*c87b03e5Sespie   if (seen_star && (c == '/'))
716*c87b03e5Sespie     return 1;			/* Goto step1 in caller.  */
717*c87b03e5Sespie 
718*c87b03e5Sespie   /* We're parsing `@deprecated'.  */
719*c87b03e5Sespie   if (valid_tag && (c == '@'))
720*c87b03e5Sespie     {
721*c87b03e5Sespie       char tag [11];
722*c87b03e5Sespie       int  tag_index = 0;
723*c87b03e5Sespie 
724*c87b03e5Sespie       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
725*c87b03e5Sespie 	{
726*c87b03e5Sespie 	  c = java_get_unicode ();
727*c87b03e5Sespie 	  tag [tag_index++] = c;
728*c87b03e5Sespie 	}
729*c87b03e5Sespie 
730*c87b03e5Sespie       if (c == UEOF)
731*c87b03e5Sespie 	java_lex_error ("Comment not terminated at end of input", 0);
732*c87b03e5Sespie       tag [tag_index] = '\0';
733*c87b03e5Sespie 
734*c87b03e5Sespie       if (!strcmp (tag, "deprecated"))
735*c87b03e5Sespie 	ctxp->deprecated = 1;
736*c87b03e5Sespie     }
737*c87b03e5Sespie   java_unget_unicode ();
738*c87b03e5Sespie   return 0;
739*c87b03e5Sespie }
740*c87b03e5Sespie 
741*c87b03e5Sespie /* Return true if C is a valid start character for a Java identifier.
742*c87b03e5Sespie    This is only called if C >= 128 -- smaller values are handled
743*c87b03e5Sespie    inline.  However, this function handles all values anyway.  */
744*c87b03e5Sespie static int
java_start_char_p(c)745*c87b03e5Sespie java_start_char_p (c)
746*c87b03e5Sespie      unicode_t c;
747*c87b03e5Sespie {
748*c87b03e5Sespie   unsigned int hi = c / 256;
749*c87b03e5Sespie   const char *const page = type_table[hi];
750*c87b03e5Sespie   unsigned long val = (unsigned long) page;
751*c87b03e5Sespie   int flags;
752*c87b03e5Sespie 
753*c87b03e5Sespie   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
754*c87b03e5Sespie     flags = page[c & 255];
755*c87b03e5Sespie   else
756*c87b03e5Sespie     flags = val;
757*c87b03e5Sespie 
758*c87b03e5Sespie   return flags & LETTER_START;
759*c87b03e5Sespie }
760*c87b03e5Sespie 
761*c87b03e5Sespie /* Return true if C is a valid part character for a Java identifier.
762*c87b03e5Sespie    This is only called if C >= 128 -- smaller values are handled
763*c87b03e5Sespie    inline.  However, this function handles all values anyway.  */
764*c87b03e5Sespie static int
java_part_char_p(c)765*c87b03e5Sespie java_part_char_p (c)
766*c87b03e5Sespie      unicode_t c;
767*c87b03e5Sespie {
768*c87b03e5Sespie   unsigned int hi = c / 256;
769*c87b03e5Sespie   const char *const page = type_table[hi];
770*c87b03e5Sespie   unsigned long val = (unsigned long) page;
771*c87b03e5Sespie   int flags;
772*c87b03e5Sespie 
773*c87b03e5Sespie   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
774*c87b03e5Sespie     flags = page[c & 255];
775*c87b03e5Sespie   else
776*c87b03e5Sespie     flags = val;
777*c87b03e5Sespie 
778*c87b03e5Sespie   return flags & LETTER_PART;
779*c87b03e5Sespie }
780*c87b03e5Sespie 
781*c87b03e5Sespie static int
java_parse_escape_sequence()782*c87b03e5Sespie java_parse_escape_sequence ()
783*c87b03e5Sespie {
784*c87b03e5Sespie   unicode_t char_lit;
785*c87b03e5Sespie   int c;
786*c87b03e5Sespie 
787*c87b03e5Sespie   switch (c = java_get_unicode ())
788*c87b03e5Sespie     {
789*c87b03e5Sespie     case 'b':
790*c87b03e5Sespie       return (unicode_t)0x8;
791*c87b03e5Sespie     case 't':
792*c87b03e5Sespie       return (unicode_t)0x9;
793*c87b03e5Sespie     case 'n':
794*c87b03e5Sespie       return (unicode_t)0xa;
795*c87b03e5Sespie     case 'f':
796*c87b03e5Sespie       return (unicode_t)0xc;
797*c87b03e5Sespie     case 'r':
798*c87b03e5Sespie       return (unicode_t)0xd;
799*c87b03e5Sespie     case '"':
800*c87b03e5Sespie       return (unicode_t)0x22;
801*c87b03e5Sespie     case '\'':
802*c87b03e5Sespie       return (unicode_t)0x27;
803*c87b03e5Sespie     case '\\':
804*c87b03e5Sespie       return (unicode_t)0x5c;
805*c87b03e5Sespie     case '0': case '1': case '2': case '3': case '4':
806*c87b03e5Sespie     case '5': case '6': case '7':
807*c87b03e5Sespie       {
808*c87b03e5Sespie 	int octal_escape[3];
809*c87b03e5Sespie 	int octal_escape_index = 0;
810*c87b03e5Sespie 	int max = 3;
811*c87b03e5Sespie 	int i, shift;
812*c87b03e5Sespie 
813*c87b03e5Sespie 	for (; octal_escape_index < max && RANGE (c, '0', '7');
814*c87b03e5Sespie 	     c = java_get_unicode ())
815*c87b03e5Sespie 	  {
816*c87b03e5Sespie 	    if (octal_escape_index == 0 && c > '3')
817*c87b03e5Sespie 	      {
818*c87b03e5Sespie 		/* According to the grammar, `\477' has a well-defined
819*c87b03e5Sespie 		   meaning -- it is `\47' followed by `7'.  */
820*c87b03e5Sespie 		--max;
821*c87b03e5Sespie 	      }
822*c87b03e5Sespie 	    octal_escape [octal_escape_index++] = c;
823*c87b03e5Sespie 	  }
824*c87b03e5Sespie 
825*c87b03e5Sespie 	java_unget_unicode ();
826*c87b03e5Sespie 
827*c87b03e5Sespie 	for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
828*c87b03e5Sespie 	     i < octal_escape_index; i++, shift -= 3)
829*c87b03e5Sespie 	  char_lit |= (octal_escape [i] - '0') << shift;
830*c87b03e5Sespie 
831*c87b03e5Sespie 	return char_lit;
832*c87b03e5Sespie       }
833*c87b03e5Sespie     default:
834*c87b03e5Sespie       java_lex_error ("Invalid character in escape sequence", 0);
835*c87b03e5Sespie       return JAVA_CHAR_ERROR;
836*c87b03e5Sespie     }
837*c87b03e5Sespie }
838*c87b03e5Sespie 
839*c87b03e5Sespie #ifndef JC1_LITE
840*c87b03e5Sespie #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
841*c87b03e5Sespie 
842*c87b03e5Sespie /* Subroutine of java_lex: converts floating-point literals to tree
843*c87b03e5Sespie    nodes.  LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
844*c87b03e5Sespie    store the result.  FFLAG indicates whether the literal was tagged
845*c87b03e5Sespie    with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
846*c87b03e5Sespie    is the line number on which to report any error.  */
847*c87b03e5Sespie 
848*c87b03e5Sespie static void java_perform_atof	PARAMS ((YYSTYPE *, char *, int, int));
849*c87b03e5Sespie 
850*c87b03e5Sespie static void
java_perform_atof(java_lval,literal_token,fflag,number_beginning)851*c87b03e5Sespie java_perform_atof (java_lval, literal_token, fflag, number_beginning)
852*c87b03e5Sespie      YYSTYPE *java_lval;
853*c87b03e5Sespie      char *literal_token;
854*c87b03e5Sespie      int fflag;
855*c87b03e5Sespie      int number_beginning;
856*c87b03e5Sespie {
857*c87b03e5Sespie   REAL_VALUE_TYPE value;
858*c87b03e5Sespie   tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
859*c87b03e5Sespie 
860*c87b03e5Sespie   SET_REAL_VALUE_ATOF (value,
861*c87b03e5Sespie 		       REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
862*c87b03e5Sespie 
863*c87b03e5Sespie   if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
864*c87b03e5Sespie     {
865*c87b03e5Sespie       JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
866*c87b03e5Sespie       value = DCONST0;
867*c87b03e5Sespie     }
868*c87b03e5Sespie   else if (IS_ZERO (value))
869*c87b03e5Sespie     {
870*c87b03e5Sespie       /* We check to see if the value is really 0 or if we've found an
871*c87b03e5Sespie 	 underflow.  We do this in the most primitive imaginable way.  */
872*c87b03e5Sespie       int really_zero = 1;
873*c87b03e5Sespie       char *p = literal_token;
874*c87b03e5Sespie       if (*p == '-')
875*c87b03e5Sespie 	++p;
876*c87b03e5Sespie       while (*p && *p != 'e' && *p != 'E')
877*c87b03e5Sespie 	{
878*c87b03e5Sespie 	  if (*p != '0' && *p != '.')
879*c87b03e5Sespie 	    {
880*c87b03e5Sespie 	      really_zero = 0;
881*c87b03e5Sespie 	      break;
882*c87b03e5Sespie 	    }
883*c87b03e5Sespie 	  ++p;
884*c87b03e5Sespie 	}
885*c87b03e5Sespie       if (! really_zero)
886*c87b03e5Sespie 	{
887*c87b03e5Sespie 	  int i = ctxp->c_line->current;
888*c87b03e5Sespie 	  ctxp->c_line->current = number_beginning;
889*c87b03e5Sespie 	  java_lex_error ("Floating point literal underflow", 0);
890*c87b03e5Sespie 	  ctxp->c_line->current = i;
891*c87b03e5Sespie 	}
892*c87b03e5Sespie     }
893*c87b03e5Sespie 
894*c87b03e5Sespie   SET_LVAL_NODE_TYPE (build_real (type, value), type);
895*c87b03e5Sespie }
896*c87b03e5Sespie #endif
897*c87b03e5Sespie 
898*c87b03e5Sespie static int yylex		PARAMS ((YYSTYPE *));
899*c87b03e5Sespie 
900*c87b03e5Sespie static int
901*c87b03e5Sespie #ifdef JC1_LITE
yylex(java_lval)902*c87b03e5Sespie yylex (java_lval)
903*c87b03e5Sespie #else
904*c87b03e5Sespie java_lex (java_lval)
905*c87b03e5Sespie #endif
906*c87b03e5Sespie      YYSTYPE *java_lval;
907*c87b03e5Sespie {
908*c87b03e5Sespie   int c;
909*c87b03e5Sespie   unicode_t first_unicode;
910*c87b03e5Sespie   int ascii_index, all_ascii;
911*c87b03e5Sespie   char *string;
912*c87b03e5Sespie 
913*c87b03e5Sespie   /* Translation of the Unicode escape in the raw stream of Unicode
914*c87b03e5Sespie      characters. Takes care of line terminator.  */
915*c87b03e5Sespie  step1:
916*c87b03e5Sespie   /* Skip white spaces: SP, TAB and FF or ULT.  */
917*c87b03e5Sespie   for (c = java_get_unicode ();
918*c87b03e5Sespie        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
919*c87b03e5Sespie     if (c == '\n')
920*c87b03e5Sespie       {
921*c87b03e5Sespie 	ctxp->elc.line = ctxp->c_line->lineno;
922*c87b03e5Sespie 	ctxp->elc.col  = ctxp->c_line->char_col-2;
923*c87b03e5Sespie       }
924*c87b03e5Sespie 
925*c87b03e5Sespie   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
926*c87b03e5Sespie 
927*c87b03e5Sespie   if (c == 0x1a)		/* CTRL-Z.  */
928*c87b03e5Sespie     {
929*c87b03e5Sespie       if ((c = java_get_unicode ()) == UEOF)
930*c87b03e5Sespie 	return 0;		/* Ok here.  */
931*c87b03e5Sespie       else
932*c87b03e5Sespie 	java_unget_unicode ();	/* Caught later, at the end of the
933*c87b03e5Sespie                                    function.  */
934*c87b03e5Sespie     }
935*c87b03e5Sespie   /* Handle EOF here.  */
936*c87b03e5Sespie   if (c == UEOF)	/* Should probably do something here...  */
937*c87b03e5Sespie     return 0;
938*c87b03e5Sespie 
939*c87b03e5Sespie   /* Take care of eventual comments.  */
940*c87b03e5Sespie   if (c == '/')
941*c87b03e5Sespie     {
942*c87b03e5Sespie       switch (c = java_get_unicode ())
943*c87b03e5Sespie 	{
944*c87b03e5Sespie 	case '/':
945*c87b03e5Sespie 	  for (;;)
946*c87b03e5Sespie 	    {
947*c87b03e5Sespie 	      c = java_get_unicode ();
948*c87b03e5Sespie 	      if (c == UEOF)
949*c87b03e5Sespie 		{
950*c87b03e5Sespie 		  /* It is ok to end a `//' comment with EOF, unless
951*c87b03e5Sespie 		     we're being pedantic.  */
952*c87b03e5Sespie 		  if (pedantic)
953*c87b03e5Sespie 		    java_lex_error ("Comment not terminated at end of input",
954*c87b03e5Sespie 				    0);
955*c87b03e5Sespie 		  return 0;
956*c87b03e5Sespie 		}
957*c87b03e5Sespie 	      if (c == '\n')	/* ULT */
958*c87b03e5Sespie 		goto step1;
959*c87b03e5Sespie 	    }
960*c87b03e5Sespie 	  break;
961*c87b03e5Sespie 
962*c87b03e5Sespie 	case '*':
963*c87b03e5Sespie 	  if ((c = java_get_unicode ()) == '*')
964*c87b03e5Sespie 	    {
965*c87b03e5Sespie 	      if ((c = java_get_unicode ()) == '/')
966*c87b03e5Sespie 		goto step1;	/* Empty documentation comment.  */
967*c87b03e5Sespie 	      else if (java_parse_doc_section (c))
968*c87b03e5Sespie 		goto step1;
969*c87b03e5Sespie 	    }
970*c87b03e5Sespie 
971*c87b03e5Sespie 	  java_parse_end_comment ((c = java_get_unicode ()));
972*c87b03e5Sespie 	  goto step1;
973*c87b03e5Sespie 	  break;
974*c87b03e5Sespie 	default:
975*c87b03e5Sespie 	  java_unget_unicode ();
976*c87b03e5Sespie 	  c = '/';
977*c87b03e5Sespie 	  break;
978*c87b03e5Sespie 	}
979*c87b03e5Sespie     }
980*c87b03e5Sespie 
981*c87b03e5Sespie   ctxp->elc.line = ctxp->c_line->lineno;
982*c87b03e5Sespie   ctxp->elc.prev_col = ctxp->elc.col;
983*c87b03e5Sespie   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
984*c87b03e5Sespie   if (ctxp->elc.col < 0)
985*c87b03e5Sespie     abort ();
986*c87b03e5Sespie 
987*c87b03e5Sespie   /* Numeric literals.  */
988*c87b03e5Sespie   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
989*c87b03e5Sespie     {
990*c87b03e5Sespie       /* This section of code is borrowed from gcc/c-lex.c.  */
991*c87b03e5Sespie #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
992*c87b03e5Sespie       int parts[TOTAL_PARTS];
993*c87b03e5Sespie       HOST_WIDE_INT high, low;
994*c87b03e5Sespie       /* End borrowed section.  */
995*c87b03e5Sespie       char literal_token [256];
996*c87b03e5Sespie       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
997*c87b03e5Sespie       int  found_hex_digits = 0, found_non_octal_digits = 0;
998*c87b03e5Sespie       int  i;
999*c87b03e5Sespie #ifndef JC1_LITE
1000*c87b03e5Sespie       int  number_beginning = ctxp->c_line->current;
1001*c87b03e5Sespie       tree value;
1002*c87b03e5Sespie #endif
1003*c87b03e5Sespie 
1004*c87b03e5Sespie       /* We might have a . separator instead of a FP like .[0-9]*.  */
1005*c87b03e5Sespie       if (c == '.')
1006*c87b03e5Sespie 	{
1007*c87b03e5Sespie 	  unicode_t peep = java_sneak_unicode ();
1008*c87b03e5Sespie 
1009*c87b03e5Sespie 	  if (!JAVA_ASCII_DIGIT (peep))
1010*c87b03e5Sespie 	    {
1011*c87b03e5Sespie 	      JAVA_LEX_SEP('.');
1012*c87b03e5Sespie 	      BUILD_OPERATOR (DOT_TK);
1013*c87b03e5Sespie 	    }
1014*c87b03e5Sespie 	}
1015*c87b03e5Sespie 
1016*c87b03e5Sespie       for (i = 0; i < TOTAL_PARTS; i++)
1017*c87b03e5Sespie 	parts [i] = 0;
1018*c87b03e5Sespie 
1019*c87b03e5Sespie       if (c == '0')
1020*c87b03e5Sespie 	{
1021*c87b03e5Sespie 	  c = java_get_unicode ();
1022*c87b03e5Sespie 	  if (c == 'x' || c == 'X')
1023*c87b03e5Sespie 	    {
1024*c87b03e5Sespie 	      radix = 16;
1025*c87b03e5Sespie 	      c = java_get_unicode ();
1026*c87b03e5Sespie 	    }
1027*c87b03e5Sespie 	  else if (JAVA_ASCII_DIGIT (c))
1028*c87b03e5Sespie 	    radix = 8;
1029*c87b03e5Sespie 	  else if (c == '.' || c == 'e' || c =='E')
1030*c87b03e5Sespie 	    {
1031*c87b03e5Sespie 	      /* Push the '.', 'e', or 'E' back and prepare for a FP
1032*c87b03e5Sespie 		 parsing...  */
1033*c87b03e5Sespie 	      java_unget_unicode ();
1034*c87b03e5Sespie 	      c = '0';
1035*c87b03e5Sespie 	    }
1036*c87b03e5Sespie 	  else
1037*c87b03e5Sespie 	    {
1038*c87b03e5Sespie 	      /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}.  */
1039*c87b03e5Sespie 	      JAVA_LEX_LIT ("0", 10);
1040*c87b03e5Sespie               switch (c)
1041*c87b03e5Sespie 		{
1042*c87b03e5Sespie 		case 'L': case 'l':
1043*c87b03e5Sespie 		  SET_LVAL_NODE (long_zero_node);
1044*c87b03e5Sespie 		  return (INT_LIT_TK);
1045*c87b03e5Sespie 		case 'f': case 'F':
1046*c87b03e5Sespie 		  SET_LVAL_NODE (float_zero_node);
1047*c87b03e5Sespie 		  return (FP_LIT_TK);
1048*c87b03e5Sespie 		case 'd': case 'D':
1049*c87b03e5Sespie 		  SET_LVAL_NODE (double_zero_node);
1050*c87b03e5Sespie 		  return (FP_LIT_TK);
1051*c87b03e5Sespie 		default:
1052*c87b03e5Sespie 		  java_unget_unicode ();
1053*c87b03e5Sespie 		  SET_LVAL_NODE (integer_zero_node);
1054*c87b03e5Sespie 		  return (INT_LIT_TK);
1055*c87b03e5Sespie 		}
1056*c87b03e5Sespie 	    }
1057*c87b03e5Sespie 	}
1058*c87b03e5Sespie       /* Parse the first part of the literal, until we find something
1059*c87b03e5Sespie 	 which is not a number.  */
1060*c87b03e5Sespie       while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1061*c87b03e5Sespie 	     JAVA_ASCII_DIGIT (c))
1062*c87b03e5Sespie 	{
1063*c87b03e5Sespie 	  /* We store in a string (in case it turns out to be a FP) and in
1064*c87b03e5Sespie 	     PARTS if we have to process an integer literal.  */
1065*c87b03e5Sespie 	  int numeric = hex_value (c);
1066*c87b03e5Sespie 	  int count;
1067*c87b03e5Sespie 
1068*c87b03e5Sespie 	  /* Remember when we find a valid hexadecimal digit.  */
1069*c87b03e5Sespie 	  if (radix == 16)
1070*c87b03e5Sespie 	    found_hex_digits = 1;
1071*c87b03e5Sespie           /* Remember when we find an invalid octal digit.  */
1072*c87b03e5Sespie           else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1073*c87b03e5Sespie             found_non_octal_digits = 1;
1074*c87b03e5Sespie 
1075*c87b03e5Sespie 	  literal_token [literal_index++] = c;
1076*c87b03e5Sespie 	  /* This section of code is borrowed from gcc/c-lex.c.  */
1077*c87b03e5Sespie 	  for (count = 0; count < TOTAL_PARTS; count++)
1078*c87b03e5Sespie 	    {
1079*c87b03e5Sespie 	      parts[count] *= radix;
1080*c87b03e5Sespie 	      if (count)
1081*c87b03e5Sespie 		{
1082*c87b03e5Sespie 		  parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
1083*c87b03e5Sespie 		  parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1084*c87b03e5Sespie 		}
1085*c87b03e5Sespie 	      else
1086*c87b03e5Sespie 		parts[0] += numeric;
1087*c87b03e5Sespie 	    }
1088*c87b03e5Sespie 	  if (parts [TOTAL_PARTS-1] != 0)
1089*c87b03e5Sespie 	    overflow = 1;
1090*c87b03e5Sespie 	  /* End borrowed section.  */
1091*c87b03e5Sespie 	  c = java_get_unicode ();
1092*c87b03e5Sespie 	}
1093*c87b03e5Sespie 
1094*c87b03e5Sespie       /* If we have something from the FP char set but not a digit, parse
1095*c87b03e5Sespie 	 a FP literal.  */
1096*c87b03e5Sespie       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1097*c87b03e5Sespie 	{
1098*c87b03e5Sespie 	  int stage = 0;
1099*c87b03e5Sespie 	  int seen_digit = (literal_index ? 1 : 0);
1100*c87b03e5Sespie 	  int seen_exponent = 0;
1101*c87b03e5Sespie 	  int fflag = 0;	/* 1 for {f,F}, 0 for {d,D}. FP literal are
1102*c87b03e5Sespie 				   double unless specified.  */
1103*c87b03e5Sespie 
1104*c87b03e5Sespie 	  /* It is ok if the radix is 8 because this just means we've
1105*c87b03e5Sespie 	     seen a leading `0'.  However, radix==16 is invalid.  */
1106*c87b03e5Sespie 	  if (radix == 16)
1107*c87b03e5Sespie 	    java_lex_error ("Can't express non-decimal FP literal", 0);
1108*c87b03e5Sespie 	  radix = 10;
1109*c87b03e5Sespie 
1110*c87b03e5Sespie 	  for (;;)
1111*c87b03e5Sespie 	    {
1112*c87b03e5Sespie 	      if (c == '.')
1113*c87b03e5Sespie 		{
1114*c87b03e5Sespie 		  if (stage < 1)
1115*c87b03e5Sespie 		    {
1116*c87b03e5Sespie 		      stage = 1;
1117*c87b03e5Sespie 		      literal_token [literal_index++ ] = c;
1118*c87b03e5Sespie 		      c = java_get_unicode ();
1119*c87b03e5Sespie 		    }
1120*c87b03e5Sespie 		  else
1121*c87b03e5Sespie 		    java_lex_error ("Invalid character in FP literal", 0);
1122*c87b03e5Sespie 		}
1123*c87b03e5Sespie 
1124*c87b03e5Sespie 	      if (c == 'e' || c == 'E')
1125*c87b03e5Sespie 		{
1126*c87b03e5Sespie 		  if (stage < 2)
1127*c87b03e5Sespie 		    {
1128*c87b03e5Sespie 		      /* {E,e} must have seen at least a digit.  */
1129*c87b03e5Sespie 		      if (!seen_digit)
1130*c87b03e5Sespie 			java_lex_error
1131*c87b03e5Sespie                           ("Invalid FP literal, mantissa must have digit", 0);
1132*c87b03e5Sespie 		      seen_digit = 0;
1133*c87b03e5Sespie 		      seen_exponent = 1;
1134*c87b03e5Sespie 		      stage = 2;
1135*c87b03e5Sespie 		      literal_token [literal_index++] = c;
1136*c87b03e5Sespie 		      c = java_get_unicode ();
1137*c87b03e5Sespie 		    }
1138*c87b03e5Sespie 		  else
1139*c87b03e5Sespie 		    java_lex_error ("Invalid character in FP literal", 0);
1140*c87b03e5Sespie 		}
1141*c87b03e5Sespie 	      if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1142*c87b03e5Sespie 		{
1143*c87b03e5Sespie 		  fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1144*c87b03e5Sespie 		  stage = 4;	/* So we fall through.  */
1145*c87b03e5Sespie 		}
1146*c87b03e5Sespie 
1147*c87b03e5Sespie 	      if ((c=='-' || c =='+') && stage == 2)
1148*c87b03e5Sespie 		{
1149*c87b03e5Sespie 		  stage = 3;
1150*c87b03e5Sespie 		  literal_token [literal_index++] = c;
1151*c87b03e5Sespie 		  c = java_get_unicode ();
1152*c87b03e5Sespie 		}
1153*c87b03e5Sespie 
1154*c87b03e5Sespie 	      if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1155*c87b03e5Sespie 		  (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1156*c87b03e5Sespie 		  (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1157*c87b03e5Sespie 		  (stage == 3 && JAVA_ASCII_DIGIT (c)))
1158*c87b03e5Sespie 		{
1159*c87b03e5Sespie 		  if (JAVA_ASCII_DIGIT (c))
1160*c87b03e5Sespie 		    seen_digit = 1;
1161*c87b03e5Sespie                   if (stage == 2)
1162*c87b03e5Sespie                     stage = 3;
1163*c87b03e5Sespie 		  literal_token [literal_index++ ] = c;
1164*c87b03e5Sespie 		  c = java_get_unicode ();
1165*c87b03e5Sespie 		}
1166*c87b03e5Sespie 	      else
1167*c87b03e5Sespie 		{
1168*c87b03e5Sespie 		  if (stage != 4) /* Don't push back fF/dD.  */
1169*c87b03e5Sespie 		    java_unget_unicode ();
1170*c87b03e5Sespie 
1171*c87b03e5Sespie 		  /* An exponent (if any) must have seen a digit.  */
1172*c87b03e5Sespie 		  if (seen_exponent && !seen_digit)
1173*c87b03e5Sespie 		    java_lex_error
1174*c87b03e5Sespie                       ("Invalid FP literal, exponent must have digit", 0);
1175*c87b03e5Sespie 
1176*c87b03e5Sespie 		  literal_token [literal_index] = '\0';
1177*c87b03e5Sespie 		  JAVA_LEX_LIT (literal_token, radix);
1178*c87b03e5Sespie 
1179*c87b03e5Sespie #ifndef JC1_LITE
1180*c87b03e5Sespie 		  java_perform_atof (java_lval, literal_token,
1181*c87b03e5Sespie 				     fflag, number_beginning);
1182*c87b03e5Sespie #endif
1183*c87b03e5Sespie 		  return FP_LIT_TK;
1184*c87b03e5Sespie 		}
1185*c87b03e5Sespie 	    }
1186*c87b03e5Sespie 	} /* JAVA_ASCII_FPCHAR (c) */
1187*c87b03e5Sespie 
1188*c87b03e5Sespie       /* Here we get back to converting the integral literal.  */
1189*c87b03e5Sespie       if (radix == 16 && ! found_hex_digits)
1190*c87b03e5Sespie 	java_lex_error
1191*c87b03e5Sespie 	  ("0x must be followed by at least one hexadecimal digit", 0);
1192*c87b03e5Sespie       else if (radix == 8 && found_non_octal_digits)
1193*c87b03e5Sespie 	java_lex_error ("Octal literal contains digit out of range", 0);
1194*c87b03e5Sespie       else if (c == 'L' || c == 'l')
1195*c87b03e5Sespie 	long_suffix = 1;
1196*c87b03e5Sespie       else
1197*c87b03e5Sespie 	java_unget_unicode ();
1198*c87b03e5Sespie 
1199*c87b03e5Sespie #ifdef JAVA_LEX_DEBUG
1200*c87b03e5Sespie       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe.  */
1201*c87b03e5Sespie       JAVA_LEX_LIT (literal_token, radix);
1202*c87b03e5Sespie #endif
1203*c87b03e5Sespie       /* This section of code is borrowed from gcc/c-lex.c.  */
1204*c87b03e5Sespie       if (!overflow)
1205*c87b03e5Sespie 	{
1206*c87b03e5Sespie 	  bytes = GET_TYPE_PRECISION (long_type_node);
1207*c87b03e5Sespie 	  for (i = bytes; i < TOTAL_PARTS; i++)
1208*c87b03e5Sespie 	    if (parts [i])
1209*c87b03e5Sespie 	      {
1210*c87b03e5Sespie 	        overflow = 1;
1211*c87b03e5Sespie 		break;
1212*c87b03e5Sespie 	      }
1213*c87b03e5Sespie 	}
1214*c87b03e5Sespie       high = low = 0;
1215*c87b03e5Sespie       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1216*c87b03e5Sespie 	{
1217*c87b03e5Sespie 	  high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1218*c87b03e5Sespie 					      / HOST_BITS_PER_CHAR)]
1219*c87b03e5Sespie 		   << (i * HOST_BITS_PER_CHAR));
1220*c87b03e5Sespie 	  low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1221*c87b03e5Sespie 	}
1222*c87b03e5Sespie       /* End borrowed section.  */
1223*c87b03e5Sespie 
1224*c87b03e5Sespie #ifndef JC1_LITE
1225*c87b03e5Sespie       /* Range checking.  */
1226*c87b03e5Sespie       value = build_int_2 (low, high);
1227*c87b03e5Sespie       /* Temporarily set type to unsigned.  */
1228*c87b03e5Sespie       SET_LVAL_NODE_TYPE (value, (long_suffix
1229*c87b03e5Sespie 				  ? unsigned_long_type_node
1230*c87b03e5Sespie 				  : unsigned_int_type_node));
1231*c87b03e5Sespie 
1232*c87b03e5Sespie       /* For base 10 numbers, only values up to the highest value
1233*c87b03e5Sespie 	 (plus one) can be written.  For instance, only ints up to
1234*c87b03e5Sespie 	 2147483648 can be written.  The special case of the largest
1235*c87b03e5Sespie 	 negative value is handled elsewhere.  For other bases, any
1236*c87b03e5Sespie 	 number can be represented.  */
1237*c87b03e5Sespie       if (overflow || (radix == 10
1238*c87b03e5Sespie 		       && tree_int_cst_lt (long_suffix
1239*c87b03e5Sespie 					   ? decimal_long_max
1240*c87b03e5Sespie 					   : decimal_int_max,
1241*c87b03e5Sespie 					   value)))
1242*c87b03e5Sespie 	{
1243*c87b03e5Sespie 	  if (long_suffix)
1244*c87b03e5Sespie 	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1245*c87b03e5Sespie 	  else
1246*c87b03e5Sespie 	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1247*c87b03e5Sespie 	}
1248*c87b03e5Sespie 
1249*c87b03e5Sespie       /* Sign extend the value.  */
1250*c87b03e5Sespie       SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1251*c87b03e5Sespie       force_fit_type (value, 0);
1252*c87b03e5Sespie       JAVA_RADIX10_FLAG (value) = radix == 10;
1253*c87b03e5Sespie #else
1254*c87b03e5Sespie       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1255*c87b03e5Sespie 			  long_suffix ? long_type_node : int_type_node);
1256*c87b03e5Sespie #endif
1257*c87b03e5Sespie       return INT_LIT_TK;
1258*c87b03e5Sespie     }
1259*c87b03e5Sespie 
1260*c87b03e5Sespie   /* Character literals.  */
1261*c87b03e5Sespie   if (c == '\'')
1262*c87b03e5Sespie     {
1263*c87b03e5Sespie       int char_lit;
1264*c87b03e5Sespie       if ((c = java_get_unicode ()) == '\\')
1265*c87b03e5Sespie 	char_lit = java_parse_escape_sequence ();
1266*c87b03e5Sespie       else
1267*c87b03e5Sespie 	{
1268*c87b03e5Sespie 	  if (c == '\n' || c == '\'')
1269*c87b03e5Sespie 	    java_lex_error ("Invalid character literal", 0);
1270*c87b03e5Sespie 	  char_lit = c;
1271*c87b03e5Sespie 	}
1272*c87b03e5Sespie 
1273*c87b03e5Sespie       c = java_get_unicode ();
1274*c87b03e5Sespie 
1275*c87b03e5Sespie       if ((c == '\n') || (c == UEOF))
1276*c87b03e5Sespie 	java_lex_error ("Character literal not terminated at end of line", 0);
1277*c87b03e5Sespie       if (c != '\'')
1278*c87b03e5Sespie 	java_lex_error ("Syntax error in character literal", 0);
1279*c87b03e5Sespie 
1280*c87b03e5Sespie       if (char_lit == JAVA_CHAR_ERROR)
1281*c87b03e5Sespie         char_lit = 0;		/* We silently convert it to zero.  */
1282*c87b03e5Sespie 
1283*c87b03e5Sespie       JAVA_LEX_CHAR_LIT (char_lit);
1284*c87b03e5Sespie       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1285*c87b03e5Sespie       return CHAR_LIT_TK;
1286*c87b03e5Sespie     }
1287*c87b03e5Sespie 
1288*c87b03e5Sespie   /* String literals.  */
1289*c87b03e5Sespie   if (c == '"')
1290*c87b03e5Sespie     {
1291*c87b03e5Sespie       int no_error;
1292*c87b03e5Sespie       char *string;
1293*c87b03e5Sespie 
1294*c87b03e5Sespie       for (no_error = 1, c = java_get_unicode ();
1295*c87b03e5Sespie 	   c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1296*c87b03e5Sespie 	{
1297*c87b03e5Sespie 	  if (c == '\\')
1298*c87b03e5Sespie 	    c = java_parse_escape_sequence ();
1299*c87b03e5Sespie 	  if (c == JAVA_CHAR_ERROR)
1300*c87b03e5Sespie 	    {
1301*c87b03e5Sespie 	      no_error = 0;
1302*c87b03e5Sespie 	      c = 0;		/* We silently convert it to zero.  */
1303*c87b03e5Sespie 	    }
1304*c87b03e5Sespie 	  java_unicode_2_utf8 (c);
1305*c87b03e5Sespie 	}
1306*c87b03e5Sespie       if (c == '\n' || c == UEOF) /* ULT.  */
1307*c87b03e5Sespie 	{
1308*c87b03e5Sespie 	  lineno--;	/* Refer to the line where the terminator was seen.  */
1309*c87b03e5Sespie 	  java_lex_error ("String not terminated at end of line", 0);
1310*c87b03e5Sespie 	  lineno++;
1311*c87b03e5Sespie 	}
1312*c87b03e5Sespie 
1313*c87b03e5Sespie       obstack_1grow (&temporary_obstack, '\0');
1314*c87b03e5Sespie       string = obstack_finish (&temporary_obstack);
1315*c87b03e5Sespie #ifndef JC1_LITE
1316*c87b03e5Sespie       if (!no_error || (c != '"'))
1317*c87b03e5Sespie 	java_lval->node = error_mark_node; /* FIXME: Requires futher
1318*c87b03e5Sespie                                               testing.  */
1319*c87b03e5Sespie       else
1320*c87b03e5Sespie 	java_lval->node = build_string (strlen (string), string);
1321*c87b03e5Sespie #endif
1322*c87b03e5Sespie       obstack_free (&temporary_obstack, string);
1323*c87b03e5Sespie       return STRING_LIT_TK;
1324*c87b03e5Sespie     }
1325*c87b03e5Sespie 
1326*c87b03e5Sespie   /* Separator.  */
1327*c87b03e5Sespie   switch (c)
1328*c87b03e5Sespie     {
1329*c87b03e5Sespie     case '(':
1330*c87b03e5Sespie       JAVA_LEX_SEP (c);
1331*c87b03e5Sespie       BUILD_OPERATOR (OP_TK);
1332*c87b03e5Sespie     case ')':
1333*c87b03e5Sespie       JAVA_LEX_SEP (c);
1334*c87b03e5Sespie       return CP_TK;
1335*c87b03e5Sespie     case '{':
1336*c87b03e5Sespie       JAVA_LEX_SEP (c);
1337*c87b03e5Sespie       if (ctxp->ccb_indent == 1)
1338*c87b03e5Sespie 	ctxp->first_ccb_indent1 = lineno;
1339*c87b03e5Sespie       ctxp->ccb_indent++;
1340*c87b03e5Sespie       BUILD_OPERATOR (OCB_TK);
1341*c87b03e5Sespie     case '}':
1342*c87b03e5Sespie       JAVA_LEX_SEP (c);
1343*c87b03e5Sespie       ctxp->ccb_indent--;
1344*c87b03e5Sespie       if (ctxp->ccb_indent == 1)
1345*c87b03e5Sespie         ctxp->last_ccb_indent1 = lineno;
1346*c87b03e5Sespie       BUILD_OPERATOR (CCB_TK);
1347*c87b03e5Sespie     case '[':
1348*c87b03e5Sespie       JAVA_LEX_SEP (c);
1349*c87b03e5Sespie       BUILD_OPERATOR (OSB_TK);
1350*c87b03e5Sespie     case ']':
1351*c87b03e5Sespie       JAVA_LEX_SEP (c);
1352*c87b03e5Sespie       return CSB_TK;
1353*c87b03e5Sespie     case ';':
1354*c87b03e5Sespie       JAVA_LEX_SEP (c);
1355*c87b03e5Sespie       return SC_TK;
1356*c87b03e5Sespie     case ',':
1357*c87b03e5Sespie       JAVA_LEX_SEP (c);
1358*c87b03e5Sespie       return C_TK;
1359*c87b03e5Sespie     case '.':
1360*c87b03e5Sespie       JAVA_LEX_SEP (c);
1361*c87b03e5Sespie       BUILD_OPERATOR (DOT_TK);
1362*c87b03e5Sespie       /*      return DOT_TK; */
1363*c87b03e5Sespie     }
1364*c87b03e5Sespie 
1365*c87b03e5Sespie   /* Operators.  */
1366*c87b03e5Sespie   switch (c)
1367*c87b03e5Sespie     {
1368*c87b03e5Sespie     case '=':
1369*c87b03e5Sespie       if ((c = java_get_unicode ()) == '=')
1370*c87b03e5Sespie 	{
1371*c87b03e5Sespie 	  BUILD_OPERATOR (EQ_TK);
1372*c87b03e5Sespie 	}
1373*c87b03e5Sespie       else
1374*c87b03e5Sespie 	{
1375*c87b03e5Sespie 	  /* Equals is used in two different locations. In the
1376*c87b03e5Sespie 	     variable_declarator: rule, it has to be seen as '=' as opposed
1377*c87b03e5Sespie 	     to being seen as an ordinary assignment operator in
1378*c87b03e5Sespie 	     assignment_operators: rule.  */
1379*c87b03e5Sespie 	  java_unget_unicode ();
1380*c87b03e5Sespie 	  BUILD_OPERATOR (ASSIGN_TK);
1381*c87b03e5Sespie 	}
1382*c87b03e5Sespie 
1383*c87b03e5Sespie     case '>':
1384*c87b03e5Sespie       switch ((c = java_get_unicode ()))
1385*c87b03e5Sespie 	{
1386*c87b03e5Sespie 	case '=':
1387*c87b03e5Sespie 	  BUILD_OPERATOR (GTE_TK);
1388*c87b03e5Sespie 	case '>':
1389*c87b03e5Sespie 	  switch ((c = java_get_unicode ()))
1390*c87b03e5Sespie 	    {
1391*c87b03e5Sespie 	    case '>':
1392*c87b03e5Sespie 	      if ((c = java_get_unicode ()) == '=')
1393*c87b03e5Sespie 		{
1394*c87b03e5Sespie 		  BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1395*c87b03e5Sespie 		}
1396*c87b03e5Sespie 	      else
1397*c87b03e5Sespie 		{
1398*c87b03e5Sespie 		  java_unget_unicode ();
1399*c87b03e5Sespie 		  BUILD_OPERATOR (ZRS_TK);
1400*c87b03e5Sespie 		}
1401*c87b03e5Sespie 	    case '=':
1402*c87b03e5Sespie 	      BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1403*c87b03e5Sespie 	    default:
1404*c87b03e5Sespie 	      java_unget_unicode ();
1405*c87b03e5Sespie 	      BUILD_OPERATOR (SRS_TK);
1406*c87b03e5Sespie 	    }
1407*c87b03e5Sespie 	default:
1408*c87b03e5Sespie 	  java_unget_unicode ();
1409*c87b03e5Sespie 	  BUILD_OPERATOR (GT_TK);
1410*c87b03e5Sespie 	}
1411*c87b03e5Sespie 
1412*c87b03e5Sespie     case '<':
1413*c87b03e5Sespie       switch ((c = java_get_unicode ()))
1414*c87b03e5Sespie 	{
1415*c87b03e5Sespie 	case '=':
1416*c87b03e5Sespie 	  BUILD_OPERATOR (LTE_TK);
1417*c87b03e5Sespie 	case '<':
1418*c87b03e5Sespie 	  if ((c = java_get_unicode ()) == '=')
1419*c87b03e5Sespie 	    {
1420*c87b03e5Sespie 	      BUILD_OPERATOR2 (LS_ASSIGN_TK);
1421*c87b03e5Sespie 	    }
1422*c87b03e5Sespie 	  else
1423*c87b03e5Sespie 	    {
1424*c87b03e5Sespie 	      java_unget_unicode ();
1425*c87b03e5Sespie 	      BUILD_OPERATOR (LS_TK);
1426*c87b03e5Sespie 	    }
1427*c87b03e5Sespie 	default:
1428*c87b03e5Sespie 	  java_unget_unicode ();
1429*c87b03e5Sespie 	  BUILD_OPERATOR (LT_TK);
1430*c87b03e5Sespie 	}
1431*c87b03e5Sespie 
1432*c87b03e5Sespie     case '&':
1433*c87b03e5Sespie       switch ((c = java_get_unicode ()))
1434*c87b03e5Sespie 	{
1435*c87b03e5Sespie 	case '&':
1436*c87b03e5Sespie 	  BUILD_OPERATOR (BOOL_AND_TK);
1437*c87b03e5Sespie 	case '=':
1438*c87b03e5Sespie 	  BUILD_OPERATOR2 (AND_ASSIGN_TK);
1439*c87b03e5Sespie 	default:
1440*c87b03e5Sespie 	  java_unget_unicode ();
1441*c87b03e5Sespie 	  BUILD_OPERATOR (AND_TK);
1442*c87b03e5Sespie 	}
1443*c87b03e5Sespie 
1444*c87b03e5Sespie     case '|':
1445*c87b03e5Sespie       switch ((c = java_get_unicode ()))
1446*c87b03e5Sespie 	{
1447*c87b03e5Sespie 	case '|':
1448*c87b03e5Sespie 	  BUILD_OPERATOR (BOOL_OR_TK);
1449*c87b03e5Sespie 	case '=':
1450*c87b03e5Sespie 	  BUILD_OPERATOR2 (OR_ASSIGN_TK);
1451*c87b03e5Sespie 	default:
1452*c87b03e5Sespie 	  java_unget_unicode ();
1453*c87b03e5Sespie 	  BUILD_OPERATOR (OR_TK);
1454*c87b03e5Sespie 	}
1455*c87b03e5Sespie 
1456*c87b03e5Sespie     case '+':
1457*c87b03e5Sespie       switch ((c = java_get_unicode ()))
1458*c87b03e5Sespie 	{
1459*c87b03e5Sespie 	case '+':
1460*c87b03e5Sespie 	  BUILD_OPERATOR (INCR_TK);
1461*c87b03e5Sespie 	case '=':
1462*c87b03e5Sespie 	  BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1463*c87b03e5Sespie 	default:
1464*c87b03e5Sespie 	  java_unget_unicode ();
1465*c87b03e5Sespie 	  BUILD_OPERATOR (PLUS_TK);
1466*c87b03e5Sespie 	}
1467*c87b03e5Sespie 
1468*c87b03e5Sespie     case '-':
1469*c87b03e5Sespie       switch ((c = java_get_unicode ()))
1470*c87b03e5Sespie 	{
1471*c87b03e5Sespie 	case '-':
1472*c87b03e5Sespie 	  BUILD_OPERATOR (DECR_TK);
1473*c87b03e5Sespie 	case '=':
1474*c87b03e5Sespie 	  BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1475*c87b03e5Sespie 	default:
1476*c87b03e5Sespie 	  java_unget_unicode ();
1477*c87b03e5Sespie 	  BUILD_OPERATOR (MINUS_TK);
1478*c87b03e5Sespie 	}
1479*c87b03e5Sespie 
1480*c87b03e5Sespie     case '*':
1481*c87b03e5Sespie       if ((c = java_get_unicode ()) == '=')
1482*c87b03e5Sespie 	{
1483*c87b03e5Sespie 	  BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1484*c87b03e5Sespie 	}
1485*c87b03e5Sespie       else
1486*c87b03e5Sespie 	{
1487*c87b03e5Sespie 	  java_unget_unicode ();
1488*c87b03e5Sespie 	  BUILD_OPERATOR (MULT_TK);
1489*c87b03e5Sespie 	}
1490*c87b03e5Sespie 
1491*c87b03e5Sespie     case '/':
1492*c87b03e5Sespie       if ((c = java_get_unicode ()) == '=')
1493*c87b03e5Sespie 	{
1494*c87b03e5Sespie 	  BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1495*c87b03e5Sespie 	}
1496*c87b03e5Sespie       else
1497*c87b03e5Sespie 	{
1498*c87b03e5Sespie 	  java_unget_unicode ();
1499*c87b03e5Sespie 	  BUILD_OPERATOR (DIV_TK);
1500*c87b03e5Sespie 	}
1501*c87b03e5Sespie 
1502*c87b03e5Sespie     case '^':
1503*c87b03e5Sespie       if ((c = java_get_unicode ()) == '=')
1504*c87b03e5Sespie 	{
1505*c87b03e5Sespie 	  BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1506*c87b03e5Sespie 	}
1507*c87b03e5Sespie       else
1508*c87b03e5Sespie 	{
1509*c87b03e5Sespie 	  java_unget_unicode ();
1510*c87b03e5Sespie 	  BUILD_OPERATOR (XOR_TK);
1511*c87b03e5Sespie 	}
1512*c87b03e5Sespie 
1513*c87b03e5Sespie     case '%':
1514*c87b03e5Sespie       if ((c = java_get_unicode ()) == '=')
1515*c87b03e5Sespie 	{
1516*c87b03e5Sespie 	  BUILD_OPERATOR2 (REM_ASSIGN_TK);
1517*c87b03e5Sespie 	}
1518*c87b03e5Sespie       else
1519*c87b03e5Sespie 	{
1520*c87b03e5Sespie 	  java_unget_unicode ();
1521*c87b03e5Sespie 	  BUILD_OPERATOR (REM_TK);
1522*c87b03e5Sespie 	}
1523*c87b03e5Sespie 
1524*c87b03e5Sespie     case '!':
1525*c87b03e5Sespie       if ((c = java_get_unicode()) == '=')
1526*c87b03e5Sespie 	{
1527*c87b03e5Sespie 	  BUILD_OPERATOR (NEQ_TK);
1528*c87b03e5Sespie 	}
1529*c87b03e5Sespie       else
1530*c87b03e5Sespie 	{
1531*c87b03e5Sespie 	  java_unget_unicode ();
1532*c87b03e5Sespie 	  BUILD_OPERATOR (NEG_TK);
1533*c87b03e5Sespie 	}
1534*c87b03e5Sespie 
1535*c87b03e5Sespie     case '?':
1536*c87b03e5Sespie       JAVA_LEX_OP ("?");
1537*c87b03e5Sespie       BUILD_OPERATOR (REL_QM_TK);
1538*c87b03e5Sespie     case ':':
1539*c87b03e5Sespie       JAVA_LEX_OP (":");
1540*c87b03e5Sespie       BUILD_OPERATOR (REL_CL_TK);
1541*c87b03e5Sespie     case '~':
1542*c87b03e5Sespie       BUILD_OPERATOR (NOT_TK);
1543*c87b03e5Sespie     }
1544*c87b03e5Sespie 
1545*c87b03e5Sespie   /* Keyword, boolean literal or null literal.  */
1546*c87b03e5Sespie   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1547*c87b03e5Sespie        c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1548*c87b03e5Sespie     {
1549*c87b03e5Sespie       java_unicode_2_utf8 (c);
1550*c87b03e5Sespie       if (all_ascii && c >= 128)
1551*c87b03e5Sespie         all_ascii = 0;
1552*c87b03e5Sespie       ascii_index++;
1553*c87b03e5Sespie     }
1554*c87b03e5Sespie 
1555*c87b03e5Sespie   obstack_1grow (&temporary_obstack, '\0');
1556*c87b03e5Sespie   string = obstack_finish (&temporary_obstack);
1557*c87b03e5Sespie   if (c != UEOF)
1558*c87b03e5Sespie     java_unget_unicode ();
1559*c87b03e5Sespie 
1560*c87b03e5Sespie   /* If we have something all ascii, we consider a keyword, a boolean
1561*c87b03e5Sespie      literal, a null literal or an all ASCII identifier.  Otherwise,
1562*c87b03e5Sespie      this is an identifier (possibly not respecting formation rule).  */
1563*c87b03e5Sespie   if (all_ascii)
1564*c87b03e5Sespie     {
1565*c87b03e5Sespie       const struct java_keyword *kw;
1566*c87b03e5Sespie       if ((kw=java_keyword (string, ascii_index)))
1567*c87b03e5Sespie 	{
1568*c87b03e5Sespie 	  JAVA_LEX_KW (string);
1569*c87b03e5Sespie 	  switch (kw->token)
1570*c87b03e5Sespie 	    {
1571*c87b03e5Sespie 	    case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1572*c87b03e5Sespie 	    case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1573*c87b03e5Sespie 	    case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1574*c87b03e5Sespie 	    case PRIVATE_TK:      case STRICT_TK:
1575*c87b03e5Sespie 	      SET_MODIFIER_CTX (kw->token);
1576*c87b03e5Sespie 	      return MODIFIER_TK;
1577*c87b03e5Sespie 	    case FLOAT_TK:
1578*c87b03e5Sespie 	      SET_LVAL_NODE (float_type_node);
1579*c87b03e5Sespie 	      return FP_TK;
1580*c87b03e5Sespie 	    case DOUBLE_TK:
1581*c87b03e5Sespie 	      SET_LVAL_NODE (double_type_node);
1582*c87b03e5Sespie 	      return FP_TK;
1583*c87b03e5Sespie 	    case BOOLEAN_TK:
1584*c87b03e5Sespie 	      SET_LVAL_NODE (boolean_type_node);
1585*c87b03e5Sespie 	      return BOOLEAN_TK;
1586*c87b03e5Sespie 	    case BYTE_TK:
1587*c87b03e5Sespie 	      SET_LVAL_NODE (byte_type_node);
1588*c87b03e5Sespie 	      return INTEGRAL_TK;
1589*c87b03e5Sespie 	    case SHORT_TK:
1590*c87b03e5Sespie 	      SET_LVAL_NODE (short_type_node);
1591*c87b03e5Sespie 	      return INTEGRAL_TK;
1592*c87b03e5Sespie 	    case INT_TK:
1593*c87b03e5Sespie 	      SET_LVAL_NODE (int_type_node);
1594*c87b03e5Sespie 	      return INTEGRAL_TK;
1595*c87b03e5Sespie 	    case LONG_TK:
1596*c87b03e5Sespie 	      SET_LVAL_NODE (long_type_node);
1597*c87b03e5Sespie 	      return INTEGRAL_TK;
1598*c87b03e5Sespie 	    case CHAR_TK:
1599*c87b03e5Sespie 	      SET_LVAL_NODE (char_type_node);
1600*c87b03e5Sespie 	      return INTEGRAL_TK;
1601*c87b03e5Sespie 
1602*c87b03e5Sespie 	      /* Keyword based literals.  */
1603*c87b03e5Sespie 	    case TRUE_TK:
1604*c87b03e5Sespie 	    case FALSE_TK:
1605*c87b03e5Sespie 	      SET_LVAL_NODE ((kw->token == TRUE_TK ?
1606*c87b03e5Sespie 			      boolean_true_node : boolean_false_node));
1607*c87b03e5Sespie 	      return BOOL_LIT_TK;
1608*c87b03e5Sespie 	    case NULL_TK:
1609*c87b03e5Sespie 	      SET_LVAL_NODE (null_pointer_node);
1610*c87b03e5Sespie 	      return NULL_TK;
1611*c87b03e5Sespie 
1612*c87b03e5Sespie 	    case ASSERT_TK:
1613*c87b03e5Sespie 	      if (flag_assert)
1614*c87b03e5Sespie 		{
1615*c87b03e5Sespie 		  BUILD_OPERATOR (kw->token);
1616*c87b03e5Sespie 		  return kw->token;
1617*c87b03e5Sespie 		}
1618*c87b03e5Sespie 	      else
1619*c87b03e5Sespie 		break;
1620*c87b03e5Sespie 
1621*c87b03e5Sespie 	      /* Some keyword we want to retain information on the location
1622*c87b03e5Sespie 		 they where found.  */
1623*c87b03e5Sespie 	    case CASE_TK:
1624*c87b03e5Sespie 	    case DEFAULT_TK:
1625*c87b03e5Sespie 	    case SUPER_TK:
1626*c87b03e5Sespie 	    case THIS_TK:
1627*c87b03e5Sespie 	    case RETURN_TK:
1628*c87b03e5Sespie 	    case BREAK_TK:
1629*c87b03e5Sespie 	    case CONTINUE_TK:
1630*c87b03e5Sespie 	    case TRY_TK:
1631*c87b03e5Sespie 	    case CATCH_TK:
1632*c87b03e5Sespie 	    case THROW_TK:
1633*c87b03e5Sespie 	    case INSTANCEOF_TK:
1634*c87b03e5Sespie 	      BUILD_OPERATOR (kw->token);
1635*c87b03e5Sespie 
1636*c87b03e5Sespie 	    default:
1637*c87b03e5Sespie 	      return kw->token;
1638*c87b03e5Sespie 	    }
1639*c87b03e5Sespie 	}
1640*c87b03e5Sespie     }
1641*c87b03e5Sespie 
1642*c87b03e5Sespie   /* We may have an ID here.  */
1643*c87b03e5Sespie   if (JAVA_START_CHAR_P (first_unicode))
1644*c87b03e5Sespie     {
1645*c87b03e5Sespie       JAVA_LEX_ID (string);
1646*c87b03e5Sespie       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1647*c87b03e5Sespie       return ID_TK;
1648*c87b03e5Sespie     }
1649*c87b03e5Sespie 
1650*c87b03e5Sespie   /* Everything else is an invalid character in the input.  */
1651*c87b03e5Sespie   {
1652*c87b03e5Sespie     char lex_error_buffer [128];
1653*c87b03e5Sespie     sprintf (lex_error_buffer, "Invalid character `%s' in input",
1654*c87b03e5Sespie 	     java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1655*c87b03e5Sespie     java_lex_error (lex_error_buffer, 1);
1656*c87b03e5Sespie   }
1657*c87b03e5Sespie   return 0;
1658*c87b03e5Sespie }
1659*c87b03e5Sespie 
1660*c87b03e5Sespie #ifndef JC1_LITE
1661*c87b03e5Sespie /* This is called by the parser to see if an error should be generated
1662*c87b03e5Sespie    due to numeric overflow.  This function only handles the particular
1663*c87b03e5Sespie    case of the largest negative value, and is only called in the case
1664*c87b03e5Sespie    where this value is not preceded by `-'.  */
1665*c87b03e5Sespie static void
error_if_numeric_overflow(value)1666*c87b03e5Sespie error_if_numeric_overflow (value)
1667*c87b03e5Sespie      tree value;
1668*c87b03e5Sespie {
1669*c87b03e5Sespie   if (TREE_CODE (value) == INTEGER_CST
1670*c87b03e5Sespie       && JAVA_RADIX10_FLAG (value)
1671*c87b03e5Sespie       && tree_int_cst_sgn (value) < 0)
1672*c87b03e5Sespie     {
1673*c87b03e5Sespie       if (TREE_TYPE (value) == long_type_node)
1674*c87b03e5Sespie 	java_lex_error ("Numeric overflow for `long' literal", 0);
1675*c87b03e5Sespie       else
1676*c87b03e5Sespie 	java_lex_error ("Numeric overflow for `int' literal", 0);
1677*c87b03e5Sespie     }
1678*c87b03e5Sespie }
1679*c87b03e5Sespie #endif /* JC1_LITE */
1680*c87b03e5Sespie 
1681*c87b03e5Sespie static void
java_unicode_2_utf8(unicode)1682*c87b03e5Sespie java_unicode_2_utf8 (unicode)
1683*c87b03e5Sespie     unicode_t unicode;
1684*c87b03e5Sespie {
1685*c87b03e5Sespie   if (RANGE (unicode, 0x01, 0x7f))
1686*c87b03e5Sespie     obstack_1grow (&temporary_obstack, (char)unicode);
1687*c87b03e5Sespie   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1688*c87b03e5Sespie     {
1689*c87b03e5Sespie       obstack_1grow (&temporary_obstack,
1690*c87b03e5Sespie 		     (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1691*c87b03e5Sespie       obstack_1grow (&temporary_obstack,
1692*c87b03e5Sespie 		     (unsigned char)(0x80 | (unicode & 0x3f)));
1693*c87b03e5Sespie     }
1694*c87b03e5Sespie   else				/* Range 0x800-0xffff.  */
1695*c87b03e5Sespie     {
1696*c87b03e5Sespie       obstack_1grow (&temporary_obstack,
1697*c87b03e5Sespie 		     (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1698*c87b03e5Sespie       obstack_1grow (&temporary_obstack,
1699*c87b03e5Sespie 		     (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1700*c87b03e5Sespie       obstack_1grow (&temporary_obstack,
1701*c87b03e5Sespie 		     (unsigned char)(0x80 | (unicode & 0x003f)));
1702*c87b03e5Sespie     }
1703*c87b03e5Sespie }
1704*c87b03e5Sespie 
1705*c87b03e5Sespie #ifndef JC1_LITE
1706*c87b03e5Sespie static tree
build_wfl_node(node)1707*c87b03e5Sespie build_wfl_node (node)
1708*c87b03e5Sespie      tree node;
1709*c87b03e5Sespie {
1710*c87b03e5Sespie   node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1711*c87b03e5Sespie   /* Prevent java_complete_lhs from short-circuiting node (if constant).  */
1712*c87b03e5Sespie   TREE_TYPE (node) = NULL_TREE;
1713*c87b03e5Sespie   return node;
1714*c87b03e5Sespie }
1715*c87b03e5Sespie #endif
1716*c87b03e5Sespie 
1717*c87b03e5Sespie static void
java_lex_error(msg,forward)1718*c87b03e5Sespie java_lex_error (msg, forward)
1719*c87b03e5Sespie      const char *msg ATTRIBUTE_UNUSED;
1720*c87b03e5Sespie      int forward ATTRIBUTE_UNUSED;
1721*c87b03e5Sespie {
1722*c87b03e5Sespie #ifndef JC1_LITE
1723*c87b03e5Sespie   ctxp->elc.line = ctxp->c_line->lineno;
1724*c87b03e5Sespie   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1725*c87b03e5Sespie 
1726*c87b03e5Sespie   /* Might be caught in the middle of some error report.  */
1727*c87b03e5Sespie   ctxp->java_error_flag = 0;
1728*c87b03e5Sespie   java_error (NULL);
1729*c87b03e5Sespie   java_error (msg);
1730*c87b03e5Sespie #endif
1731*c87b03e5Sespie }
1732*c87b03e5Sespie 
1733*c87b03e5Sespie #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, terminates a line,
   and 0 otherwise.

   Both `\n' and `\r' terminate a line.  After a `\r' one more
   character is read from FP: a `\n' is swallowed, so that "\r\n"
   counts as a single terminator; EOF is left alone; anything else is
   pushed back onto FP with ungetc.  */
static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  int next;
  switch (c)
    {
    case '\r':
      next = getc (fp);
      if (next != '\n' && next != EOF)
	ungetc (next, fp);
      return 1;
    case '\n':
      return 1;
    default:
      return 0;
    }
}
1753*c87b03e5Sespie #endif
1754*c87b03e5Sespie 
1755*c87b03e5Sespie char *
java_get_line_col(filename,line,col)1756*c87b03e5Sespie java_get_line_col (filename, line, col)
1757*c87b03e5Sespie      const char *filename ATTRIBUTE_UNUSED;
1758*c87b03e5Sespie      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1759*c87b03e5Sespie {
1760*c87b03e5Sespie #ifdef JC1_LITE
1761*c87b03e5Sespie   return 0;
1762*c87b03e5Sespie #else
1763*c87b03e5Sespie   /* Dumb implementation. Doesn't try to cache or optimize things.  */
1764*c87b03e5Sespie   /* First line of the file is line 1, first column is 1.  */
1765*c87b03e5Sespie 
1766*c87b03e5Sespie   /* COL == -1 means, at the CR/LF in LINE.  */
1767*c87b03e5Sespie   /* COL == -2 means, at the first non space char in LINE.  */
1768*c87b03e5Sespie 
1769*c87b03e5Sespie   FILE *fp;
1770*c87b03e5Sespie   int c, ccol, cline = 1;
1771*c87b03e5Sespie   int current_line_col = 0;
1772*c87b03e5Sespie   int first_non_space = 0;
1773*c87b03e5Sespie   char *base;
1774*c87b03e5Sespie 
1775*c87b03e5Sespie   if (!(fp = fopen (filename, "r")))
1776*c87b03e5Sespie     fatal_io_error ("can't open %s", filename);
1777*c87b03e5Sespie 
1778*c87b03e5Sespie   while (cline != line)
1779*c87b03e5Sespie     {
1780*c87b03e5Sespie       c = getc (fp);
1781*c87b03e5Sespie       if (c == EOF)
1782*c87b03e5Sespie 	{
1783*c87b03e5Sespie 	  static const char msg[] = "<<file too short - unexpected EOF>>";
1784*c87b03e5Sespie 	  obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1785*c87b03e5Sespie 	  goto have_line;
1786*c87b03e5Sespie 	}
1787*c87b03e5Sespie       if (java_is_eol (fp, c))
1788*c87b03e5Sespie 	cline++;
1789*c87b03e5Sespie     }
1790*c87b03e5Sespie 
1791*c87b03e5Sespie   /* Gather the chars of the current line in a buffer.  */
1792*c87b03e5Sespie   for (;;)
1793*c87b03e5Sespie     {
1794*c87b03e5Sespie       c = getc (fp);
1795*c87b03e5Sespie       if (c < 0 || java_is_eol (fp, c))
1796*c87b03e5Sespie 	break;
1797*c87b03e5Sespie       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1798*c87b03e5Sespie 	first_non_space = current_line_col;
1799*c87b03e5Sespie       obstack_1grow (&temporary_obstack, c);
1800*c87b03e5Sespie       current_line_col++;
1801*c87b03e5Sespie     }
1802*c87b03e5Sespie  have_line:
1803*c87b03e5Sespie 
1804*c87b03e5Sespie   obstack_1grow (&temporary_obstack, '\n');
1805*c87b03e5Sespie 
1806*c87b03e5Sespie   if (col == -1)
1807*c87b03e5Sespie     {
1808*c87b03e5Sespie       col = current_line_col;
1809*c87b03e5Sespie       first_non_space = 0;
1810*c87b03e5Sespie     }
1811*c87b03e5Sespie   else if (col == -2)
1812*c87b03e5Sespie     col = first_non_space;
1813*c87b03e5Sespie   else
1814*c87b03e5Sespie     first_non_space = 0;
1815*c87b03e5Sespie 
1816*c87b03e5Sespie   /* Place the '^' a the right position.  */
1817*c87b03e5Sespie   base = obstack_base (&temporary_obstack);
1818*c87b03e5Sespie   for (ccol = 1; ccol <= col+3; ccol++)
1819*c87b03e5Sespie     {
1820*c87b03e5Sespie       /* Compute \t when reaching first_non_space.  */
1821*c87b03e5Sespie       char c = (first_non_space ?
1822*c87b03e5Sespie 		(base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1823*c87b03e5Sespie       obstack_1grow (&temporary_obstack, c);
1824*c87b03e5Sespie     }
1825*c87b03e5Sespie   obstack_grow0 (&temporary_obstack, "^", 1);
1826*c87b03e5Sespie 
1827*c87b03e5Sespie   fclose (fp);
1828*c87b03e5Sespie   return obstack_finish (&temporary_obstack);
1829*c87b03e5Sespie #endif
1830*c87b03e5Sespie }
1831*c87b03e5Sespie 
1832*c87b03e5Sespie #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text at STR with the
   NUL-terminated string NAME.

   Characters are fetched from STR with UTF8_GET, one per character of
   NAME.  At the first mismatch the difference between the fetched
   character and the character of NAME is returned.  If all of NAME
   matched, the result is 0 when STR was consumed exactly up to its
   limit and 1 when it was not.

   NOTE(review): this relies on UTF8_GET advancing STR and yielding a
   value that matches no character of NAME once the limit is reached
   -- confirm against its definition.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *limit = str + length;
  int i;

  for (i = 0; name[i]; ++i)
    {
      int ch = UTF8_GET (str, limit);
      if (ch != name[i])
	return ch - name[i];
    }

  return str == limit ? 0 : 1;
}
1851*c87b03e5Sespie 
/* A sorted list of all C++ keywords.  cxx_keyword_p searches this
   table by bisection, so new entries must keep it in ascending
   character-code order.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof",
  "__alignof__",
  "__asm",
  "__asm__",
  "__attribute",
  "__attribute__",
  "__builtin_va_arg",
  "__complex",
  "__complex__",
  "__const",
  "__const__",
  "__extension__",
  "__imag",
  "__imag__",
  "__inline",
  "__inline__",
  "__label__",
  "__null",
  "__real",
  "__real__",
  "__restrict",
  "__restrict__",
  "__signed",
  "__signed__",
  "__typeof",
  "__typeof__",
  "__volatile",
  "__volatile__",
  "and",
  "and_eq",
  "asm",
  "auto",
  "bitand",
  "bitor",
  "bool",
  "break",
  "case",
  "catch",
  "char",
  "class",
  "compl",
  "const",
  "const_cast",
  "continue",
  "default",
  "delete",
  "do",
  "double",
  "dynamic_cast",
  "else",
  "enum",
  "explicit",
  "export",
  "extern",
  "false",
  "float",
  "for",
  "friend",
  "goto",
  "if",
  "inline",
  "int",
  "long",
  "mutable",
  "namespace",
  "new",
  "not",
  "not_eq",
  "operator",
  "or",
  "or_eq",
  "private",
  "protected",
  "public",
  "register",
  "reinterpret_cast",
  "return",
  "short",
  "signed",
  "sizeof",
  "static",
  "static_cast",
  "struct",
  "switch",
  "template",
  "this",
  "throw",
  "true",
  "try",
  "typedef",
  "typeid",
  "typename",
  "typeof",
  "union",
  "unsigned",
  "using",
  "virtual",
  "void",
  "volatile",
  "wchar_t",
  "while",
  "xor",
  "xor_eq"
};
1961*c87b03e5Sespie 
1962*c87b03e5Sespie /* Return true if NAME is a C++ keyword.  */
1963*c87b03e5Sespie 
1964*c87b03e5Sespie int
cxx_keyword_p(name,length)1965*c87b03e5Sespie cxx_keyword_p (name, length)
1966*c87b03e5Sespie      const char *name;
1967*c87b03e5Sespie      int length;
1968*c87b03e5Sespie {
1969*c87b03e5Sespie   int last = ARRAY_SIZE (cxx_keywords);
1970*c87b03e5Sespie   int first = 0;
1971*c87b03e5Sespie   int mid = (last + first) / 2;
1972*c87b03e5Sespie   int old = -1;
1973*c87b03e5Sespie 
1974*c87b03e5Sespie   for (mid = (last + first) / 2;
1975*c87b03e5Sespie        mid != old;
1976*c87b03e5Sespie        old = mid, mid = (last + first) / 2)
1977*c87b03e5Sespie     {
1978*c87b03e5Sespie       int kwl = strlen (cxx_keywords[mid]);
1979*c87b03e5Sespie       int min_length = kwl > length ? length : kwl;
1980*c87b03e5Sespie       int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1981*c87b03e5Sespie 
1982*c87b03e5Sespie       if (r == 0)
1983*c87b03e5Sespie 	{
1984*c87b03e5Sespie 	  int i;
1985*c87b03e5Sespie 	  /* We've found a match if all the remaining characters are `$'.  */
1986*c87b03e5Sespie 	  for (i = min_length; i < length && name[i] == '$'; ++i)
1987*c87b03e5Sespie 	    ;
1988*c87b03e5Sespie 	  if (i == length)
1989*c87b03e5Sespie 	    return 1;
1990*c87b03e5Sespie 	  r = 1;
1991*c87b03e5Sespie 	}
1992*c87b03e5Sespie 
1993*c87b03e5Sespie       if (r < 0)
1994*c87b03e5Sespie 	last = mid;
1995*c87b03e5Sespie       else
1996*c87b03e5Sespie 	first = mid;
1997*c87b03e5Sespie     }
1998*c87b03e5Sespie   return 0;
1999*c87b03e5Sespie }
2000*c87b03e5Sespie #endif /* JC1_LITE */
2001