1*c87b03e5Sespie /* Language lexer for the GNU compiler for the Java(TM) language.
2*c87b03e5Sespie Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3*c87b03e5Sespie Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
4*c87b03e5Sespie
5*c87b03e5Sespie This file is part of GNU CC.
6*c87b03e5Sespie
7*c87b03e5Sespie GNU CC is free software; you can redistribute it and/or modify
8*c87b03e5Sespie it under the terms of the GNU General Public License as published by
9*c87b03e5Sespie the Free Software Foundation; either version 2, or (at your option)
10*c87b03e5Sespie any later version.
11*c87b03e5Sespie
12*c87b03e5Sespie GNU CC is distributed in the hope that it will be useful,
13*c87b03e5Sespie but WITHOUT ANY WARRANTY; without even the implied warranty of
14*c87b03e5Sespie MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15*c87b03e5Sespie GNU General Public License for more details.
16*c87b03e5Sespie
17*c87b03e5Sespie You should have received a copy of the GNU General Public License
18*c87b03e5Sespie along with GNU CC; see the file COPYING. If not, write to
19*c87b03e5Sespie the Free Software Foundation, 59 Temple Place - Suite 330,
20*c87b03e5Sespie Boston, MA 02111-1307, USA.
21*c87b03e5Sespie
22*c87b03e5Sespie Java and all Java-based marks are trademarks or registered trademarks
23*c87b03e5Sespie of Sun Microsystems, Inc. in the United States and other countries.
24*c87b03e5Sespie The Free Software Foundation is independent of Sun Microsystems, Inc. */
25*c87b03e5Sespie
26*c87b03e5Sespie /* It defines java_lex (yylex) that reads a Java ASCII source file
27*c87b03e5Sespie possibly containing Unicode escape sequence or utf8 encoded
28*c87b03e5Sespie characters and returns a token for everything found but comments,
29*c87b03e5Sespie white spaces and line terminators. When necessary, it also fills
30*c87b03e5Sespie the java_lval (yylval) union. It's implemented to be called by a
31*c87b03e5Sespie re-entrant parser generated by Bison.
32*c87b03e5Sespie
33*c87b03e5Sespie The lexical analysis conforms to the Java grammar described in "The
34*c87b03e5Sespie Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35*c87b03e5Sespie Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
36*c87b03e5Sespie
37*c87b03e5Sespie #include "keyword.h"
38*c87b03e5Sespie #include "flags.h"
39*c87b03e5Sespie #include "chartables.h"
40*c87b03e5Sespie
41*c87b03e5Sespie /* Function declarations. */
42*c87b03e5Sespie static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43*c87b03e5Sespie static void java_unicode_2_utf8 PARAMS ((unicode_t));
44*c87b03e5Sespie static void java_lex_error PARAMS ((const char *, int));
45*c87b03e5Sespie #ifndef JC1_LITE
46*c87b03e5Sespie static int java_is_eol PARAMS ((FILE *, int));
47*c87b03e5Sespie static tree build_wfl_node PARAMS ((tree));
48*c87b03e5Sespie #endif
49*c87b03e5Sespie static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50*c87b03e5Sespie static int java_parse_escape_sequence PARAMS ((void));
51*c87b03e5Sespie static int java_start_char_p PARAMS ((unicode_t));
52*c87b03e5Sespie static int java_part_char_p PARAMS ((unicode_t));
53*c87b03e5Sespie static int java_parse_doc_section PARAMS ((int));
54*c87b03e5Sespie static void java_parse_end_comment PARAMS ((int));
55*c87b03e5Sespie static int java_get_unicode PARAMS ((void));
56*c87b03e5Sespie static int java_read_unicode PARAMS ((java_lexer *, int *));
57*c87b03e5Sespie static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
58*c87b03e5Sespie int *));
59*c87b03e5Sespie static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60*c87b03e5Sespie static int java_read_char PARAMS ((java_lexer *));
61*c87b03e5Sespie static void java_allocate_new_line PARAMS ((void));
62*c87b03e5Sespie static void java_unget_unicode PARAMS ((void));
63*c87b03e5Sespie static unicode_t java_sneak_unicode PARAMS ((void));
64*c87b03e5Sespie #ifndef JC1_LITE
65*c87b03e5Sespie static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
66*c87b03e5Sespie #endif
67*c87b03e5Sespie
68*c87b03e5Sespie java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
69*c87b03e5Sespie #ifndef JC1_LITE
70*c87b03e5Sespie static void error_if_numeric_overflow PARAMS ((tree));
71*c87b03e5Sespie #endif
72*c87b03e5Sespie
#ifdef HAVE_ICONV
/* Nonzero once `need_byteswap' has been initialized, so that the
   byte-order probe is attempted at most once.  */
static int byteswap_init;

/* Some versions of iconv() (e.g., glibc 2.1.3) return UCS-2 in
   big-endian order rather than native endian order.  A trial
   conversion done once at startup detects this; the outcome is
   remembered here.  Nonzero means converted output must be
   byte-swapped.  */
static int need_byteswap;
#endif
83*c87b03e5Sespie
84*c87b03e5Sespie void
java_init_lex(finput,encoding)85*c87b03e5Sespie java_init_lex (finput, encoding)
86*c87b03e5Sespie FILE *finput;
87*c87b03e5Sespie const char *encoding;
88*c87b03e5Sespie {
89*c87b03e5Sespie #ifndef JC1_LITE
90*c87b03e5Sespie int java_lang_imported = 0;
91*c87b03e5Sespie
92*c87b03e5Sespie if (!java_lang_id)
93*c87b03e5Sespie java_lang_id = get_identifier ("java.lang");
94*c87b03e5Sespie if (!inst_id)
95*c87b03e5Sespie inst_id = get_identifier ("inst$");
96*c87b03e5Sespie if (!wpv_id)
97*c87b03e5Sespie wpv_id = get_identifier ("write_parm_value$");
98*c87b03e5Sespie
99*c87b03e5Sespie if (!java_lang_imported)
100*c87b03e5Sespie {
101*c87b03e5Sespie tree node = build_tree_list
102*c87b03e5Sespie (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
103*c87b03e5Sespie read_import_dir (TREE_PURPOSE (node));
104*c87b03e5Sespie TREE_CHAIN (node) = ctxp->import_demand_list;
105*c87b03e5Sespie ctxp->import_demand_list = node;
106*c87b03e5Sespie java_lang_imported = 1;
107*c87b03e5Sespie }
108*c87b03e5Sespie
109*c87b03e5Sespie if (!wfl_operator)
110*c87b03e5Sespie wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
111*c87b03e5Sespie if (!label_id)
112*c87b03e5Sespie label_id = get_identifier ("$L");
113*c87b03e5Sespie if (!wfl_append)
114*c87b03e5Sespie wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
115*c87b03e5Sespie if (!wfl_string_buffer)
116*c87b03e5Sespie wfl_string_buffer =
117*c87b03e5Sespie build_expr_wfl (get_identifier (flag_emit_class_files
118*c87b03e5Sespie ? "java.lang.StringBuffer"
119*c87b03e5Sespie : "gnu.gcj.runtime.StringBuffer"),
120*c87b03e5Sespie NULL, 0, 0);
121*c87b03e5Sespie if (!wfl_to_string)
122*c87b03e5Sespie wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
123*c87b03e5Sespie
124*c87b03e5Sespie CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
125*c87b03e5Sespie CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
126*c87b03e5Sespie
127*c87b03e5Sespie memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
128*c87b03e5Sespie memset (current_jcf, 0, sizeof (JCF));
129*c87b03e5Sespie ctxp->current_parsed_class = NULL;
130*c87b03e5Sespie ctxp->package = NULL_TREE;
131*c87b03e5Sespie #endif
132*c87b03e5Sespie
133*c87b03e5Sespie ctxp->filename = input_filename;
134*c87b03e5Sespie ctxp->lineno = lineno = 0;
135*c87b03e5Sespie ctxp->p_line = NULL;
136*c87b03e5Sespie ctxp->c_line = NULL;
137*c87b03e5Sespie ctxp->java_error_flag = 0;
138*c87b03e5Sespie ctxp->lexer = java_new_lexer (finput, encoding);
139*c87b03e5Sespie }
140*c87b03e5Sespie
141*c87b03e5Sespie static char *
java_sprint_unicode(line,i)142*c87b03e5Sespie java_sprint_unicode (line, i)
143*c87b03e5Sespie struct java_line *line;
144*c87b03e5Sespie int i;
145*c87b03e5Sespie {
146*c87b03e5Sespie static char buffer [10];
147*c87b03e5Sespie if (line->unicode_escape_p [i] || line->line [i] > 128)
148*c87b03e5Sespie sprintf (buffer, "\\u%04x", line->line [i]);
149*c87b03e5Sespie else
150*c87b03e5Sespie {
151*c87b03e5Sespie buffer [0] = line->line [i];
152*c87b03e5Sespie buffer [1] = '\0';
153*c87b03e5Sespie }
154*c87b03e5Sespie return buffer;
155*c87b03e5Sespie }
156*c87b03e5Sespie
157*c87b03e5Sespie static unicode_t
java_sneak_unicode()158*c87b03e5Sespie java_sneak_unicode ()
159*c87b03e5Sespie {
160*c87b03e5Sespie return (ctxp->c_line->line [ctxp->c_line->current]);
161*c87b03e5Sespie }
162*c87b03e5Sespie
163*c87b03e5Sespie static void
java_unget_unicode()164*c87b03e5Sespie java_unget_unicode ()
165*c87b03e5Sespie {
166*c87b03e5Sespie if (!ctxp->c_line->current)
167*c87b03e5Sespie /* Can't unget unicode. */
168*c87b03e5Sespie abort ();
169*c87b03e5Sespie
170*c87b03e5Sespie ctxp->c_line->current--;
171*c87b03e5Sespie ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
172*c87b03e5Sespie }
173*c87b03e5Sespie
174*c87b03e5Sespie static void
java_allocate_new_line()175*c87b03e5Sespie java_allocate_new_line ()
176*c87b03e5Sespie {
177*c87b03e5Sespie unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
178*c87b03e5Sespie char ahead_escape_p = (ctxp->c_line ?
179*c87b03e5Sespie ctxp->c_line->unicode_escape_ahead_p : 0);
180*c87b03e5Sespie
181*c87b03e5Sespie if (ctxp->c_line && !ctxp->c_line->white_space_only)
182*c87b03e5Sespie {
183*c87b03e5Sespie if (ctxp->p_line)
184*c87b03e5Sespie {
185*c87b03e5Sespie free (ctxp->p_line->unicode_escape_p);
186*c87b03e5Sespie free (ctxp->p_line->line);
187*c87b03e5Sespie free (ctxp->p_line);
188*c87b03e5Sespie }
189*c87b03e5Sespie ctxp->p_line = ctxp->c_line;
190*c87b03e5Sespie ctxp->c_line = NULL; /* Reallocated. */
191*c87b03e5Sespie }
192*c87b03e5Sespie
193*c87b03e5Sespie if (!ctxp->c_line)
194*c87b03e5Sespie {
195*c87b03e5Sespie ctxp->c_line = xmalloc (sizeof (struct java_line));
196*c87b03e5Sespie ctxp->c_line->max = JAVA_LINE_MAX;
197*c87b03e5Sespie ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
198*c87b03e5Sespie ctxp->c_line->unicode_escape_p =
199*c87b03e5Sespie xmalloc (sizeof (char)*ctxp->c_line->max);
200*c87b03e5Sespie ctxp->c_line->white_space_only = 0;
201*c87b03e5Sespie }
202*c87b03e5Sespie
203*c87b03e5Sespie ctxp->c_line->line [0] = ctxp->c_line->size = 0;
204*c87b03e5Sespie ctxp->c_line->char_col = ctxp->c_line->current = 0;
205*c87b03e5Sespie if (ahead)
206*c87b03e5Sespie {
207*c87b03e5Sespie ctxp->c_line->line [ctxp->c_line->size] = ahead;
208*c87b03e5Sespie ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
209*c87b03e5Sespie ctxp->c_line->size++;
210*c87b03e5Sespie }
211*c87b03e5Sespie ctxp->c_line->ahead [0] = 0;
212*c87b03e5Sespie ctxp->c_line->unicode_escape_ahead_p = 0;
213*c87b03e5Sespie ctxp->c_line->lineno = ++lineno;
214*c87b03e5Sespie ctxp->c_line->white_space_only = 1;
215*c87b03e5Sespie }
216*c87b03e5Sespie
217*c87b03e5Sespie /* Create a new lexer object. */
218*c87b03e5Sespie
219*c87b03e5Sespie java_lexer *
java_new_lexer(finput,encoding)220*c87b03e5Sespie java_new_lexer (finput, encoding)
221*c87b03e5Sespie FILE *finput;
222*c87b03e5Sespie const char *encoding;
223*c87b03e5Sespie {
224*c87b03e5Sespie java_lexer *lex = xmalloc (sizeof (java_lexer));
225*c87b03e5Sespie int enc_error = 0;
226*c87b03e5Sespie
227*c87b03e5Sespie lex->finput = finput;
228*c87b03e5Sespie lex->bs_count = 0;
229*c87b03e5Sespie lex->unget_value = 0;
230*c87b03e5Sespie lex->hit_eof = 0;
231*c87b03e5Sespie
232*c87b03e5Sespie #ifdef HAVE_ICONV
233*c87b03e5Sespie lex->handle = iconv_open ("UCS-2", encoding);
234*c87b03e5Sespie if (lex->handle != (iconv_t) -1)
235*c87b03e5Sespie {
236*c87b03e5Sespie lex->first = -1;
237*c87b03e5Sespie lex->last = -1;
238*c87b03e5Sespie lex->out_first = -1;
239*c87b03e5Sespie lex->out_last = -1;
240*c87b03e5Sespie lex->read_anything = 0;
241*c87b03e5Sespie lex->use_fallback = 0;
242*c87b03e5Sespie
243*c87b03e5Sespie /* Work around broken iconv() implementations by doing checking at
244*c87b03e5Sespie runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
245*c87b03e5Sespie then all UCS-2 encoders will be broken. Perhaps not a valid
246*c87b03e5Sespie assumption. */
247*c87b03e5Sespie if (! byteswap_init)
248*c87b03e5Sespie {
249*c87b03e5Sespie iconv_t handle;
250*c87b03e5Sespie
251*c87b03e5Sespie byteswap_init = 1;
252*c87b03e5Sespie
253*c87b03e5Sespie handle = iconv_open ("UCS-2", "UTF-8");
254*c87b03e5Sespie if (handle != (iconv_t) -1)
255*c87b03e5Sespie {
256*c87b03e5Sespie unicode_t result;
257*c87b03e5Sespie unsigned char in[3];
258*c87b03e5Sespie char *inp, *outp;
259*c87b03e5Sespie size_t inc, outc, r;
260*c87b03e5Sespie
261*c87b03e5Sespie /* This is the UTF-8 encoding of \ufeff. */
262*c87b03e5Sespie in[0] = 0xef;
263*c87b03e5Sespie in[1] = 0xbb;
264*c87b03e5Sespie in[2] = 0xbf;
265*c87b03e5Sespie
266*c87b03e5Sespie inp = in;
267*c87b03e5Sespie inc = 3;
268*c87b03e5Sespie outp = (char *) &result;
269*c87b03e5Sespie outc = 2;
270*c87b03e5Sespie
271*c87b03e5Sespie r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
272*c87b03e5Sespie &outp, &outc);
273*c87b03e5Sespie iconv_close (handle);
274*c87b03e5Sespie /* Conversion must be complete for us to use the result. */
275*c87b03e5Sespie if (r != (size_t) -1 && inc == 0 && outc == 0)
276*c87b03e5Sespie need_byteswap = (result != 0xfeff);
277*c87b03e5Sespie }
278*c87b03e5Sespie }
279*c87b03e5Sespie
280*c87b03e5Sespie lex->byte_swap = need_byteswap;
281*c87b03e5Sespie }
282*c87b03e5Sespie else
283*c87b03e5Sespie #endif /* HAVE_ICONV */
284*c87b03e5Sespie {
285*c87b03e5Sespie /* If iconv failed, use the internal decoder if the default
286*c87b03e5Sespie encoding was requested. This code is used on platforms where
287*c87b03e5Sespie iconv exists but is insufficient for our needs. For
288*c87b03e5Sespie instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
289*c87b03e5Sespie
290*c87b03e5Sespie On Solaris the default encoding, as returned by nl_langinfo(),
291*c87b03e5Sespie is `646' (aka ASCII), but the Solaris iconv_open() doesn't
292*c87b03e5Sespie understand that. We work around that by pretending
293*c87b03e5Sespie `646' to be the same as UTF-8. */
294*c87b03e5Sespie if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
295*c87b03e5Sespie enc_error = 1;
296*c87b03e5Sespie #ifdef HAVE_ICONV
297*c87b03e5Sespie else
298*c87b03e5Sespie lex->use_fallback = 1;
299*c87b03e5Sespie #endif /* HAVE_ICONV */
300*c87b03e5Sespie }
301*c87b03e5Sespie
302*c87b03e5Sespie if (enc_error)
303*c87b03e5Sespie fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
304*c87b03e5Sespie
305*c87b03e5Sespie return lex;
306*c87b03e5Sespie }
307*c87b03e5Sespie
308*c87b03e5Sespie void
java_destroy_lexer(lex)309*c87b03e5Sespie java_destroy_lexer (lex)
310*c87b03e5Sespie java_lexer *lex;
311*c87b03e5Sespie {
312*c87b03e5Sespie #ifdef HAVE_ICONV
313*c87b03e5Sespie if (! lex->use_fallback)
314*c87b03e5Sespie iconv_close (lex->handle);
315*c87b03e5Sespie #endif
316*c87b03e5Sespie free (lex);
317*c87b03e5Sespie }
318*c87b03e5Sespie
319*c87b03e5Sespie static int
java_read_char(lex)320*c87b03e5Sespie java_read_char (lex)
321*c87b03e5Sespie java_lexer *lex;
322*c87b03e5Sespie {
323*c87b03e5Sespie if (lex->unget_value)
324*c87b03e5Sespie {
325*c87b03e5Sespie unicode_t r = lex->unget_value;
326*c87b03e5Sespie lex->unget_value = 0;
327*c87b03e5Sespie return r;
328*c87b03e5Sespie }
329*c87b03e5Sespie
330*c87b03e5Sespie #ifdef HAVE_ICONV
331*c87b03e5Sespie if (! lex->use_fallback)
332*c87b03e5Sespie {
333*c87b03e5Sespie size_t ir, inbytesleft, in_save, out_count, out_save;
334*c87b03e5Sespie char *inp, *outp;
335*c87b03e5Sespie unicode_t result;
336*c87b03e5Sespie
337*c87b03e5Sespie /* If there is data which has already been converted, use it. */
338*c87b03e5Sespie if (lex->out_first == -1 || lex->out_first >= lex->out_last)
339*c87b03e5Sespie {
340*c87b03e5Sespie lex->out_first = 0;
341*c87b03e5Sespie lex->out_last = 0;
342*c87b03e5Sespie
343*c87b03e5Sespie while (1)
344*c87b03e5Sespie {
345*c87b03e5Sespie /* See if we need to read more data. If FIRST == 0 then
346*c87b03e5Sespie the previous conversion attempt ended in the middle of
347*c87b03e5Sespie a character at the end of the buffer. Otherwise we
348*c87b03e5Sespie only have to read if the buffer is empty. */
349*c87b03e5Sespie if (lex->first == 0 || lex->first >= lex->last)
350*c87b03e5Sespie {
351*c87b03e5Sespie int r;
352*c87b03e5Sespie
353*c87b03e5Sespie if (lex->first >= lex->last)
354*c87b03e5Sespie {
355*c87b03e5Sespie lex->first = 0;
356*c87b03e5Sespie lex->last = 0;
357*c87b03e5Sespie }
358*c87b03e5Sespie if (feof (lex->finput))
359*c87b03e5Sespie return UEOF;
360*c87b03e5Sespie r = fread (&lex->buffer[lex->last], 1,
361*c87b03e5Sespie sizeof (lex->buffer) - lex->last,
362*c87b03e5Sespie lex->finput);
363*c87b03e5Sespie lex->last += r;
364*c87b03e5Sespie }
365*c87b03e5Sespie
366*c87b03e5Sespie inbytesleft = lex->last - lex->first;
367*c87b03e5Sespie out_count = sizeof (lex->out_buffer) - lex->out_last;
368*c87b03e5Sespie
369*c87b03e5Sespie if (inbytesleft == 0)
370*c87b03e5Sespie {
371*c87b03e5Sespie /* We've tried to read and there is nothing left. */
372*c87b03e5Sespie return UEOF;
373*c87b03e5Sespie }
374*c87b03e5Sespie
375*c87b03e5Sespie in_save = inbytesleft;
376*c87b03e5Sespie out_save = out_count;
377*c87b03e5Sespie inp = &lex->buffer[lex->first];
378*c87b03e5Sespie outp = &lex->out_buffer[lex->out_last];
379*c87b03e5Sespie ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
380*c87b03e5Sespie &inbytesleft, &outp, &out_count);
381*c87b03e5Sespie
382*c87b03e5Sespie /* If we haven't read any bytes, then look to see if we
383*c87b03e5Sespie have read a BOM. */
384*c87b03e5Sespie if (! lex->read_anything && out_save - out_count >= 2)
385*c87b03e5Sespie {
386*c87b03e5Sespie unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
387*c87b03e5Sespie if (uc == 0xfeff)
388*c87b03e5Sespie {
389*c87b03e5Sespie lex->byte_swap = 0;
390*c87b03e5Sespie lex->out_first += 2;
391*c87b03e5Sespie }
392*c87b03e5Sespie else if (uc == 0xfffe)
393*c87b03e5Sespie {
394*c87b03e5Sespie lex->byte_swap = 1;
395*c87b03e5Sespie lex->out_first += 2;
396*c87b03e5Sespie }
397*c87b03e5Sespie lex->read_anything = 1;
398*c87b03e5Sespie }
399*c87b03e5Sespie
400*c87b03e5Sespie if (lex->byte_swap)
401*c87b03e5Sespie {
402*c87b03e5Sespie unsigned int i;
403*c87b03e5Sespie for (i = 0; i < out_save - out_count; i += 2)
404*c87b03e5Sespie {
405*c87b03e5Sespie char t = lex->out_buffer[lex->out_last + i];
406*c87b03e5Sespie lex->out_buffer[lex->out_last + i]
407*c87b03e5Sespie = lex->out_buffer[lex->out_last + i + 1];
408*c87b03e5Sespie lex->out_buffer[lex->out_last + i + 1] = t;
409*c87b03e5Sespie }
410*c87b03e5Sespie }
411*c87b03e5Sespie
412*c87b03e5Sespie lex->first += in_save - inbytesleft;
413*c87b03e5Sespie lex->out_last += out_save - out_count;
414*c87b03e5Sespie
415*c87b03e5Sespie /* If we converted anything at all, move along. */
416*c87b03e5Sespie if (out_count != out_save)
417*c87b03e5Sespie break;
418*c87b03e5Sespie
419*c87b03e5Sespie if (ir == (size_t) -1)
420*c87b03e5Sespie {
421*c87b03e5Sespie if (errno == EINVAL)
422*c87b03e5Sespie {
423*c87b03e5Sespie /* This is ok. This means that the end of our buffer
424*c87b03e5Sespie is in the middle of a character sequence. We just
425*c87b03e5Sespie move the valid part of the buffer to the beginning
426*c87b03e5Sespie to force a read. */
427*c87b03e5Sespie memmove (&lex->buffer[0], &lex->buffer[lex->first],
428*c87b03e5Sespie lex->last - lex->first);
429*c87b03e5Sespie lex->last -= lex->first;
430*c87b03e5Sespie lex->first = 0;
431*c87b03e5Sespie }
432*c87b03e5Sespie else
433*c87b03e5Sespie {
434*c87b03e5Sespie /* A more serious error. */
435*c87b03e5Sespie java_lex_error ("unrecognized character in input stream",
436*c87b03e5Sespie 0);
437*c87b03e5Sespie return UEOF;
438*c87b03e5Sespie }
439*c87b03e5Sespie }
440*c87b03e5Sespie }
441*c87b03e5Sespie }
442*c87b03e5Sespie
443*c87b03e5Sespie if (lex->out_first == -1 || lex->out_first >= lex->out_last)
444*c87b03e5Sespie {
445*c87b03e5Sespie /* Don't have any data. */
446*c87b03e5Sespie return UEOF;
447*c87b03e5Sespie }
448*c87b03e5Sespie
449*c87b03e5Sespie /* Success. */
450*c87b03e5Sespie result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
451*c87b03e5Sespie lex->out_first += 2;
452*c87b03e5Sespie return result;
453*c87b03e5Sespie }
454*c87b03e5Sespie else
455*c87b03e5Sespie #endif /* HAVE_ICONV */
456*c87b03e5Sespie {
457*c87b03e5Sespie int c, c1, c2;
458*c87b03e5Sespie c = getc (lex->finput);
459*c87b03e5Sespie
460*c87b03e5Sespie if (c == EOF)
461*c87b03e5Sespie return UEOF;
462*c87b03e5Sespie if (c < 128)
463*c87b03e5Sespie return (unicode_t) c;
464*c87b03e5Sespie else
465*c87b03e5Sespie {
466*c87b03e5Sespie if ((c & 0xe0) == 0xc0)
467*c87b03e5Sespie {
468*c87b03e5Sespie c1 = getc (lex->finput);
469*c87b03e5Sespie if ((c1 & 0xc0) == 0x80)
470*c87b03e5Sespie {
471*c87b03e5Sespie unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
472*c87b03e5Sespie /* Check for valid 2-byte characters. We explicitly
473*c87b03e5Sespie allow \0 because this encoding is common in the
474*c87b03e5Sespie Java world. */
475*c87b03e5Sespie if (r == 0 || (r >= 0x80 && r <= 0x7ff))
476*c87b03e5Sespie return r;
477*c87b03e5Sespie }
478*c87b03e5Sespie }
479*c87b03e5Sespie else if ((c & 0xf0) == 0xe0)
480*c87b03e5Sespie {
481*c87b03e5Sespie c1 = getc (lex->finput);
482*c87b03e5Sespie if ((c1 & 0xc0) == 0x80)
483*c87b03e5Sespie {
484*c87b03e5Sespie c2 = getc (lex->finput);
485*c87b03e5Sespie if ((c2 & 0xc0) == 0x80)
486*c87b03e5Sespie {
487*c87b03e5Sespie unicode_t r = (unicode_t)(((c & 0xf) << 12) +
488*c87b03e5Sespie (( c1 & 0x3f) << 6)
489*c87b03e5Sespie + (c2 & 0x3f));
490*c87b03e5Sespie /* Check for valid 3-byte characters.
491*c87b03e5Sespie Don't allow surrogate, \ufffe or \uffff. */
492*c87b03e5Sespie if (IN_RANGE (r, 0x800, 0xffff)
493*c87b03e5Sespie && ! IN_RANGE (r, 0xd800, 0xdfff)
494*c87b03e5Sespie && r != 0xfffe && r != 0xffff)
495*c87b03e5Sespie return r;
496*c87b03e5Sespie }
497*c87b03e5Sespie }
498*c87b03e5Sespie }
499*c87b03e5Sespie
500*c87b03e5Sespie /* We simply don't support invalid characters. We also
501*c87b03e5Sespie don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
502*c87b03e5Sespie cannot be valid Java characters. */
503*c87b03e5Sespie java_lex_error ("malformed UTF-8 character", 0);
504*c87b03e5Sespie }
505*c87b03e5Sespie }
506*c87b03e5Sespie
507*c87b03e5Sespie /* We only get here on error. */
508*c87b03e5Sespie return UEOF;
509*c87b03e5Sespie }
510*c87b03e5Sespie
511*c87b03e5Sespie static void
java_store_unicode(l,c,unicode_escape_p)512*c87b03e5Sespie java_store_unicode (l, c, unicode_escape_p)
513*c87b03e5Sespie struct java_line *l;
514*c87b03e5Sespie unicode_t c;
515*c87b03e5Sespie int unicode_escape_p;
516*c87b03e5Sespie {
517*c87b03e5Sespie if (l->size == l->max)
518*c87b03e5Sespie {
519*c87b03e5Sespie l->max += JAVA_LINE_MAX;
520*c87b03e5Sespie l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
521*c87b03e5Sespie l->unicode_escape_p = xrealloc (l->unicode_escape_p,
522*c87b03e5Sespie sizeof (char)*l->max);
523*c87b03e5Sespie }
524*c87b03e5Sespie l->line [l->size] = c;
525*c87b03e5Sespie l->unicode_escape_p [l->size++] = unicode_escape_p;
526*c87b03e5Sespie }
527*c87b03e5Sespie
528*c87b03e5Sespie static int
java_read_unicode(lex,unicode_escape_p)529*c87b03e5Sespie java_read_unicode (lex, unicode_escape_p)
530*c87b03e5Sespie java_lexer *lex;
531*c87b03e5Sespie int *unicode_escape_p;
532*c87b03e5Sespie {
533*c87b03e5Sespie int c;
534*c87b03e5Sespie
535*c87b03e5Sespie c = java_read_char (lex);
536*c87b03e5Sespie *unicode_escape_p = 0;
537*c87b03e5Sespie
538*c87b03e5Sespie if (c != '\\')
539*c87b03e5Sespie {
540*c87b03e5Sespie lex->bs_count = 0;
541*c87b03e5Sespie return c;
542*c87b03e5Sespie }
543*c87b03e5Sespie
544*c87b03e5Sespie ++lex->bs_count;
545*c87b03e5Sespie if ((lex->bs_count) % 2 == 1)
546*c87b03e5Sespie {
547*c87b03e5Sespie /* Odd number of \ seen. */
548*c87b03e5Sespie c = java_read_char (lex);
549*c87b03e5Sespie if (c == 'u')
550*c87b03e5Sespie {
551*c87b03e5Sespie unicode_t unicode = 0;
552*c87b03e5Sespie int shift = 12;
553*c87b03e5Sespie
554*c87b03e5Sespie /* Recognize any number of `u's in \u. */
555*c87b03e5Sespie while ((c = java_read_char (lex)) == 'u')
556*c87b03e5Sespie ;
557*c87b03e5Sespie
558*c87b03e5Sespie shift = 12;
559*c87b03e5Sespie do
560*c87b03e5Sespie {
561*c87b03e5Sespie if (c == UEOF)
562*c87b03e5Sespie {
563*c87b03e5Sespie java_lex_error ("prematurely terminated \\u sequence", 0);
564*c87b03e5Sespie return UEOF;
565*c87b03e5Sespie }
566*c87b03e5Sespie
567*c87b03e5Sespie if (hex_p (c))
568*c87b03e5Sespie unicode |= (unicode_t)(hex_value (c) << shift);
569*c87b03e5Sespie else
570*c87b03e5Sespie {
571*c87b03e5Sespie java_lex_error ("non-hex digit in \\u sequence", 0);
572*c87b03e5Sespie break;
573*c87b03e5Sespie }
574*c87b03e5Sespie
575*c87b03e5Sespie c = java_read_char (lex);
576*c87b03e5Sespie shift -= 4;
577*c87b03e5Sespie }
578*c87b03e5Sespie while (shift >= 0);
579*c87b03e5Sespie
580*c87b03e5Sespie if (c != UEOF)
581*c87b03e5Sespie lex->unget_value = c;
582*c87b03e5Sespie
583*c87b03e5Sespie lex->bs_count = 0;
584*c87b03e5Sespie *unicode_escape_p = 1;
585*c87b03e5Sespie return unicode;
586*c87b03e5Sespie }
587*c87b03e5Sespie lex->unget_value = c;
588*c87b03e5Sespie }
589*c87b03e5Sespie return (unicode_t) '\\';
590*c87b03e5Sespie }
591*c87b03e5Sespie
592*c87b03e5Sespie static int
java_read_unicode_collapsing_terminators(lex,unicode_escape_p)593*c87b03e5Sespie java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
594*c87b03e5Sespie java_lexer *lex;
595*c87b03e5Sespie int *unicode_escape_p;
596*c87b03e5Sespie {
597*c87b03e5Sespie int c = java_read_unicode (lex, unicode_escape_p);
598*c87b03e5Sespie
599*c87b03e5Sespie if (c == '\r')
600*c87b03e5Sespie {
601*c87b03e5Sespie /* We have to read ahead to see if we got \r\n. In that case we
602*c87b03e5Sespie return a single line terminator. */
603*c87b03e5Sespie int dummy;
604*c87b03e5Sespie c = java_read_unicode (lex, &dummy);
605*c87b03e5Sespie if (c != '\n' && c != UEOF)
606*c87b03e5Sespie lex->unget_value = c;
607*c87b03e5Sespie /* In either case we must return a newline. */
608*c87b03e5Sespie c = '\n';
609*c87b03e5Sespie }
610*c87b03e5Sespie
611*c87b03e5Sespie return c;
612*c87b03e5Sespie }
613*c87b03e5Sespie
614*c87b03e5Sespie static int
java_get_unicode()615*c87b03e5Sespie java_get_unicode ()
616*c87b03e5Sespie {
617*c87b03e5Sespie /* It's time to read a line when... */
618*c87b03e5Sespie if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
619*c87b03e5Sespie {
620*c87b03e5Sespie int c;
621*c87b03e5Sespie int found_chars = 0;
622*c87b03e5Sespie
623*c87b03e5Sespie if (ctxp->lexer->hit_eof)
624*c87b03e5Sespie return UEOF;
625*c87b03e5Sespie
626*c87b03e5Sespie java_allocate_new_line ();
627*c87b03e5Sespie if (ctxp->c_line->line[0] != '\n')
628*c87b03e5Sespie {
629*c87b03e5Sespie for (;;)
630*c87b03e5Sespie {
631*c87b03e5Sespie int unicode_escape_p;
632*c87b03e5Sespie c = java_read_unicode_collapsing_terminators (ctxp->lexer,
633*c87b03e5Sespie &unicode_escape_p);
634*c87b03e5Sespie if (c != UEOF)
635*c87b03e5Sespie {
636*c87b03e5Sespie found_chars = 1;
637*c87b03e5Sespie java_store_unicode (ctxp->c_line, c, unicode_escape_p);
638*c87b03e5Sespie if (ctxp->c_line->white_space_only
639*c87b03e5Sespie && !JAVA_WHITE_SPACE_P (c)
640*c87b03e5Sespie && c != '\n')
641*c87b03e5Sespie ctxp->c_line->white_space_only = 0;
642*c87b03e5Sespie }
643*c87b03e5Sespie if ((c == '\n') || (c == UEOF))
644*c87b03e5Sespie break;
645*c87b03e5Sespie }
646*c87b03e5Sespie
647*c87b03e5Sespie if (c == UEOF && ! found_chars)
648*c87b03e5Sespie {
649*c87b03e5Sespie ctxp->lexer->hit_eof = 1;
650*c87b03e5Sespie return UEOF;
651*c87b03e5Sespie }
652*c87b03e5Sespie }
653*c87b03e5Sespie }
654*c87b03e5Sespie ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
655*c87b03e5Sespie JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
656*c87b03e5Sespie return ctxp->c_line->line [ctxp->c_line->current++];
657*c87b03e5Sespie }
658*c87b03e5Sespie
659*c87b03e5Sespie /* Parse the end of a C style comment.
660*c87b03e5Sespie * C is the first character following the '/' and '*'. */
661*c87b03e5Sespie static void
java_parse_end_comment(c)662*c87b03e5Sespie java_parse_end_comment (c)
663*c87b03e5Sespie int c;
664*c87b03e5Sespie {
665*c87b03e5Sespie for ( ;; c = java_get_unicode ())
666*c87b03e5Sespie {
667*c87b03e5Sespie switch (c)
668*c87b03e5Sespie {
669*c87b03e5Sespie case UEOF:
670*c87b03e5Sespie java_lex_error ("Comment not terminated at end of input", 0);
671*c87b03e5Sespie return;
672*c87b03e5Sespie case '*':
673*c87b03e5Sespie switch (c = java_get_unicode ())
674*c87b03e5Sespie {
675*c87b03e5Sespie case UEOF:
676*c87b03e5Sespie java_lex_error ("Comment not terminated at end of input", 0);
677*c87b03e5Sespie return;
678*c87b03e5Sespie case '/':
679*c87b03e5Sespie return;
680*c87b03e5Sespie case '*': /* Reparse only '*'. */
681*c87b03e5Sespie java_unget_unicode ();
682*c87b03e5Sespie }
683*c87b03e5Sespie }
684*c87b03e5Sespie }
685*c87b03e5Sespie }
686*c87b03e5Sespie
687*c87b03e5Sespie /* Parse the documentation section. Keywords must be at the beginning
688*c87b03e5Sespie of a documentation comment line (ignoring white space and any `*'
689*c87b03e5Sespie character). Parsed keyword(s): @DEPRECATED. */
690*c87b03e5Sespie
691*c87b03e5Sespie static int
java_parse_doc_section(c)692*c87b03e5Sespie java_parse_doc_section (c)
693*c87b03e5Sespie int c;
694*c87b03e5Sespie {
695*c87b03e5Sespie int valid_tag = 0, seen_star = 0;
696*c87b03e5Sespie
697*c87b03e5Sespie while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
698*c87b03e5Sespie {
699*c87b03e5Sespie switch (c)
700*c87b03e5Sespie {
701*c87b03e5Sespie case '*':
702*c87b03e5Sespie seen_star = 1;
703*c87b03e5Sespie break;
704*c87b03e5Sespie case '\n': /* ULT */
705*c87b03e5Sespie valid_tag = 1;
706*c87b03e5Sespie default:
707*c87b03e5Sespie seen_star = 0;
708*c87b03e5Sespie }
709*c87b03e5Sespie c = java_get_unicode();
710*c87b03e5Sespie }
711*c87b03e5Sespie
712*c87b03e5Sespie if (c == UEOF)
713*c87b03e5Sespie java_lex_error ("Comment not terminated at end of input", 0);
714*c87b03e5Sespie
715*c87b03e5Sespie if (seen_star && (c == '/'))
716*c87b03e5Sespie return 1; /* Goto step1 in caller. */
717*c87b03e5Sespie
718*c87b03e5Sespie /* We're parsing `@deprecated'. */
719*c87b03e5Sespie if (valid_tag && (c == '@'))
720*c87b03e5Sespie {
721*c87b03e5Sespie char tag [11];
722*c87b03e5Sespie int tag_index = 0;
723*c87b03e5Sespie
724*c87b03e5Sespie while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
725*c87b03e5Sespie {
726*c87b03e5Sespie c = java_get_unicode ();
727*c87b03e5Sespie tag [tag_index++] = c;
728*c87b03e5Sespie }
729*c87b03e5Sespie
730*c87b03e5Sespie if (c == UEOF)
731*c87b03e5Sespie java_lex_error ("Comment not terminated at end of input", 0);
732*c87b03e5Sespie tag [tag_index] = '\0';
733*c87b03e5Sespie
734*c87b03e5Sespie if (!strcmp (tag, "deprecated"))
735*c87b03e5Sespie ctxp->deprecated = 1;
736*c87b03e5Sespie }
737*c87b03e5Sespie java_unget_unicode ();
738*c87b03e5Sespie return 0;
739*c87b03e5Sespie }
740*c87b03e5Sespie
741*c87b03e5Sespie /* Return true if C is a valid start character for a Java identifier.
742*c87b03e5Sespie This is only called if C >= 128 -- smaller values are handled
743*c87b03e5Sespie inline. However, this function handles all values anyway. */
744*c87b03e5Sespie static int
java_start_char_p(c)745*c87b03e5Sespie java_start_char_p (c)
746*c87b03e5Sespie unicode_t c;
747*c87b03e5Sespie {
748*c87b03e5Sespie unsigned int hi = c / 256;
749*c87b03e5Sespie const char *const page = type_table[hi];
750*c87b03e5Sespie unsigned long val = (unsigned long) page;
751*c87b03e5Sespie int flags;
752*c87b03e5Sespie
753*c87b03e5Sespie if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
754*c87b03e5Sespie flags = page[c & 255];
755*c87b03e5Sespie else
756*c87b03e5Sespie flags = val;
757*c87b03e5Sespie
758*c87b03e5Sespie return flags & LETTER_START;
759*c87b03e5Sespie }
760*c87b03e5Sespie
761*c87b03e5Sespie /* Return true if C is a valid part character for a Java identifier.
762*c87b03e5Sespie This is only called if C >= 128 -- smaller values are handled
763*c87b03e5Sespie inline. However, this function handles all values anyway. */
764*c87b03e5Sespie static int
java_part_char_p(c)765*c87b03e5Sespie java_part_char_p (c)
766*c87b03e5Sespie unicode_t c;
767*c87b03e5Sespie {
768*c87b03e5Sespie unsigned int hi = c / 256;
769*c87b03e5Sespie const char *const page = type_table[hi];
770*c87b03e5Sespie unsigned long val = (unsigned long) page;
771*c87b03e5Sespie int flags;
772*c87b03e5Sespie
773*c87b03e5Sespie if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
774*c87b03e5Sespie flags = page[c & 255];
775*c87b03e5Sespie else
776*c87b03e5Sespie flags = val;
777*c87b03e5Sespie
778*c87b03e5Sespie return flags & LETTER_PART;
779*c87b03e5Sespie }
780*c87b03e5Sespie
781*c87b03e5Sespie static int
java_parse_escape_sequence()782*c87b03e5Sespie java_parse_escape_sequence ()
783*c87b03e5Sespie {
784*c87b03e5Sespie unicode_t char_lit;
785*c87b03e5Sespie int c;
786*c87b03e5Sespie
787*c87b03e5Sespie switch (c = java_get_unicode ())
788*c87b03e5Sespie {
789*c87b03e5Sespie case 'b':
790*c87b03e5Sespie return (unicode_t)0x8;
791*c87b03e5Sespie case 't':
792*c87b03e5Sespie return (unicode_t)0x9;
793*c87b03e5Sespie case 'n':
794*c87b03e5Sespie return (unicode_t)0xa;
795*c87b03e5Sespie case 'f':
796*c87b03e5Sespie return (unicode_t)0xc;
797*c87b03e5Sespie case 'r':
798*c87b03e5Sespie return (unicode_t)0xd;
799*c87b03e5Sespie case '"':
800*c87b03e5Sespie return (unicode_t)0x22;
801*c87b03e5Sespie case '\'':
802*c87b03e5Sespie return (unicode_t)0x27;
803*c87b03e5Sespie case '\\':
804*c87b03e5Sespie return (unicode_t)0x5c;
805*c87b03e5Sespie case '0': case '1': case '2': case '3': case '4':
806*c87b03e5Sespie case '5': case '6': case '7':
807*c87b03e5Sespie {
808*c87b03e5Sespie int octal_escape[3];
809*c87b03e5Sespie int octal_escape_index = 0;
810*c87b03e5Sespie int max = 3;
811*c87b03e5Sespie int i, shift;
812*c87b03e5Sespie
813*c87b03e5Sespie for (; octal_escape_index < max && RANGE (c, '0', '7');
814*c87b03e5Sespie c = java_get_unicode ())
815*c87b03e5Sespie {
816*c87b03e5Sespie if (octal_escape_index == 0 && c > '3')
817*c87b03e5Sespie {
818*c87b03e5Sespie /* According to the grammar, `\477' has a well-defined
819*c87b03e5Sespie meaning -- it is `\47' followed by `7'. */
820*c87b03e5Sespie --max;
821*c87b03e5Sespie }
822*c87b03e5Sespie octal_escape [octal_escape_index++] = c;
823*c87b03e5Sespie }
824*c87b03e5Sespie
825*c87b03e5Sespie java_unget_unicode ();
826*c87b03e5Sespie
827*c87b03e5Sespie for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
828*c87b03e5Sespie i < octal_escape_index; i++, shift -= 3)
829*c87b03e5Sespie char_lit |= (octal_escape [i] - '0') << shift;
830*c87b03e5Sespie
831*c87b03e5Sespie return char_lit;
832*c87b03e5Sespie }
833*c87b03e5Sespie default:
834*c87b03e5Sespie java_lex_error ("Invalid character in escape sequence", 0);
835*c87b03e5Sespie return JAVA_CHAR_ERROR;
836*c87b03e5Sespie }
837*c87b03e5Sespie }
838*c87b03e5Sespie
839*c87b03e5Sespie #ifndef JC1_LITE
840*c87b03e5Sespie #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
841*c87b03e5Sespie
842*c87b03e5Sespie /* Subroutine of java_lex: converts floating-point literals to tree
843*c87b03e5Sespie nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
844*c87b03e5Sespie store the result. FFLAG indicates whether the literal was tagged
845*c87b03e5Sespie with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
846*c87b03e5Sespie is the line number on which to report any error. */
847*c87b03e5Sespie
848*c87b03e5Sespie static void java_perform_atof PARAMS ((YYSTYPE *, char *, int, int));
849*c87b03e5Sespie
850*c87b03e5Sespie static void
java_perform_atof(java_lval,literal_token,fflag,number_beginning)851*c87b03e5Sespie java_perform_atof (java_lval, literal_token, fflag, number_beginning)
852*c87b03e5Sespie YYSTYPE *java_lval;
853*c87b03e5Sespie char *literal_token;
854*c87b03e5Sespie int fflag;
855*c87b03e5Sespie int number_beginning;
856*c87b03e5Sespie {
857*c87b03e5Sespie REAL_VALUE_TYPE value;
858*c87b03e5Sespie tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
859*c87b03e5Sespie
860*c87b03e5Sespie SET_REAL_VALUE_ATOF (value,
861*c87b03e5Sespie REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
862*c87b03e5Sespie
863*c87b03e5Sespie if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
864*c87b03e5Sespie {
865*c87b03e5Sespie JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
866*c87b03e5Sespie value = DCONST0;
867*c87b03e5Sespie }
868*c87b03e5Sespie else if (IS_ZERO (value))
869*c87b03e5Sespie {
870*c87b03e5Sespie /* We check to see if the value is really 0 or if we've found an
871*c87b03e5Sespie underflow. We do this in the most primitive imaginable way. */
872*c87b03e5Sespie int really_zero = 1;
873*c87b03e5Sespie char *p = literal_token;
874*c87b03e5Sespie if (*p == '-')
875*c87b03e5Sespie ++p;
876*c87b03e5Sespie while (*p && *p != 'e' && *p != 'E')
877*c87b03e5Sespie {
878*c87b03e5Sespie if (*p != '0' && *p != '.')
879*c87b03e5Sespie {
880*c87b03e5Sespie really_zero = 0;
881*c87b03e5Sespie break;
882*c87b03e5Sespie }
883*c87b03e5Sespie ++p;
884*c87b03e5Sespie }
885*c87b03e5Sespie if (! really_zero)
886*c87b03e5Sespie {
887*c87b03e5Sespie int i = ctxp->c_line->current;
888*c87b03e5Sespie ctxp->c_line->current = number_beginning;
889*c87b03e5Sespie java_lex_error ("Floating point literal underflow", 0);
890*c87b03e5Sespie ctxp->c_line->current = i;
891*c87b03e5Sespie }
892*c87b03e5Sespie }
893*c87b03e5Sespie
894*c87b03e5Sespie SET_LVAL_NODE_TYPE (build_real (type, value), type);
895*c87b03e5Sespie }
896*c87b03e5Sespie #endif
897*c87b03e5Sespie
898*c87b03e5Sespie static int yylex PARAMS ((YYSTYPE *));
899*c87b03e5Sespie
900*c87b03e5Sespie static int
901*c87b03e5Sespie #ifdef JC1_LITE
yylex(java_lval)902*c87b03e5Sespie yylex (java_lval)
903*c87b03e5Sespie #else
904*c87b03e5Sespie java_lex (java_lval)
905*c87b03e5Sespie #endif
906*c87b03e5Sespie YYSTYPE *java_lval;
907*c87b03e5Sespie {
908*c87b03e5Sespie int c;
909*c87b03e5Sespie unicode_t first_unicode;
910*c87b03e5Sespie int ascii_index, all_ascii;
911*c87b03e5Sespie char *string;
912*c87b03e5Sespie
913*c87b03e5Sespie /* Translation of the Unicode escape in the raw stream of Unicode
914*c87b03e5Sespie characters. Takes care of line terminator. */
915*c87b03e5Sespie step1:
916*c87b03e5Sespie /* Skip white spaces: SP, TAB and FF or ULT. */
917*c87b03e5Sespie for (c = java_get_unicode ();
918*c87b03e5Sespie c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
919*c87b03e5Sespie if (c == '\n')
920*c87b03e5Sespie {
921*c87b03e5Sespie ctxp->elc.line = ctxp->c_line->lineno;
922*c87b03e5Sespie ctxp->elc.col = ctxp->c_line->char_col-2;
923*c87b03e5Sespie }
924*c87b03e5Sespie
925*c87b03e5Sespie ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
926*c87b03e5Sespie
927*c87b03e5Sespie if (c == 0x1a) /* CTRL-Z. */
928*c87b03e5Sespie {
929*c87b03e5Sespie if ((c = java_get_unicode ()) == UEOF)
930*c87b03e5Sespie return 0; /* Ok here. */
931*c87b03e5Sespie else
932*c87b03e5Sespie java_unget_unicode (); /* Caught later, at the end of the
933*c87b03e5Sespie function. */
934*c87b03e5Sespie }
935*c87b03e5Sespie /* Handle EOF here. */
936*c87b03e5Sespie if (c == UEOF) /* Should probably do something here... */
937*c87b03e5Sespie return 0;
938*c87b03e5Sespie
939*c87b03e5Sespie /* Take care of eventual comments. */
940*c87b03e5Sespie if (c == '/')
941*c87b03e5Sespie {
942*c87b03e5Sespie switch (c = java_get_unicode ())
943*c87b03e5Sespie {
944*c87b03e5Sespie case '/':
945*c87b03e5Sespie for (;;)
946*c87b03e5Sespie {
947*c87b03e5Sespie c = java_get_unicode ();
948*c87b03e5Sespie if (c == UEOF)
949*c87b03e5Sespie {
950*c87b03e5Sespie /* It is ok to end a `//' comment with EOF, unless
951*c87b03e5Sespie we're being pedantic. */
952*c87b03e5Sespie if (pedantic)
953*c87b03e5Sespie java_lex_error ("Comment not terminated at end of input",
954*c87b03e5Sespie 0);
955*c87b03e5Sespie return 0;
956*c87b03e5Sespie }
957*c87b03e5Sespie if (c == '\n') /* ULT */
958*c87b03e5Sespie goto step1;
959*c87b03e5Sespie }
960*c87b03e5Sespie break;
961*c87b03e5Sespie
962*c87b03e5Sespie case '*':
963*c87b03e5Sespie if ((c = java_get_unicode ()) == '*')
964*c87b03e5Sespie {
965*c87b03e5Sespie if ((c = java_get_unicode ()) == '/')
966*c87b03e5Sespie goto step1; /* Empty documentation comment. */
967*c87b03e5Sespie else if (java_parse_doc_section (c))
968*c87b03e5Sespie goto step1;
969*c87b03e5Sespie }
970*c87b03e5Sespie
971*c87b03e5Sespie java_parse_end_comment ((c = java_get_unicode ()));
972*c87b03e5Sespie goto step1;
973*c87b03e5Sespie break;
974*c87b03e5Sespie default:
975*c87b03e5Sespie java_unget_unicode ();
976*c87b03e5Sespie c = '/';
977*c87b03e5Sespie break;
978*c87b03e5Sespie }
979*c87b03e5Sespie }
980*c87b03e5Sespie
981*c87b03e5Sespie ctxp->elc.line = ctxp->c_line->lineno;
982*c87b03e5Sespie ctxp->elc.prev_col = ctxp->elc.col;
983*c87b03e5Sespie ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
984*c87b03e5Sespie if (ctxp->elc.col < 0)
985*c87b03e5Sespie abort ();
986*c87b03e5Sespie
987*c87b03e5Sespie /* Numeric literals. */
988*c87b03e5Sespie if (JAVA_ASCII_DIGIT (c) || (c == '.'))
989*c87b03e5Sespie {
990*c87b03e5Sespie /* This section of code is borrowed from gcc/c-lex.c. */
991*c87b03e5Sespie #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
992*c87b03e5Sespie int parts[TOTAL_PARTS];
993*c87b03e5Sespie HOST_WIDE_INT high, low;
994*c87b03e5Sespie /* End borrowed section. */
995*c87b03e5Sespie char literal_token [256];
996*c87b03e5Sespie int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
997*c87b03e5Sespie int found_hex_digits = 0, found_non_octal_digits = 0;
998*c87b03e5Sespie int i;
999*c87b03e5Sespie #ifndef JC1_LITE
1000*c87b03e5Sespie int number_beginning = ctxp->c_line->current;
1001*c87b03e5Sespie tree value;
1002*c87b03e5Sespie #endif
1003*c87b03e5Sespie
1004*c87b03e5Sespie /* We might have a . separator instead of a FP like .[0-9]*. */
1005*c87b03e5Sespie if (c == '.')
1006*c87b03e5Sespie {
1007*c87b03e5Sespie unicode_t peep = java_sneak_unicode ();
1008*c87b03e5Sespie
1009*c87b03e5Sespie if (!JAVA_ASCII_DIGIT (peep))
1010*c87b03e5Sespie {
1011*c87b03e5Sespie JAVA_LEX_SEP('.');
1012*c87b03e5Sespie BUILD_OPERATOR (DOT_TK);
1013*c87b03e5Sespie }
1014*c87b03e5Sespie }
1015*c87b03e5Sespie
1016*c87b03e5Sespie for (i = 0; i < TOTAL_PARTS; i++)
1017*c87b03e5Sespie parts [i] = 0;
1018*c87b03e5Sespie
1019*c87b03e5Sespie if (c == '0')
1020*c87b03e5Sespie {
1021*c87b03e5Sespie c = java_get_unicode ();
1022*c87b03e5Sespie if (c == 'x' || c == 'X')
1023*c87b03e5Sespie {
1024*c87b03e5Sespie radix = 16;
1025*c87b03e5Sespie c = java_get_unicode ();
1026*c87b03e5Sespie }
1027*c87b03e5Sespie else if (JAVA_ASCII_DIGIT (c))
1028*c87b03e5Sespie radix = 8;
1029*c87b03e5Sespie else if (c == '.' || c == 'e' || c =='E')
1030*c87b03e5Sespie {
1031*c87b03e5Sespie /* Push the '.', 'e', or 'E' back and prepare for a FP
1032*c87b03e5Sespie parsing... */
1033*c87b03e5Sespie java_unget_unicode ();
1034*c87b03e5Sespie c = '0';
1035*c87b03e5Sespie }
1036*c87b03e5Sespie else
1037*c87b03e5Sespie {
1038*c87b03e5Sespie /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
1039*c87b03e5Sespie JAVA_LEX_LIT ("0", 10);
1040*c87b03e5Sespie switch (c)
1041*c87b03e5Sespie {
1042*c87b03e5Sespie case 'L': case 'l':
1043*c87b03e5Sespie SET_LVAL_NODE (long_zero_node);
1044*c87b03e5Sespie return (INT_LIT_TK);
1045*c87b03e5Sespie case 'f': case 'F':
1046*c87b03e5Sespie SET_LVAL_NODE (float_zero_node);
1047*c87b03e5Sespie return (FP_LIT_TK);
1048*c87b03e5Sespie case 'd': case 'D':
1049*c87b03e5Sespie SET_LVAL_NODE (double_zero_node);
1050*c87b03e5Sespie return (FP_LIT_TK);
1051*c87b03e5Sespie default:
1052*c87b03e5Sespie java_unget_unicode ();
1053*c87b03e5Sespie SET_LVAL_NODE (integer_zero_node);
1054*c87b03e5Sespie return (INT_LIT_TK);
1055*c87b03e5Sespie }
1056*c87b03e5Sespie }
1057*c87b03e5Sespie }
1058*c87b03e5Sespie /* Parse the first part of the literal, until we find something
1059*c87b03e5Sespie which is not a number. */
1060*c87b03e5Sespie while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1061*c87b03e5Sespie JAVA_ASCII_DIGIT (c))
1062*c87b03e5Sespie {
1063*c87b03e5Sespie /* We store in a string (in case it turns out to be a FP) and in
1064*c87b03e5Sespie PARTS if we have to process a integer literal. */
1065*c87b03e5Sespie int numeric = hex_value (c);
1066*c87b03e5Sespie int count;
1067*c87b03e5Sespie
1068*c87b03e5Sespie /* Remember when we find a valid hexadecimal digit. */
1069*c87b03e5Sespie if (radix == 16)
1070*c87b03e5Sespie found_hex_digits = 1;
1071*c87b03e5Sespie /* Remember when we find an invalid octal digit. */
1072*c87b03e5Sespie else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1073*c87b03e5Sespie found_non_octal_digits = 1;
1074*c87b03e5Sespie
1075*c87b03e5Sespie literal_token [literal_index++] = c;
1076*c87b03e5Sespie /* This section of code is borrowed from gcc/c-lex.c. */
1077*c87b03e5Sespie for (count = 0; count < TOTAL_PARTS; count++)
1078*c87b03e5Sespie {
1079*c87b03e5Sespie parts[count] *= radix;
1080*c87b03e5Sespie if (count)
1081*c87b03e5Sespie {
1082*c87b03e5Sespie parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1083*c87b03e5Sespie parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1084*c87b03e5Sespie }
1085*c87b03e5Sespie else
1086*c87b03e5Sespie parts[0] += numeric;
1087*c87b03e5Sespie }
1088*c87b03e5Sespie if (parts [TOTAL_PARTS-1] != 0)
1089*c87b03e5Sespie overflow = 1;
1090*c87b03e5Sespie /* End borrowed section. */
1091*c87b03e5Sespie c = java_get_unicode ();
1092*c87b03e5Sespie }
1093*c87b03e5Sespie
1094*c87b03e5Sespie /* If we have something from the FP char set but not a digit, parse
1095*c87b03e5Sespie a FP literal. */
1096*c87b03e5Sespie if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1097*c87b03e5Sespie {
1098*c87b03e5Sespie int stage = 0;
1099*c87b03e5Sespie int seen_digit = (literal_index ? 1 : 0);
1100*c87b03e5Sespie int seen_exponent = 0;
1101*c87b03e5Sespie int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1102*c87b03e5Sespie double unless specified. */
1103*c87b03e5Sespie
1104*c87b03e5Sespie /* It is ok if the radix is 8 because this just means we've
1105*c87b03e5Sespie seen a leading `0'. However, radix==16 is invalid. */
1106*c87b03e5Sespie if (radix == 16)
1107*c87b03e5Sespie java_lex_error ("Can't express non-decimal FP literal", 0);
1108*c87b03e5Sespie radix = 10;
1109*c87b03e5Sespie
1110*c87b03e5Sespie for (;;)
1111*c87b03e5Sespie {
1112*c87b03e5Sespie if (c == '.')
1113*c87b03e5Sespie {
1114*c87b03e5Sespie if (stage < 1)
1115*c87b03e5Sespie {
1116*c87b03e5Sespie stage = 1;
1117*c87b03e5Sespie literal_token [literal_index++ ] = c;
1118*c87b03e5Sespie c = java_get_unicode ();
1119*c87b03e5Sespie }
1120*c87b03e5Sespie else
1121*c87b03e5Sespie java_lex_error ("Invalid character in FP literal", 0);
1122*c87b03e5Sespie }
1123*c87b03e5Sespie
1124*c87b03e5Sespie if (c == 'e' || c == 'E')
1125*c87b03e5Sespie {
1126*c87b03e5Sespie if (stage < 2)
1127*c87b03e5Sespie {
1128*c87b03e5Sespie /* {E,e} must have seen at least a digit. */
1129*c87b03e5Sespie if (!seen_digit)
1130*c87b03e5Sespie java_lex_error
1131*c87b03e5Sespie ("Invalid FP literal, mantissa must have digit", 0);
1132*c87b03e5Sespie seen_digit = 0;
1133*c87b03e5Sespie seen_exponent = 1;
1134*c87b03e5Sespie stage = 2;
1135*c87b03e5Sespie literal_token [literal_index++] = c;
1136*c87b03e5Sespie c = java_get_unicode ();
1137*c87b03e5Sespie }
1138*c87b03e5Sespie else
1139*c87b03e5Sespie java_lex_error ("Invalid character in FP literal", 0);
1140*c87b03e5Sespie }
1141*c87b03e5Sespie if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1142*c87b03e5Sespie {
1143*c87b03e5Sespie fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1144*c87b03e5Sespie stage = 4; /* So we fall through. */
1145*c87b03e5Sespie }
1146*c87b03e5Sespie
1147*c87b03e5Sespie if ((c=='-' || c =='+') && stage == 2)
1148*c87b03e5Sespie {
1149*c87b03e5Sespie stage = 3;
1150*c87b03e5Sespie literal_token [literal_index++] = c;
1151*c87b03e5Sespie c = java_get_unicode ();
1152*c87b03e5Sespie }
1153*c87b03e5Sespie
1154*c87b03e5Sespie if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1155*c87b03e5Sespie (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1156*c87b03e5Sespie (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1157*c87b03e5Sespie (stage == 3 && JAVA_ASCII_DIGIT (c)))
1158*c87b03e5Sespie {
1159*c87b03e5Sespie if (JAVA_ASCII_DIGIT (c))
1160*c87b03e5Sespie seen_digit = 1;
1161*c87b03e5Sespie if (stage == 2)
1162*c87b03e5Sespie stage = 3;
1163*c87b03e5Sespie literal_token [literal_index++ ] = c;
1164*c87b03e5Sespie c = java_get_unicode ();
1165*c87b03e5Sespie }
1166*c87b03e5Sespie else
1167*c87b03e5Sespie {
1168*c87b03e5Sespie if (stage != 4) /* Don't push back fF/dD. */
1169*c87b03e5Sespie java_unget_unicode ();
1170*c87b03e5Sespie
1171*c87b03e5Sespie /* An exponent (if any) must have seen a digit. */
1172*c87b03e5Sespie if (seen_exponent && !seen_digit)
1173*c87b03e5Sespie java_lex_error
1174*c87b03e5Sespie ("Invalid FP literal, exponent must have digit", 0);
1175*c87b03e5Sespie
1176*c87b03e5Sespie literal_token [literal_index] = '\0';
1177*c87b03e5Sespie JAVA_LEX_LIT (literal_token, radix);
1178*c87b03e5Sespie
1179*c87b03e5Sespie #ifndef JC1_LITE
1180*c87b03e5Sespie java_perform_atof (java_lval, literal_token,
1181*c87b03e5Sespie fflag, number_beginning);
1182*c87b03e5Sespie #endif
1183*c87b03e5Sespie return FP_LIT_TK;
1184*c87b03e5Sespie }
1185*c87b03e5Sespie }
1186*c87b03e5Sespie } /* JAVA_ASCII_FPCHAR (c) */
1187*c87b03e5Sespie
1188*c87b03e5Sespie /* Here we get back to converting the integral literal. */
1189*c87b03e5Sespie if (radix == 16 && ! found_hex_digits)
1190*c87b03e5Sespie java_lex_error
1191*c87b03e5Sespie ("0x must be followed by at least one hexadecimal digit", 0);
1192*c87b03e5Sespie else if (radix == 8 && found_non_octal_digits)
1193*c87b03e5Sespie java_lex_error ("Octal literal contains digit out of range", 0);
1194*c87b03e5Sespie else if (c == 'L' || c == 'l')
1195*c87b03e5Sespie long_suffix = 1;
1196*c87b03e5Sespie else
1197*c87b03e5Sespie java_unget_unicode ();
1198*c87b03e5Sespie
1199*c87b03e5Sespie #ifdef JAVA_LEX_DEBUG
1200*c87b03e5Sespie literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1201*c87b03e5Sespie JAVA_LEX_LIT (literal_token, radix);
1202*c87b03e5Sespie #endif
1203*c87b03e5Sespie /* This section of code is borrowed from gcc/c-lex.c. */
1204*c87b03e5Sespie if (!overflow)
1205*c87b03e5Sespie {
1206*c87b03e5Sespie bytes = GET_TYPE_PRECISION (long_type_node);
1207*c87b03e5Sespie for (i = bytes; i < TOTAL_PARTS; i++)
1208*c87b03e5Sespie if (parts [i])
1209*c87b03e5Sespie {
1210*c87b03e5Sespie overflow = 1;
1211*c87b03e5Sespie break;
1212*c87b03e5Sespie }
1213*c87b03e5Sespie }
1214*c87b03e5Sespie high = low = 0;
1215*c87b03e5Sespie for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1216*c87b03e5Sespie {
1217*c87b03e5Sespie high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1218*c87b03e5Sespie / HOST_BITS_PER_CHAR)]
1219*c87b03e5Sespie << (i * HOST_BITS_PER_CHAR));
1220*c87b03e5Sespie low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1221*c87b03e5Sespie }
1222*c87b03e5Sespie /* End borrowed section. */
1223*c87b03e5Sespie
1224*c87b03e5Sespie #ifndef JC1_LITE
1225*c87b03e5Sespie /* Range checking. */
1226*c87b03e5Sespie value = build_int_2 (low, high);
1227*c87b03e5Sespie /* Temporarily set type to unsigned. */
1228*c87b03e5Sespie SET_LVAL_NODE_TYPE (value, (long_suffix
1229*c87b03e5Sespie ? unsigned_long_type_node
1230*c87b03e5Sespie : unsigned_int_type_node));
1231*c87b03e5Sespie
1232*c87b03e5Sespie /* For base 10 numbers, only values up to the highest value
1233*c87b03e5Sespie (plus one) can be written. For instance, only ints up to
1234*c87b03e5Sespie 2147483648 can be written. The special case of the largest
1235*c87b03e5Sespie negative value is handled elsewhere. For other bases, any
1236*c87b03e5Sespie number can be represented. */
1237*c87b03e5Sespie if (overflow || (radix == 10
1238*c87b03e5Sespie && tree_int_cst_lt (long_suffix
1239*c87b03e5Sespie ? decimal_long_max
1240*c87b03e5Sespie : decimal_int_max,
1241*c87b03e5Sespie value)))
1242*c87b03e5Sespie {
1243*c87b03e5Sespie if (long_suffix)
1244*c87b03e5Sespie JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1245*c87b03e5Sespie else
1246*c87b03e5Sespie JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1247*c87b03e5Sespie }
1248*c87b03e5Sespie
1249*c87b03e5Sespie /* Sign extend the value. */
1250*c87b03e5Sespie SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1251*c87b03e5Sespie force_fit_type (value, 0);
1252*c87b03e5Sespie JAVA_RADIX10_FLAG (value) = radix == 10;
1253*c87b03e5Sespie #else
1254*c87b03e5Sespie SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1255*c87b03e5Sespie long_suffix ? long_type_node : int_type_node);
1256*c87b03e5Sespie #endif
1257*c87b03e5Sespie return INT_LIT_TK;
1258*c87b03e5Sespie }
1259*c87b03e5Sespie
1260*c87b03e5Sespie /* Character literals. */
1261*c87b03e5Sespie if (c == '\'')
1262*c87b03e5Sespie {
1263*c87b03e5Sespie int char_lit;
1264*c87b03e5Sespie if ((c = java_get_unicode ()) == '\\')
1265*c87b03e5Sespie char_lit = java_parse_escape_sequence ();
1266*c87b03e5Sespie else
1267*c87b03e5Sespie {
1268*c87b03e5Sespie if (c == '\n' || c == '\'')
1269*c87b03e5Sespie java_lex_error ("Invalid character literal", 0);
1270*c87b03e5Sespie char_lit = c;
1271*c87b03e5Sespie }
1272*c87b03e5Sespie
1273*c87b03e5Sespie c = java_get_unicode ();
1274*c87b03e5Sespie
1275*c87b03e5Sespie if ((c == '\n') || (c == UEOF))
1276*c87b03e5Sespie java_lex_error ("Character literal not terminated at end of line", 0);
1277*c87b03e5Sespie if (c != '\'')
1278*c87b03e5Sespie java_lex_error ("Syntax error in character literal", 0);
1279*c87b03e5Sespie
1280*c87b03e5Sespie if (char_lit == JAVA_CHAR_ERROR)
1281*c87b03e5Sespie char_lit = 0; /* We silently convert it to zero. */
1282*c87b03e5Sespie
1283*c87b03e5Sespie JAVA_LEX_CHAR_LIT (char_lit);
1284*c87b03e5Sespie SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1285*c87b03e5Sespie return CHAR_LIT_TK;
1286*c87b03e5Sespie }
1287*c87b03e5Sespie
1288*c87b03e5Sespie /* String literals. */
1289*c87b03e5Sespie if (c == '"')
1290*c87b03e5Sespie {
1291*c87b03e5Sespie int no_error;
1292*c87b03e5Sespie char *string;
1293*c87b03e5Sespie
1294*c87b03e5Sespie for (no_error = 1, c = java_get_unicode ();
1295*c87b03e5Sespie c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1296*c87b03e5Sespie {
1297*c87b03e5Sespie if (c == '\\')
1298*c87b03e5Sespie c = java_parse_escape_sequence ();
1299*c87b03e5Sespie if (c == JAVA_CHAR_ERROR)
1300*c87b03e5Sespie {
1301*c87b03e5Sespie no_error = 0;
1302*c87b03e5Sespie c = 0; /* We silently convert it to zero. */
1303*c87b03e5Sespie }
1304*c87b03e5Sespie java_unicode_2_utf8 (c);
1305*c87b03e5Sespie }
1306*c87b03e5Sespie if (c == '\n' || c == UEOF) /* ULT. */
1307*c87b03e5Sespie {
1308*c87b03e5Sespie lineno--; /* Refer to the line where the terminator was seen. */
1309*c87b03e5Sespie java_lex_error ("String not terminated at end of line", 0);
1310*c87b03e5Sespie lineno++;
1311*c87b03e5Sespie }
1312*c87b03e5Sespie
1313*c87b03e5Sespie obstack_1grow (&temporary_obstack, '\0');
1314*c87b03e5Sespie string = obstack_finish (&temporary_obstack);
1315*c87b03e5Sespie #ifndef JC1_LITE
1316*c87b03e5Sespie if (!no_error || (c != '"'))
1317*c87b03e5Sespie java_lval->node = error_mark_node; /* FIXME: Requires futher
1318*c87b03e5Sespie testing. */
1319*c87b03e5Sespie else
1320*c87b03e5Sespie java_lval->node = build_string (strlen (string), string);
1321*c87b03e5Sespie #endif
1322*c87b03e5Sespie obstack_free (&temporary_obstack, string);
1323*c87b03e5Sespie return STRING_LIT_TK;
1324*c87b03e5Sespie }
1325*c87b03e5Sespie
1326*c87b03e5Sespie /* Separator. */
1327*c87b03e5Sespie switch (c)
1328*c87b03e5Sespie {
1329*c87b03e5Sespie case '(':
1330*c87b03e5Sespie JAVA_LEX_SEP (c);
1331*c87b03e5Sespie BUILD_OPERATOR (OP_TK);
1332*c87b03e5Sespie case ')':
1333*c87b03e5Sespie JAVA_LEX_SEP (c);
1334*c87b03e5Sespie return CP_TK;
1335*c87b03e5Sespie case '{':
1336*c87b03e5Sespie JAVA_LEX_SEP (c);
1337*c87b03e5Sespie if (ctxp->ccb_indent == 1)
1338*c87b03e5Sespie ctxp->first_ccb_indent1 = lineno;
1339*c87b03e5Sespie ctxp->ccb_indent++;
1340*c87b03e5Sespie BUILD_OPERATOR (OCB_TK);
1341*c87b03e5Sespie case '}':
1342*c87b03e5Sespie JAVA_LEX_SEP (c);
1343*c87b03e5Sespie ctxp->ccb_indent--;
1344*c87b03e5Sespie if (ctxp->ccb_indent == 1)
1345*c87b03e5Sespie ctxp->last_ccb_indent1 = lineno;
1346*c87b03e5Sespie BUILD_OPERATOR (CCB_TK);
1347*c87b03e5Sespie case '[':
1348*c87b03e5Sespie JAVA_LEX_SEP (c);
1349*c87b03e5Sespie BUILD_OPERATOR (OSB_TK);
1350*c87b03e5Sespie case ']':
1351*c87b03e5Sespie JAVA_LEX_SEP (c);
1352*c87b03e5Sespie return CSB_TK;
1353*c87b03e5Sespie case ';':
1354*c87b03e5Sespie JAVA_LEX_SEP (c);
1355*c87b03e5Sespie return SC_TK;
1356*c87b03e5Sespie case ',':
1357*c87b03e5Sespie JAVA_LEX_SEP (c);
1358*c87b03e5Sespie return C_TK;
1359*c87b03e5Sespie case '.':
1360*c87b03e5Sespie JAVA_LEX_SEP (c);
1361*c87b03e5Sespie BUILD_OPERATOR (DOT_TK);
1362*c87b03e5Sespie /* return DOT_TK; */
1363*c87b03e5Sespie }
1364*c87b03e5Sespie
1365*c87b03e5Sespie /* Operators. */
1366*c87b03e5Sespie switch (c)
1367*c87b03e5Sespie {
1368*c87b03e5Sespie case '=':
1369*c87b03e5Sespie if ((c = java_get_unicode ()) == '=')
1370*c87b03e5Sespie {
1371*c87b03e5Sespie BUILD_OPERATOR (EQ_TK);
1372*c87b03e5Sespie }
1373*c87b03e5Sespie else
1374*c87b03e5Sespie {
1375*c87b03e5Sespie /* Equals is used in two different locations. In the
1376*c87b03e5Sespie variable_declarator: rule, it has to be seen as '=' as opposed
1377*c87b03e5Sespie to being seen as an ordinary assignment operator in
1378*c87b03e5Sespie assignment_operators: rule. */
1379*c87b03e5Sespie java_unget_unicode ();
1380*c87b03e5Sespie BUILD_OPERATOR (ASSIGN_TK);
1381*c87b03e5Sespie }
1382*c87b03e5Sespie
1383*c87b03e5Sespie case '>':
1384*c87b03e5Sespie switch ((c = java_get_unicode ()))
1385*c87b03e5Sespie {
1386*c87b03e5Sespie case '=':
1387*c87b03e5Sespie BUILD_OPERATOR (GTE_TK);
1388*c87b03e5Sespie case '>':
1389*c87b03e5Sespie switch ((c = java_get_unicode ()))
1390*c87b03e5Sespie {
1391*c87b03e5Sespie case '>':
1392*c87b03e5Sespie if ((c = java_get_unicode ()) == '=')
1393*c87b03e5Sespie {
1394*c87b03e5Sespie BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1395*c87b03e5Sespie }
1396*c87b03e5Sespie else
1397*c87b03e5Sespie {
1398*c87b03e5Sespie java_unget_unicode ();
1399*c87b03e5Sespie BUILD_OPERATOR (ZRS_TK);
1400*c87b03e5Sespie }
1401*c87b03e5Sespie case '=':
1402*c87b03e5Sespie BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1403*c87b03e5Sespie default:
1404*c87b03e5Sespie java_unget_unicode ();
1405*c87b03e5Sespie BUILD_OPERATOR (SRS_TK);
1406*c87b03e5Sespie }
1407*c87b03e5Sespie default:
1408*c87b03e5Sespie java_unget_unicode ();
1409*c87b03e5Sespie BUILD_OPERATOR (GT_TK);
1410*c87b03e5Sespie }
1411*c87b03e5Sespie
1412*c87b03e5Sespie case '<':
1413*c87b03e5Sespie switch ((c = java_get_unicode ()))
1414*c87b03e5Sespie {
1415*c87b03e5Sespie case '=':
1416*c87b03e5Sespie BUILD_OPERATOR (LTE_TK);
1417*c87b03e5Sespie case '<':
1418*c87b03e5Sespie if ((c = java_get_unicode ()) == '=')
1419*c87b03e5Sespie {
1420*c87b03e5Sespie BUILD_OPERATOR2 (LS_ASSIGN_TK);
1421*c87b03e5Sespie }
1422*c87b03e5Sespie else
1423*c87b03e5Sespie {
1424*c87b03e5Sespie java_unget_unicode ();
1425*c87b03e5Sespie BUILD_OPERATOR (LS_TK);
1426*c87b03e5Sespie }
1427*c87b03e5Sespie default:
1428*c87b03e5Sespie java_unget_unicode ();
1429*c87b03e5Sespie BUILD_OPERATOR (LT_TK);
1430*c87b03e5Sespie }
1431*c87b03e5Sespie
1432*c87b03e5Sespie case '&':
1433*c87b03e5Sespie switch ((c = java_get_unicode ()))
1434*c87b03e5Sespie {
1435*c87b03e5Sespie case '&':
1436*c87b03e5Sespie BUILD_OPERATOR (BOOL_AND_TK);
1437*c87b03e5Sespie case '=':
1438*c87b03e5Sespie BUILD_OPERATOR2 (AND_ASSIGN_TK);
1439*c87b03e5Sespie default:
1440*c87b03e5Sespie java_unget_unicode ();
1441*c87b03e5Sespie BUILD_OPERATOR (AND_TK);
1442*c87b03e5Sespie }
1443*c87b03e5Sespie
1444*c87b03e5Sespie case '|':
1445*c87b03e5Sespie switch ((c = java_get_unicode ()))
1446*c87b03e5Sespie {
1447*c87b03e5Sespie case '|':
1448*c87b03e5Sespie BUILD_OPERATOR (BOOL_OR_TK);
1449*c87b03e5Sespie case '=':
1450*c87b03e5Sespie BUILD_OPERATOR2 (OR_ASSIGN_TK);
1451*c87b03e5Sespie default:
1452*c87b03e5Sespie java_unget_unicode ();
1453*c87b03e5Sespie BUILD_OPERATOR (OR_TK);
1454*c87b03e5Sespie }
1455*c87b03e5Sespie
1456*c87b03e5Sespie case '+':
1457*c87b03e5Sespie switch ((c = java_get_unicode ()))
1458*c87b03e5Sespie {
1459*c87b03e5Sespie case '+':
1460*c87b03e5Sespie BUILD_OPERATOR (INCR_TK);
1461*c87b03e5Sespie case '=':
1462*c87b03e5Sespie BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1463*c87b03e5Sespie default:
1464*c87b03e5Sespie java_unget_unicode ();
1465*c87b03e5Sespie BUILD_OPERATOR (PLUS_TK);
1466*c87b03e5Sespie }
1467*c87b03e5Sespie
1468*c87b03e5Sespie case '-':
1469*c87b03e5Sespie switch ((c = java_get_unicode ()))
1470*c87b03e5Sespie {
1471*c87b03e5Sespie case '-':
1472*c87b03e5Sespie BUILD_OPERATOR (DECR_TK);
1473*c87b03e5Sespie case '=':
1474*c87b03e5Sespie BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1475*c87b03e5Sespie default:
1476*c87b03e5Sespie java_unget_unicode ();
1477*c87b03e5Sespie BUILD_OPERATOR (MINUS_TK);
1478*c87b03e5Sespie }
1479*c87b03e5Sespie
1480*c87b03e5Sespie case '*':
1481*c87b03e5Sespie if ((c = java_get_unicode ()) == '=')
1482*c87b03e5Sespie {
1483*c87b03e5Sespie BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1484*c87b03e5Sespie }
1485*c87b03e5Sespie else
1486*c87b03e5Sespie {
1487*c87b03e5Sespie java_unget_unicode ();
1488*c87b03e5Sespie BUILD_OPERATOR (MULT_TK);
1489*c87b03e5Sespie }
1490*c87b03e5Sespie
1491*c87b03e5Sespie case '/':
1492*c87b03e5Sespie if ((c = java_get_unicode ()) == '=')
1493*c87b03e5Sespie {
1494*c87b03e5Sespie BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1495*c87b03e5Sespie }
1496*c87b03e5Sespie else
1497*c87b03e5Sespie {
1498*c87b03e5Sespie java_unget_unicode ();
1499*c87b03e5Sespie BUILD_OPERATOR (DIV_TK);
1500*c87b03e5Sespie }
1501*c87b03e5Sespie
1502*c87b03e5Sespie case '^':
1503*c87b03e5Sespie if ((c = java_get_unicode ()) == '=')
1504*c87b03e5Sespie {
1505*c87b03e5Sespie BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1506*c87b03e5Sespie }
1507*c87b03e5Sespie else
1508*c87b03e5Sespie {
1509*c87b03e5Sespie java_unget_unicode ();
1510*c87b03e5Sespie BUILD_OPERATOR (XOR_TK);
1511*c87b03e5Sespie }
1512*c87b03e5Sespie
1513*c87b03e5Sespie case '%':
1514*c87b03e5Sespie if ((c = java_get_unicode ()) == '=')
1515*c87b03e5Sespie {
1516*c87b03e5Sespie BUILD_OPERATOR2 (REM_ASSIGN_TK);
1517*c87b03e5Sespie }
1518*c87b03e5Sespie else
1519*c87b03e5Sespie {
1520*c87b03e5Sespie java_unget_unicode ();
1521*c87b03e5Sespie BUILD_OPERATOR (REM_TK);
1522*c87b03e5Sespie }
1523*c87b03e5Sespie
1524*c87b03e5Sespie case '!':
1525*c87b03e5Sespie if ((c = java_get_unicode()) == '=')
1526*c87b03e5Sespie {
1527*c87b03e5Sespie BUILD_OPERATOR (NEQ_TK);
1528*c87b03e5Sespie }
1529*c87b03e5Sespie else
1530*c87b03e5Sespie {
1531*c87b03e5Sespie java_unget_unicode ();
1532*c87b03e5Sespie BUILD_OPERATOR (NEG_TK);
1533*c87b03e5Sespie }
1534*c87b03e5Sespie
1535*c87b03e5Sespie case '?':
1536*c87b03e5Sespie JAVA_LEX_OP ("?");
1537*c87b03e5Sespie BUILD_OPERATOR (REL_QM_TK);
1538*c87b03e5Sespie case ':':
1539*c87b03e5Sespie JAVA_LEX_OP (":");
1540*c87b03e5Sespie BUILD_OPERATOR (REL_CL_TK);
1541*c87b03e5Sespie case '~':
1542*c87b03e5Sespie BUILD_OPERATOR (NOT_TK);
1543*c87b03e5Sespie }
1544*c87b03e5Sespie
1545*c87b03e5Sespie /* Keyword, boolean literal or null literal. */
1546*c87b03e5Sespie for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1547*c87b03e5Sespie c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1548*c87b03e5Sespie {
1549*c87b03e5Sespie java_unicode_2_utf8 (c);
1550*c87b03e5Sespie if (all_ascii && c >= 128)
1551*c87b03e5Sespie all_ascii = 0;
1552*c87b03e5Sespie ascii_index++;
1553*c87b03e5Sespie }
1554*c87b03e5Sespie
1555*c87b03e5Sespie obstack_1grow (&temporary_obstack, '\0');
1556*c87b03e5Sespie string = obstack_finish (&temporary_obstack);
1557*c87b03e5Sespie if (c != UEOF)
1558*c87b03e5Sespie java_unget_unicode ();
1559*c87b03e5Sespie
1560*c87b03e5Sespie /* If we have something all ascii, we consider a keyword, a boolean
1561*c87b03e5Sespie literal, a null literal or an all ASCII identifier. Otherwise,
1562*c87b03e5Sespie this is an identifier (possibly not respecting formation rule). */
1563*c87b03e5Sespie if (all_ascii)
1564*c87b03e5Sespie {
1565*c87b03e5Sespie const struct java_keyword *kw;
1566*c87b03e5Sespie if ((kw=java_keyword (string, ascii_index)))
1567*c87b03e5Sespie {
1568*c87b03e5Sespie JAVA_LEX_KW (string);
1569*c87b03e5Sespie switch (kw->token)
1570*c87b03e5Sespie {
1571*c87b03e5Sespie case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1572*c87b03e5Sespie case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1573*c87b03e5Sespie case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1574*c87b03e5Sespie case PRIVATE_TK: case STRICT_TK:
1575*c87b03e5Sespie SET_MODIFIER_CTX (kw->token);
1576*c87b03e5Sespie return MODIFIER_TK;
1577*c87b03e5Sespie case FLOAT_TK:
1578*c87b03e5Sespie SET_LVAL_NODE (float_type_node);
1579*c87b03e5Sespie return FP_TK;
1580*c87b03e5Sespie case DOUBLE_TK:
1581*c87b03e5Sespie SET_LVAL_NODE (double_type_node);
1582*c87b03e5Sespie return FP_TK;
1583*c87b03e5Sespie case BOOLEAN_TK:
1584*c87b03e5Sespie SET_LVAL_NODE (boolean_type_node);
1585*c87b03e5Sespie return BOOLEAN_TK;
1586*c87b03e5Sespie case BYTE_TK:
1587*c87b03e5Sespie SET_LVAL_NODE (byte_type_node);
1588*c87b03e5Sespie return INTEGRAL_TK;
1589*c87b03e5Sespie case SHORT_TK:
1590*c87b03e5Sespie SET_LVAL_NODE (short_type_node);
1591*c87b03e5Sespie return INTEGRAL_TK;
1592*c87b03e5Sespie case INT_TK:
1593*c87b03e5Sespie SET_LVAL_NODE (int_type_node);
1594*c87b03e5Sespie return INTEGRAL_TK;
1595*c87b03e5Sespie case LONG_TK:
1596*c87b03e5Sespie SET_LVAL_NODE (long_type_node);
1597*c87b03e5Sespie return INTEGRAL_TK;
1598*c87b03e5Sespie case CHAR_TK:
1599*c87b03e5Sespie SET_LVAL_NODE (char_type_node);
1600*c87b03e5Sespie return INTEGRAL_TK;
1601*c87b03e5Sespie
1602*c87b03e5Sespie /* Keyword based literals. */
1603*c87b03e5Sespie case TRUE_TK:
1604*c87b03e5Sespie case FALSE_TK:
1605*c87b03e5Sespie SET_LVAL_NODE ((kw->token == TRUE_TK ?
1606*c87b03e5Sespie boolean_true_node : boolean_false_node));
1607*c87b03e5Sespie return BOOL_LIT_TK;
1608*c87b03e5Sespie case NULL_TK:
1609*c87b03e5Sespie SET_LVAL_NODE (null_pointer_node);
1610*c87b03e5Sespie return NULL_TK;
1611*c87b03e5Sespie
1612*c87b03e5Sespie case ASSERT_TK:
1613*c87b03e5Sespie if (flag_assert)
1614*c87b03e5Sespie {
1615*c87b03e5Sespie BUILD_OPERATOR (kw->token);
1616*c87b03e5Sespie return kw->token;
1617*c87b03e5Sespie }
1618*c87b03e5Sespie else
1619*c87b03e5Sespie break;
1620*c87b03e5Sespie
1621*c87b03e5Sespie /* Some keyword we want to retain information on the location
1622*c87b03e5Sespie they where found. */
1623*c87b03e5Sespie case CASE_TK:
1624*c87b03e5Sespie case DEFAULT_TK:
1625*c87b03e5Sespie case SUPER_TK:
1626*c87b03e5Sespie case THIS_TK:
1627*c87b03e5Sespie case RETURN_TK:
1628*c87b03e5Sespie case BREAK_TK:
1629*c87b03e5Sespie case CONTINUE_TK:
1630*c87b03e5Sespie case TRY_TK:
1631*c87b03e5Sespie case CATCH_TK:
1632*c87b03e5Sespie case THROW_TK:
1633*c87b03e5Sespie case INSTANCEOF_TK:
1634*c87b03e5Sespie BUILD_OPERATOR (kw->token);
1635*c87b03e5Sespie
1636*c87b03e5Sespie default:
1637*c87b03e5Sespie return kw->token;
1638*c87b03e5Sespie }
1639*c87b03e5Sespie }
1640*c87b03e5Sespie }
1641*c87b03e5Sespie
1642*c87b03e5Sespie /* We may have an ID here. */
1643*c87b03e5Sespie if (JAVA_START_CHAR_P (first_unicode))
1644*c87b03e5Sespie {
1645*c87b03e5Sespie JAVA_LEX_ID (string);
1646*c87b03e5Sespie java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1647*c87b03e5Sespie return ID_TK;
1648*c87b03e5Sespie }
1649*c87b03e5Sespie
1650*c87b03e5Sespie /* Everything else is an invalid character in the input. */
1651*c87b03e5Sespie {
1652*c87b03e5Sespie char lex_error_buffer [128];
1653*c87b03e5Sespie sprintf (lex_error_buffer, "Invalid character `%s' in input",
1654*c87b03e5Sespie java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1655*c87b03e5Sespie java_lex_error (lex_error_buffer, 1);
1656*c87b03e5Sespie }
1657*c87b03e5Sespie return 0;
1658*c87b03e5Sespie }
1659*c87b03e5Sespie
1660*c87b03e5Sespie #ifndef JC1_LITE
1661*c87b03e5Sespie /* This is called by the parser to see if an error should be generated
1662*c87b03e5Sespie due to numeric overflow. This function only handles the particular
1663*c87b03e5Sespie case of the largest negative value, and is only called in the case
1664*c87b03e5Sespie where this value is not preceded by `-'. */
1665*c87b03e5Sespie static void
error_if_numeric_overflow(value)1666*c87b03e5Sespie error_if_numeric_overflow (value)
1667*c87b03e5Sespie tree value;
1668*c87b03e5Sespie {
1669*c87b03e5Sespie if (TREE_CODE (value) == INTEGER_CST
1670*c87b03e5Sespie && JAVA_RADIX10_FLAG (value)
1671*c87b03e5Sespie && tree_int_cst_sgn (value) < 0)
1672*c87b03e5Sespie {
1673*c87b03e5Sespie if (TREE_TYPE (value) == long_type_node)
1674*c87b03e5Sespie java_lex_error ("Numeric overflow for `long' literal", 0);
1675*c87b03e5Sespie else
1676*c87b03e5Sespie java_lex_error ("Numeric overflow for `int' literal", 0);
1677*c87b03e5Sespie }
1678*c87b03e5Sespie }
1679*c87b03e5Sespie #endif /* JC1_LITE */
1680*c87b03e5Sespie
1681*c87b03e5Sespie static void
java_unicode_2_utf8(unicode)1682*c87b03e5Sespie java_unicode_2_utf8 (unicode)
1683*c87b03e5Sespie unicode_t unicode;
1684*c87b03e5Sespie {
1685*c87b03e5Sespie if (RANGE (unicode, 0x01, 0x7f))
1686*c87b03e5Sespie obstack_1grow (&temporary_obstack, (char)unicode);
1687*c87b03e5Sespie else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1688*c87b03e5Sespie {
1689*c87b03e5Sespie obstack_1grow (&temporary_obstack,
1690*c87b03e5Sespie (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1691*c87b03e5Sespie obstack_1grow (&temporary_obstack,
1692*c87b03e5Sespie (unsigned char)(0x80 | (unicode & 0x3f)));
1693*c87b03e5Sespie }
1694*c87b03e5Sespie else /* Range 0x800-0xffff. */
1695*c87b03e5Sespie {
1696*c87b03e5Sespie obstack_1grow (&temporary_obstack,
1697*c87b03e5Sespie (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1698*c87b03e5Sespie obstack_1grow (&temporary_obstack,
1699*c87b03e5Sespie (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1700*c87b03e5Sespie obstack_1grow (&temporary_obstack,
1701*c87b03e5Sespie (unsigned char)(0x80 | (unicode & 0x003f)));
1702*c87b03e5Sespie }
1703*c87b03e5Sespie }
1704*c87b03e5Sespie
1705*c87b03e5Sespie #ifndef JC1_LITE
1706*c87b03e5Sespie static tree
build_wfl_node(node)1707*c87b03e5Sespie build_wfl_node (node)
1708*c87b03e5Sespie tree node;
1709*c87b03e5Sespie {
1710*c87b03e5Sespie node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1711*c87b03e5Sespie /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1712*c87b03e5Sespie TREE_TYPE (node) = NULL_TREE;
1713*c87b03e5Sespie return node;
1714*c87b03e5Sespie }
1715*c87b03e5Sespie #endif
1716*c87b03e5Sespie
1717*c87b03e5Sespie static void
java_lex_error(msg,forward)1718*c87b03e5Sespie java_lex_error (msg, forward)
1719*c87b03e5Sespie const char *msg ATTRIBUTE_UNUSED;
1720*c87b03e5Sespie int forward ATTRIBUTE_UNUSED;
1721*c87b03e5Sespie {
1722*c87b03e5Sespie #ifndef JC1_LITE
1723*c87b03e5Sespie ctxp->elc.line = ctxp->c_line->lineno;
1724*c87b03e5Sespie ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1725*c87b03e5Sespie
1726*c87b03e5Sespie /* Might be caught in the middle of some error report. */
1727*c87b03e5Sespie ctxp->java_error_flag = 0;
1728*c87b03e5Sespie java_error (NULL);
1729*c87b03e5Sespie java_error (msg);
1730*c87b03e5Sespie #endif
1731*c87b03e5Sespie }
1732*c87b03e5Sespie
1733*c87b03e5Sespie #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, is a line terminator
   ('\n' or '\r'), and 0 otherwise.  For '\r' one more character is
   read from FP: a directly following '\n' is consumed, so that CR LF
   counts as a single terminator; any other character is pushed back,
   and EOF is left alone.  */
static int
java_is_eol (fp, c)
     FILE *fp;
     int c;
{
  if (c == '\n')
    return 1;

  if (c != '\r')
    return 0;

  /* Carriage return: look one character ahead.  */
  {
    int peeked = getc (fp);

    if (peeked != '\n' && peeked != EOF)
      ungetc (peeked, fp);
  }
  return 1;
}
1753*c87b03e5Sespie #endif
1754*c87b03e5Sespie
1755*c87b03e5Sespie char *
java_get_line_col(filename,line,col)1756*c87b03e5Sespie java_get_line_col (filename, line, col)
1757*c87b03e5Sespie const char *filename ATTRIBUTE_UNUSED;
1758*c87b03e5Sespie int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1759*c87b03e5Sespie {
1760*c87b03e5Sespie #ifdef JC1_LITE
1761*c87b03e5Sespie return 0;
1762*c87b03e5Sespie #else
1763*c87b03e5Sespie /* Dumb implementation. Doesn't try to cache or optimize things. */
1764*c87b03e5Sespie /* First line of the file is line 1, first column is 1. */
1765*c87b03e5Sespie
1766*c87b03e5Sespie /* COL == -1 means, at the CR/LF in LINE. */
1767*c87b03e5Sespie /* COL == -2 means, at the first non space char in LINE. */
1768*c87b03e5Sespie
1769*c87b03e5Sespie FILE *fp;
1770*c87b03e5Sespie int c, ccol, cline = 1;
1771*c87b03e5Sespie int current_line_col = 0;
1772*c87b03e5Sespie int first_non_space = 0;
1773*c87b03e5Sespie char *base;
1774*c87b03e5Sespie
1775*c87b03e5Sespie if (!(fp = fopen (filename, "r")))
1776*c87b03e5Sespie fatal_io_error ("can't open %s", filename);
1777*c87b03e5Sespie
1778*c87b03e5Sespie while (cline != line)
1779*c87b03e5Sespie {
1780*c87b03e5Sespie c = getc (fp);
1781*c87b03e5Sespie if (c == EOF)
1782*c87b03e5Sespie {
1783*c87b03e5Sespie static const char msg[] = "<<file too short - unexpected EOF>>";
1784*c87b03e5Sespie obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1785*c87b03e5Sespie goto have_line;
1786*c87b03e5Sespie }
1787*c87b03e5Sespie if (java_is_eol (fp, c))
1788*c87b03e5Sespie cline++;
1789*c87b03e5Sespie }
1790*c87b03e5Sespie
1791*c87b03e5Sespie /* Gather the chars of the current line in a buffer. */
1792*c87b03e5Sespie for (;;)
1793*c87b03e5Sespie {
1794*c87b03e5Sespie c = getc (fp);
1795*c87b03e5Sespie if (c < 0 || java_is_eol (fp, c))
1796*c87b03e5Sespie break;
1797*c87b03e5Sespie if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1798*c87b03e5Sespie first_non_space = current_line_col;
1799*c87b03e5Sespie obstack_1grow (&temporary_obstack, c);
1800*c87b03e5Sespie current_line_col++;
1801*c87b03e5Sespie }
1802*c87b03e5Sespie have_line:
1803*c87b03e5Sespie
1804*c87b03e5Sespie obstack_1grow (&temporary_obstack, '\n');
1805*c87b03e5Sespie
1806*c87b03e5Sespie if (col == -1)
1807*c87b03e5Sespie {
1808*c87b03e5Sespie col = current_line_col;
1809*c87b03e5Sespie first_non_space = 0;
1810*c87b03e5Sespie }
1811*c87b03e5Sespie else if (col == -2)
1812*c87b03e5Sespie col = first_non_space;
1813*c87b03e5Sespie else
1814*c87b03e5Sespie first_non_space = 0;
1815*c87b03e5Sespie
1816*c87b03e5Sespie /* Place the '^' a the right position. */
1817*c87b03e5Sespie base = obstack_base (&temporary_obstack);
1818*c87b03e5Sespie for (ccol = 1; ccol <= col+3; ccol++)
1819*c87b03e5Sespie {
1820*c87b03e5Sespie /* Compute \t when reaching first_non_space. */
1821*c87b03e5Sespie char c = (first_non_space ?
1822*c87b03e5Sespie (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1823*c87b03e5Sespie obstack_1grow (&temporary_obstack, c);
1824*c87b03e5Sespie }
1825*c87b03e5Sespie obstack_grow0 (&temporary_obstack, "^", 1);
1826*c87b03e5Sespie
1827*c87b03e5Sespie fclose (fp);
1828*c87b03e5Sespie return obstack_finish (&temporary_obstack);
1829*c87b03e5Sespie #endif
1830*c87b03e5Sespie }
1831*c87b03e5Sespie
1832*c87b03e5Sespie #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text at STR with the
   NUL-terminated string NAME, one decoded character (UTF8_GET) per
   character of NAME.  At the first mismatch the difference between
   the decoded character and NAME's character is returned.  If NAME is
   exhausted without a mismatch, the result is 0 when STR was consumed
   exactly up to its limit and 1 otherwise.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; ++expected)
    {
      int ch = UTF8_GET (str, limit);
      int delta = ch - *expected;

      if (delta != 0)
        return delta;
    }

  return str != limit;
}
1851*c87b03e5Sespie
/* All C++ keywords (including the GNU `__' spellings), kept in
   ascending byte order because cxx_keyword_p searches this table by
   bisection.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__", "__imag", "__imag__", "__inline", "__inline__",
  "__label__", "__null", "__real", "__real__",
  "__restrict", "__restrict__", "__signed", "__signed__",
  "__typeof", "__typeof__", "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast",
  "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
1961*c87b03e5Sespie
1962*c87b03e5Sespie /* Return true if NAME is a C++ keyword. */
1963*c87b03e5Sespie
1964*c87b03e5Sespie int
cxx_keyword_p(name,length)1965*c87b03e5Sespie cxx_keyword_p (name, length)
1966*c87b03e5Sespie const char *name;
1967*c87b03e5Sespie int length;
1968*c87b03e5Sespie {
1969*c87b03e5Sespie int last = ARRAY_SIZE (cxx_keywords);
1970*c87b03e5Sespie int first = 0;
1971*c87b03e5Sespie int mid = (last + first) / 2;
1972*c87b03e5Sespie int old = -1;
1973*c87b03e5Sespie
1974*c87b03e5Sespie for (mid = (last + first) / 2;
1975*c87b03e5Sespie mid != old;
1976*c87b03e5Sespie old = mid, mid = (last + first) / 2)
1977*c87b03e5Sespie {
1978*c87b03e5Sespie int kwl = strlen (cxx_keywords[mid]);
1979*c87b03e5Sespie int min_length = kwl > length ? length : kwl;
1980*c87b03e5Sespie int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1981*c87b03e5Sespie
1982*c87b03e5Sespie if (r == 0)
1983*c87b03e5Sespie {
1984*c87b03e5Sespie int i;
1985*c87b03e5Sespie /* We've found a match if all the remaining characters are `$'. */
1986*c87b03e5Sespie for (i = min_length; i < length && name[i] == '$'; ++i)
1987*c87b03e5Sespie ;
1988*c87b03e5Sespie if (i == length)
1989*c87b03e5Sespie return 1;
1990*c87b03e5Sespie r = 1;
1991*c87b03e5Sespie }
1992*c87b03e5Sespie
1993*c87b03e5Sespie if (r < 0)
1994*c87b03e5Sespie last = mid;
1995*c87b03e5Sespie else
1996*c87b03e5Sespie first = mid;
1997*c87b03e5Sespie }
1998*c87b03e5Sespie return 0;
1999*c87b03e5Sespie }
2000*c87b03e5Sespie #endif /* JC1_LITE */
2001