xref: /openbsd/gnu/usr.bin/gcc/gcc/java/lex.h (revision c87b03e5)
1*c87b03e5Sespie /* Language lexer definitions for the GNU compiler for the Java(TM) language.
2*c87b03e5Sespie    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3*c87b03e5Sespie    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
4*c87b03e5Sespie 
5*c87b03e5Sespie This file is part of GNU CC.
6*c87b03e5Sespie 
7*c87b03e5Sespie GNU CC is free software; you can redistribute it and/or modify
8*c87b03e5Sespie it under the terms of the GNU General Public License as published by
9*c87b03e5Sespie the Free Software Foundation; either version 2, or (at your option)
10*c87b03e5Sespie any later version.
11*c87b03e5Sespie 
12*c87b03e5Sespie GNU CC is distributed in the hope that it will be useful,
13*c87b03e5Sespie but WITHOUT ANY WARRANTY; without even the implied warranty of
14*c87b03e5Sespie MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15*c87b03e5Sespie GNU General Public License for more details.
16*c87b03e5Sespie 
17*c87b03e5Sespie You should have received a copy of the GNU General Public License
18*c87b03e5Sespie along with GNU CC; see the file COPYING.  If not, write to
19*c87b03e5Sespie the Free Software Foundation, 59 Temple Place - Suite 330,
20*c87b03e5Sespie Boston, MA 02111-1307, USA.
21*c87b03e5Sespie 
22*c87b03e5Sespie Java and all Java-based marks are trademarks or registered trademarks
23*c87b03e5Sespie of Sun Microsystems, Inc. in the United States and other countries.
24*c87b03e5Sespie The Free Software Foundation is independent of Sun Microsystems, Inc.  */
25*c87b03e5Sespie 
26*c87b03e5Sespie #ifndef GCC_JAVA_LEX_H
27*c87b03e5Sespie #define GCC_JAVA_LEX_H
28*c87b03e5Sespie 
/* Lexer globals; only declared here, the definitions live elsewhere.  */
extern FILE *finput;
extern int lineno;

/* One Unicode character, in the form it is read from the input file.  */
typedef unsigned short unicode_t;
35*c87b03e5Sespie 
36*c87b03e5Sespie #ifdef HAVE_ICONV
37*c87b03e5Sespie #include <iconv.h>
38*c87b03e5Sespie #endif /* HAVE_ICONV */
39*c87b03e5Sespie 
/* Name of the encoding assumed when none is specified.  */
#define DEFAULT_ENCODING "UTF-8"
42*c87b03e5Sespie 
/* Debug macros to print out what the lexer matches.

   With JAVA_LEX_DEBUG defined, each macro prints one line on stdout
   that starts with "java_lex:" and the current value of `lineno'.
   Without it, every macro expands to nothing and its argument is not
   evaluated.  Per-character tracing additionally needs
   JAVA_LEX_DEBUG_CHAR.

   Arguments are parenthesized in the expansions so that expressions
   such as `a ? b : c' can be passed safely.  */
#ifdef JAVA_LEX_DEBUG
#ifdef JAVA_LEX_DEBUG_CHAR
/* Values of 128 and above are displayed as '.'; the numeric value
   follows.  The expansion keeps its historical trailing semicolon, so
   it already forms a complete statement.  */
#define JAVA_LEX_CHAR(c)      printf ("java_lex:%d: char '%c'.%d\n", 	\
				      lineno, ((c) < 128 ? (c) : '.'), (c));
#else
#define JAVA_LEX_CHAR(c)
#endif
#define JAVA_LEX_KW(c)        printf ("java_lex:%d: keyword: '%s'\n",	\
				      lineno, (c))
/* Prints "<U>" in place of the identifier unless `all_ascii' (a
   variable expected at the expansion site) is nonzero.  */
#define JAVA_LEX_ID(s)        printf ("java_lex:%d: ID: '%s'\n",	\
				      lineno,				\
				      (all_ascii ? (s) : "<U>"))
#define JAVA_LEX_LIT(s, r)    printf ("java_lex:%d: literal '%s'_%d\n",	\
				      lineno, (s), (r))
#define JAVA_LEX_CHAR_LIT(s)  printf ("java_lex:%d: literal '%d'\n",	\
				      lineno, (s))
/* Kept as a brace block, as before; the unused local it used to
   declare has been dropped.  */
#define JAVA_LEX_STR_LIT(s)   {						 \
				 printf ("java_lex:%d: literal '%s'\n",  \
					 lineno, (s));			 \
			       }
#define JAVA_LEX_SEP(c)       printf ("java_lex:%d: separator '%c'\n",	\
				      lineno, (c))
#define JAVA_LEX_OP(c)        printf ("java_lex:%d: operator '%s'\n",	\
				      lineno, (c))
#else
#define JAVA_LEX_CHAR(c)
#define JAVA_LEX_KW(c)
#define JAVA_LEX_ID(s)
#define JAVA_LEX_LIT(s,r)
#define JAVA_LEX_CHAR_LIT(s)
#define JAVA_LEX_STR_LIT(s)
#define JAVA_LEX_SEP(c)
#define JAVA_LEX_OP(s)
#endif
75*c87b03e5Sespie 
76*c87b03e5Sespie /* Line information containers  */
77*c87b03e5Sespie struct java_line {
78*c87b03e5Sespie   unicode_t *line;		/* The line's unicode */
79*c87b03e5Sespie   char      *unicode_escape_p;	/* The matching char was a unicode escape */
80*c87b03e5Sespie   unicode_t ahead[1];		/* Character ahead */
81*c87b03e5Sespie   char unicode_escape_ahead_p;	/* Character ahead is a unicode escape */
82*c87b03e5Sespie   int max;			/* buffer's max size */
83*c87b03e5Sespie   int size;			/* number of unicodes */
84*c87b03e5Sespie   int current;			/* Current position, unicode based */
85*c87b03e5Sespie   int char_col;			/* Current position, input char based */
86*c87b03e5Sespie   int lineno;			/* Its line number */
87*c87b03e5Sespie   int white_space_only;		/* If it contains only white spaces */
88*c87b03e5Sespie };
/* Column width of the character found P positions away from the
   current position of ctxp->c_line: 6 when it is flagged as a unicode
   escape (presumably the length of a \uXXXX sequence), 8 when it is a
   tab, and 1 otherwise.  */
#define JAVA_COLUMN_DELTA(p)						\
  (ctxp->c_line->unicode_escape_p[ctxp->c_line->current + (p)]		\
   ? 6									\
   : (ctxp->c_line->line[ctxp->c_line->current + (p)] == '\t'		\
      ? 8								\
      : 1))
92*c87b03e5Sespie 
/* Pairs a line container with an integer error value.
   NOTE(review): presumably `line' is the line an error was reported
   on and `error' a flag or count -- confirm against the users.  */
struct java_error {
  struct java_line *line;
  int error;
};
97*c87b03e5Sespie 
/* A line/column position record.
   NOTE(review): `prev_col' presumably remembers the column before the
   last update -- confirm against the lexer.  */
typedef struct _java_lc {
  int line;
  int prev_col;
  int col;
} java_lc;
103*c87b03e5Sespie 
104*c87b03e5Sespie typedef struct java_lexer
105*c87b03e5Sespie {
106*c87b03e5Sespie   /* The file from which we're reading.  */
107*c87b03e5Sespie   FILE *finput;
108*c87b03e5Sespie 
109*c87b03e5Sespie   /* Number of consecutive backslashes we've read.  */
110*c87b03e5Sespie   int bs_count;
111*c87b03e5Sespie 
112*c87b03e5Sespie   /* If nonzero, a value that was pushed back.  */
113*c87b03e5Sespie   unicode_t unget_value;
114*c87b03e5Sespie 
115*c87b03e5Sespie   /* If nonzero, we've hit EOF.  Used only by java_get_unicode().  */
116*c87b03e5Sespie   int hit_eof : 1;
117*c87b03e5Sespie 
118*c87b03e5Sespie #ifdef HAVE_ICONV
119*c87b03e5Sespie   /* Nonzero if we've read any bytes.  We only recognize the
120*c87b03e5Sespie      byte-order-marker (BOM) as the first word.  */
121*c87b03e5Sespie   int read_anything : 1;
122*c87b03e5Sespie 
123*c87b03e5Sespie   /* Nonzero if we have to byte swap.  */
124*c87b03e5Sespie   int byte_swap : 1;
125*c87b03e5Sespie 
126*c87b03e5Sespie   /* Nonzero if we're using the fallback decoder.  */
127*c87b03e5Sespie   int use_fallback : 1;
128*c87b03e5Sespie 
129*c87b03e5Sespie   /* The handle for the iconv converter we're using.  */
130*c87b03e5Sespie   iconv_t handle;
131*c87b03e5Sespie 
132*c87b03e5Sespie   /* Bytes we've read from the file but have not sent to iconv.  */
133*c87b03e5Sespie   char buffer[1024];
134*c87b03e5Sespie 
135*c87b03e5Sespie   /* Index of first valid character in buffer, -1 if no valid
136*c87b03e5Sespie      characters.  */
137*c87b03e5Sespie   int first;
138*c87b03e5Sespie 
139*c87b03e5Sespie   /* Index of last valid character in buffer, plus one.  -1 if no
140*c87b03e5Sespie      valid characters in buffer.  */
141*c87b03e5Sespie   int last;
142*c87b03e5Sespie 
143*c87b03e5Sespie   /* This is a buffer of characters already converted by iconv.  We
144*c87b03e5Sespie      use `char' here because we're assuming that iconv() converts to
145*c87b03e5Sespie      UCS-2, and then we convert it ourselves.  */
146*c87b03e5Sespie   unsigned char out_buffer[1024];
147*c87b03e5Sespie 
148*c87b03e5Sespie   /* Index of first valid output character.  -1 if no valid
149*c87b03e5Sespie      characters.  */
150*c87b03e5Sespie   int out_first;
151*c87b03e5Sespie 
152*c87b03e5Sespie   /* Index of last valid output character, plus one.  -1 if no valid
153*c87b03e5Sespie      characters.  */
154*c87b03e5Sespie   int out_last;
155*c87b03e5Sespie 
156*c87b03e5Sespie #endif /* HAVE_ICONV */
157*c87b03e5Sespie } java_lexer;
158*c87b03e5Sespie 
159*c87b03e5Sespie /* Destroy a lexer object.  */
160*c87b03e5Sespie extern void java_destroy_lexer PARAMS ((java_lexer *));
161*c87b03e5Sespie 
/* NOTE(review): presumably the nominal length of a line buffer --
   confirm against the lexer.  */
#define JAVA_LINE_MAX 80

/* Pack the location held in ctxp->elc into a single integer: the line
   number is shifted above the low 12 bits, which receive the column
   (masked to 12 bits).  */
#define BUILD_LOCATION()						\
  ((ctxp->elc.line << 12)						\
   | (ctxp->elc.col & 0xfff))
166*c87b03e5Sespie 
/* Those macros are defined differently if we compile jc1-lite
   (JC1_LITE defined) or jc1.

   Every statement-like macro below is meant to be followed by a
   semicolon at the call site.  */
#ifdef JC1_LITE

/* jc1-lite: most of these collapse to constants or to nothing.  */
#define DCONST0 0
#define REAL_VALUE_TYPE int
#define GET_IDENTIFIER(S) xstrdup ((S))
#define REAL_VALUE_ATOF(LIT,MODE) 0
#define REAL_VALUE_ISINF(VALUE)   0
#define REAL_VALUE_ISNAN(VALUE)   0
#define SET_REAL_VALUE_ATOF(TARGET,SOURCE)
#define FLOAT_TYPE_NODE 0
#define DOUBLE_TYPE_NODE 0
#define SET_MODIFIER_CTX(TOKEN) java_lval->value = (TOKEN)
#define GET_TYPE_PRECISION(NODE) 4
#define BUILD_OPERATOR(TOKEN)	return TOKEN
#define BUILD_OPERATOR2(TOKEN)	return ASSIGN_ANY_TK
#define SET_LVAL_NODE(NODE)
#define SET_LVAL_NODE_TYPE(NODE, TYPE)
#define BUILD_ID_WFL(EXP) (EXP)
#define JAVA_FLOAT_RANGE_ERROR(S) {}
#define JAVA_INTEGRAL_RANGE_ERROR(S) do { } while (0)

#else

#define DCONST0 dconst0
#define GET_IDENTIFIER(S) get_identifier ((S))
#define SET_REAL_VALUE_ATOF(TARGET,SOURCE) (TARGET) = (SOURCE)
#define FLOAT_TYPE_NODE float_type_node
#define DOUBLE_TYPE_NODE double_type_node
/* Set modifier_ctx according to TOKEN: store a fresh wfl node in the
   slot indexed by TOKEN - PUBLIC_TK and put that index in
   java_lval->value.  Wrapped in do/while (0) so that it behaves as a
   single statement, including as an unbraced `if' body followed by
   `else'.  */
#define SET_MODIFIER_CTX(TOKEN)						   \
  do {									   \
    ctxp->modifier_ctx [(TOKEN)-PUBLIC_TK] = build_wfl_node (NULL_TREE); \
    java_lval->value = (TOKEN)-PUBLIC_TK;				   \
  } while (0)
/* Type precision for long, divided by 8.  NODE is ignored.  The
   expansion is a parenthesized expression without a trailing
   semicolon, so it can be used inside larger expressions.  */
#define GET_TYPE_PRECISION(NODE) (TYPE_PRECISION (long_type_node) / 8)
/* Build an operator tree node and return TOKEN */
#define BUILD_OPERATOR(TOKEN)				\
  do {							\
    java_lval->operator.token = (TOKEN);		\
    java_lval->operator.location = BUILD_LOCATION();	\
    return (TOKEN);					\
  } while (0)

/* Build an operator tree node but return ASSIGN_ANY_TK */
#define BUILD_OPERATOR2(TOKEN)				\
  do {							\
    java_lval->operator.token = (TOKEN);		\
    java_lval->operator.location = BUILD_LOCATION();	\
    return ASSIGN_ANY_TK;				\
  } while (0)
/* Set java_lval->node and TREE_TYPE(java_lval->node) in macros */
#define SET_LVAL_NODE(NODE) java_lval->node = (NODE)
/* Expands to a brace block, not a do/while (0): do not use it as an
   unbraced `if' body that is followed by `else'.  */
#define SET_LVAL_NODE_TYPE(NODE,TYPE)		\
  {						\
    java_lval->node = (NODE);			\
    TREE_TYPE (java_lval->node) = (TYPE);	\
  }
/* Wrap identifier around a wfl */
#define BUILD_ID_WFL(EXP) build_wfl_node ((EXP))
/* Special ways to report error on numeric literals.  Both macros
   temporarily move ctxp->c_line->current to `number_beginning' (a
   variable expected at the expansion site), call java_lex_error, and
   then restore the saved position.  */
/* M is the type name inserted into the message.  Expands to a brace
   block, not a do/while (0): do not use it as an unbraced `if' body
   that is followed by `else'.  */
#define JAVA_FLOAT_RANGE_ERROR(m)					  \
  {									  \
    char msg [1024];							  \
    int i = ctxp->c_line->current;					  \
    ctxp->c_line->current = number_beginning;				  \
    sprintf (msg, "Floating point literal exceeds range of `%s'", (m)); \
    java_lex_error (msg, 0);						  \
    ctxp->c_line->current = i;						  \
  }
/* M is the complete message passed to java_lex_error.  */
#define JAVA_INTEGRAL_RANGE_ERROR(m)		\
  do {						\
    int i = ctxp->c_line->current;		\
    ctxp->c_line->current = number_beginning;	\
    java_lex_error ((m), 0);			\
    ctxp->c_line->current = i;			\
  } while (0)

#endif /* Definitions for jc1 compilation only */
248*c87b03e5Sespie 
/* Macros to decode character ranges.

   Every argument is parenthesized in the expansion, so arbitrary
   expressions can be passed.  Arguments may still be evaluated more
   than once: do not pass expressions with side effects.  */

/* Nonzero when C lies in the inclusive range [L, H].  */
#define RANGE(c, l, h)           (((c) >= (l) && (c) <= (h)))
/* Space, horizontal tab or form feed.  */
#define JAVA_WHITE_SPACE_P(c) ((c) == ' ' || (c) == '\t' || (c) == '\f')
/* Below 128 the test is done inline; from 128 up it is delegated to
   java_start_char_p.  */
#define JAVA_START_CHAR_P(c) (((c) < 128				      \
			       && (ISIDST (c) || (c) == '$'))		      \
                              || ((c) >= 128 && java_start_char_p (c)))
/* Below 128 the test is done inline; from 128 up it is delegated to
   java_part_char_p.  */
#define JAVA_PART_CHAR_P(c) (((c) < 128					      \
			       && (ISIDNUM (c)				      \
				   || (c) == '$'			      \
				   || (c) == 0x0000			      \
				   || RANGE ((c), 0x01, 0x08)		      \
				   || RANGE ((c), 0x0e, 0x1b)		      \
				   || (c) == 0x7f))			      \
                              || ((c) >= 128 && java_part_char_p (c)))
#define JAVA_ASCII_DIGIT(c)    ISDIGIT (c)
#define JAVA_ASCII_OCTDIGIT(c) RANGE ((c), '0', '7')
#define JAVA_ASCII_HEXDIGIT(c) ISXDIGIT (c)
/* Digits, '.', and the letters d, e, f in either case.  */
#define JAVA_ASCII_FPCHAR(c)   (RANGE ((c), 'd', 'f')			      \
				|| RANGE ((c), 'D', 'F')		      \
				|| (c) == '.' || JAVA_ASCII_DIGIT (c))
/* Floating point type suffix: d, D, f or F.  */
#define JAVA_FP_SUFFIX(c)      ((c) == 'D' || (c) == 'd'		      \
				|| (c) == 'f' || (c) == 'F')
/* Exponent indicator: e or E.  (This used to test 'F' instead of 'e',
   which missed the lower case form and matched the float suffix.)  */
#define JAVA_FP_EXP(c)         ((c) == 'E' || (c) == 'e')
/* Sign character: - or +.  */
#define JAVA_FP_PM(c)          ((c) == '-' || (c) == '+')
#define JAVA_ASCII_LETTER(c)   ISALPHA (c)
272*c87b03e5Sespie 
/* Constants.  The negative values are parenthesized so that they keep
   their meaning whatever operator ends up next to them after
   expansion.  */
#define JAVA_READ_BUFFER 256
#define JAVA_CHAR_ERROR (-2)
#define UEOF (-1)
277*c87b03e5Sespie 
278*c87b03e5Sespie #endif /* ! GCC_JAVA_LEX_H */
279