/* Language lexer definitions for the GNU compiler for the Java(TM) language.
   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.

Java and all Java-based marks are trademarks or registered trademarks
of Sun Microsystems, Inc. in the United States and other countries.
The Free Software Foundation is independent of Sun Microsystems, Inc.  */

#ifndef GCC_JAVA_LEX_H
#define GCC_JAVA_LEX_H

/* Extern global variables declarations */
extern FILE *finput;
extern int lineno;

/* A Unicode character, as read from the input file */
typedef unsigned short unicode_t;

#ifdef HAVE_ICONV
#include <iconv.h>
#endif /* HAVE_ICONV */

/* Default encoding to use if no encoding is specified.  */
#define DEFAULT_ENCODING "UTF-8"

/* Debug macros to print out what we match.  They expand to printf
   calls when JAVA_LEX_DEBUG is defined and to nothing otherwise;
   JAVA_LEX_CHAR additionally requires JAVA_LEX_DEBUG_CHAR.  */
#ifdef JAVA_LEX_DEBUG
#ifdef JAVA_LEX_DEBUG_CHAR
/* NOTE: this expansion ends in a semicolon of its own.  It is kept so
   that any existing use written without one keeps compiling.  */
#define JAVA_LEX_CHAR(c) printf ("java_lex:%d: char '%c'.%d\n",     \
                                 lineno, ((c) < 128 ? (c) : '.'), (c));
#else
#define JAVA_LEX_CHAR(c)
#endif
#define JAVA_LEX_KW(c) printf ("java_lex:%d: keyword: '%s'\n", lineno, (c))
/* Relies on a variable named `all_ascii' being in scope at the point
   of use; the identifier text is only printed when it is nonzero.  */
#define JAVA_LEX_ID(s) printf ("java_lex:%d: ID: '%s'\n",           \
                               lineno,                              \
                               (all_ascii ? (s) : "<U>"))
#define JAVA_LEX_LIT(s, r) printf ("java_lex:%d: literal '%s'_%d\n", \
                                   lineno, (s), (r))
#define JAVA_LEX_CHAR_LIT(s) printf ("java_lex:%d: literal '%d'\n", lineno, (s))
#define JAVA_LEX_STR_LIT(s) {                                       \
                              printf ("java_lex:%d: literal '%s'\n", \
                                      lineno, (s));                 \
                            }
#define JAVA_LEX_SEP(c) printf ("java_lex:%d: separator '%c'\n", lineno, (c))
#define JAVA_LEX_OP(c) printf ("java_lex:%d: operator '%s'\n", lineno, (c))
#else
#define JAVA_LEX_CHAR(c)
#define JAVA_LEX_KW(c)
#define JAVA_LEX_ID(s)
#define JAVA_LEX_LIT(s,r)
#define JAVA_LEX_CHAR_LIT(s)
#define JAVA_LEX_STR_LIT(s)
#define JAVA_LEX_SEP(c)
#define JAVA_LEX_OP(s)
#endif

/* Line information containers  */
struct java_line {
  unicode_t *line;              /* The line's unicode */
  char      *unicode_escape_p;  /* The matching char was a unicode escape */
  unicode_t ahead[1];           /* Character ahead */
  char unicode_escape_ahead_p;  /* Character ahead is a unicode escape */
  int max;                      /* buffer's max size */
  int size;                     /* number of unicodes */
  int current;                  /* Current position, unicode based */
  int char_col;                 /* Current position, input char based */
  int lineno;                   /* Its line number */
  int white_space_only;         /* If it contains only white spaces */
};

/* Column width of the character P positions away from the current one
   on the current line: 6 when its unicode_escape_p entry is set, 8 for
   a tab, 1 otherwise.  Reads `ctxp', which must be in scope.  */
#define JAVA_COLUMN_DELTA(p)                                            \
  (ctxp->c_line->unicode_escape_p [ctxp->c_line->current+(p)] ? 6 :     \
   (ctxp->c_line->line [ctxp->c_line->current+(p)] == '\t' ? 8 : 1))

struct java_error {
  struct java_line *line;
  int error;
};

typedef struct _java_lc {
  int line;
  int prev_col;
  int col;
} java_lc;

typedef struct java_lexer
{
  /* The file from which we're reading.  */
  FILE *finput;

  /* Number of consecutive backslashes we've read.  */
  int bs_count;

  /* If nonzero, a value that was pushed back.  */
  unicode_t unget_value;

  /* If nonzero, we've hit EOF.  Used only by java_get_unicode().
     The one-bit flags in this structure are declared unsigned so that
     a flag which has been set reads back as 1; whether a plain `int'
     bit-field is signed is implementation-defined.  */
  unsigned int hit_eof : 1;

#ifdef HAVE_ICONV
  /* Nonzero if we've read any bytes.  We only recognize the
     byte-order-marker (BOM) as the first word.  */
  unsigned int read_anything : 1;

  /* Nonzero if we have to byte swap.  */
  unsigned int byte_swap : 1;

  /* Nonzero if we're using the fallback decoder.  */
  unsigned int use_fallback : 1;

  /* The handle for the iconv converter we're using.  */
  iconv_t handle;

  /* Bytes we've read from the file but have not sent to iconv.  */
  char buffer[1024];

  /* Index of first valid character in buffer, -1 if no valid
     characters.  */
  int first;

  /* Index of last valid character in buffer, plus one.  -1 if no
     valid characters in buffer.  */
  int last;

  /* This is a buffer of characters already converted by iconv.  We
     use `char' here because we're assuming that iconv() converts to
     UCS-2, and then we convert it ourselves.  */
  unsigned char out_buffer[1024];

  /* Index of first valid output character.  -1 if no valid
     characters.  */
  int out_first;

  /* Index of last valid output character, plus one.  -1 if no valid
     characters.  */
  int out_last;

#endif /* HAVE_ICONV */
} java_lexer;

/* Destroy a lexer object.  */
extern void java_destroy_lexer PARAMS ((java_lexer *));

#define JAVA_LINE_MAX 80

/* Build a location compound integer: the line number shifted left by
   12 bits, OR-ed with the low 12 bits of the column.  Reads `ctxp'.  */
#define BUILD_LOCATION() ((ctxp->elc.line << 12) | (ctxp->elc.col & 0xfff))

/* Those macros are defined differently if we compile jc1-lite
   (JC1_LITE defined) or jc1.  */
#ifdef JC1_LITE

#define DCONST0 0
#define REAL_VALUE_TYPE int
#define GET_IDENTIFIER(S) xstrdup ((S))
#define REAL_VALUE_ATOF(LIT,MODE) 0
#define REAL_VALUE_ISINF(VALUE)   0
#define REAL_VALUE_ISNAN(VALUE)   0
#define SET_REAL_VALUE_ATOF(TARGET,SOURCE)
#define FLOAT_TYPE_NODE 0
#define DOUBLE_TYPE_NODE 0
#define SET_MODIFIER_CTX(TOKEN) java_lval->value = (TOKEN)
#define GET_TYPE_PRECISION(NODE) 4
#define BUILD_OPERATOR(TOKEN) return TOKEN
#define BUILD_OPERATOR2(TOKEN) return ASSIGN_ANY_TK
#define SET_LVAL_NODE(NODE)
#define SET_LVAL_NODE_TYPE(NODE, TYPE)
#define BUILD_ID_WFL(EXP) (EXP)
#define JAVA_FLOAT_RANGE_ERROR(S) {}
#define JAVA_INTEGRAL_RANGE_ERROR(S) do { } while (0)

#else

#define DCONST0 dconst0
#define GET_IDENTIFIER(S) get_identifier ((S))
#define SET_REAL_VALUE_ATOF(TARGET,SOURCE) (TARGET) = (SOURCE)
#define FLOAT_TYPE_NODE float_type_node
#define DOUBLE_TYPE_NODE double_type_node
/* Set modifier_ctx according to TOKEN */
#define SET_MODIFIER_CTX(TOKEN)                                            \
  {                                                                        \
    ctxp->modifier_ctx [(TOKEN)-PUBLIC_TK] = build_wfl_node (NULL_TREE);   \
    java_lval->value = (TOKEN)-PUBLIC_TK;                                  \
  }
/* Type precision for long, divided by 8.  NODE is not used.  The
   expansion is a parenthesized expression with no trailing semicolon,
   matching the shape of the JC1_LITE definition above.  */
#define GET_TYPE_PRECISION(NODE) (TYPE_PRECISION (long_type_node) / 8)
/* Build an operator tree node and return TOKEN */
#define BUILD_OPERATOR(TOKEN)                           \
  {                                                     \
    java_lval->operator.token = (TOKEN);                \
    java_lval->operator.location = BUILD_LOCATION();    \
    return (TOKEN);                                     \
  }

/* Build an operator tree node but return ASSIGN_ANY_TK */
#define BUILD_OPERATOR2(TOKEN)                          \
  {                                                     \
    java_lval->operator.token = (TOKEN);                \
    java_lval->operator.location = BUILD_LOCATION();    \
    return ASSIGN_ANY_TK;                               \
  }
/* Set java_lval->node and TREE_TYPE(java_lval->node) in macros */
#define SET_LVAL_NODE(NODE) java_lval->node = (NODE)
#define SET_LVAL_NODE_TYPE(NODE,TYPE)           \
  {                                             \
    java_lval->node = (NODE);                   \
    TREE_TYPE (java_lval->node) = (TYPE);       \
  }
/* Wrap identifier around a wfl */
#define BUILD_ID_WFL(EXP) build_wfl_node ((EXP))
/* Special ways to report error on numeric literals.  Both macros
   temporarily set the current line position to `number_beginning'
   (which must be in scope at the point of use) around the call to
   java_lex_error, then restore it.  */
#define JAVA_FLOAT_RANGE_ERROR(m)                                         \
  {                                                                       \
    char msg [1024];                                                      \
    int i = ctxp->c_line->current;                                        \
    ctxp->c_line->current = number_beginning;                             \
    sprintf (msg, "Floating point literal exceeds range of `%s'", (m));   \
    java_lex_error (msg, 0);                                              \
    ctxp->c_line->current = i;                                            \
  }
#define JAVA_INTEGRAL_RANGE_ERROR(m)            \
  do {                                          \
    int i = ctxp->c_line->current;              \
    ctxp->c_line->current = number_beginning;   \
    java_lex_error ((m), 0);                    \
    ctxp->c_line->current = i;                  \
  } while (0)

#endif /* Definitions for jc1 compilation only */

/* Macros to decode character ranges.  Every argument is parenthesized
   in the expansion, but most of these macros still evaluate C more
   than once, so do not pass an expression with side effects.  */
#define RANGE(c, l, h)           (((c) >= (l) && (c) <= (h)))
#define JAVA_WHITE_SPACE_P(c) ((c) == ' ' || (c) == '\t' || (c) == '\f')
#define JAVA_START_CHAR_P(c) (((c) < 128                                  \
                               && (ISIDST (c) || (c) == '$'))             \
                              || ((c) >= 128 && java_start_char_p (c)))
#define JAVA_PART_CHAR_P(c) (((c) < 128                                   \
                              && (ISIDNUM (c)                             \
                                  || (c) == '$'                           \
                                  || (c) == 0x0000                        \
                                  || RANGE ((c), 0x01, 0x08)              \
                                  || RANGE ((c), 0x0e, 0x1b)              \
                                  || (c) == 0x7f))                        \
                             || ((c) >= 128 && java_part_char_p (c)))
#define JAVA_ASCII_DIGIT(c)    ISDIGIT (c)
#define JAVA_ASCII_OCTDIGIT(c) RANGE ((c), '0', '7')
#define JAVA_ASCII_HEXDIGIT(c) ISXDIGIT (c)
#define JAVA_ASCII_FPCHAR(c)   (RANGE ((c), 'd', 'f') || RANGE ((c), 'D', 'F') || \
                                (c) == '.' || JAVA_ASCII_DIGIT (c))
#define JAVA_FP_SUFFIX(c)      ((c) == 'D' || (c) == 'd' || (c) == 'f' || (c) == 'F')
/* Exponent indicator of a floating point literal: `e' or `E'.  (`F' is
   a type suffix and is matched by JAVA_FP_SUFFIX, not here.)  */
#define JAVA_FP_EXP(c)         ((c) == 'E' || (c) == 'e')
#define JAVA_FP_PM(c)          ((c) == '-' || (c) == '+')
#define JAVA_ASCII_LETTER(c)   ISALPHA (c)

/* Constants  */
#define JAVA_READ_BUFFER 256
#define JAVA_CHAR_ERROR (-2)
#define UEOF (-1)

#endif /* ! GCC_JAVA_LEX_H */