/******************************** -*- C -*- **************************** * * Lexer Module. * * ***********************************************************************/ /*********************************************************************** * * Copyright 1988,89,90,91,92,94,95,99,2000,2001,2002,2003, * 2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc. * Written by Steve Byrne. * * This file is part of GNU Smalltalk. * * GNU Smalltalk is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2, or (at your option) any later * version. * * Linking GNU Smalltalk statically or dynamically with other modules is * making a combined work based on GNU Smalltalk. Thus, the terms and * conditions of the GNU General Public License cover the whole * combination. * * In addition, as a special exception, the Free Software Foundation * give you permission to combine GNU Smalltalk with free software * programs or libraries that are released under the GNU LGPL and with * independent programs running under the GNU Smalltalk virtual machine. * * You may copy and distribute such a system following the terms of the * GNU GPL for GNU Smalltalk and the licenses of the other code * concerned, provided that you include the source code of that other * code when and as the GNU GPL requires distribution of source code. * * Note that people who make modified versions of GNU Smalltalk are not * obligated to grant this special exception for their modified * versions; it is their choice whether to do so. The GNU General * Public License gives permission to release a modified version without * this exception; this exception also makes it possible to release a * modified version which carries forward this exception. * * GNU Smalltalk is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along with * GNU Smalltalk; see the file COPYING. If not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * ***********************************************************************/ #include "gstpriv.h" #if defined(__FreeBSD__) #include #endif /* Define this if you want the lexer to print all the tokens that it scans, * before passing them to the parser. */ /* #define LEXDEBUG */ /* Define this if you're debugging the lexer and you don't want the parser * to be ran -- only lexical scanning will take place. */ /* #define NO_PARSE */ #define WHITE_SPACE 1 #define DIGIT 2 #define ID_CHAR 4 #define BIN_OP_CHAR 8 #define SYMBOL_CHAR 16 /* The obstack containing parse tree nodes. */ struct obstack *_gst_compilation_obstack = NULL; /* True if errors must be reported to the standard error, false if errors should instead stored so that they are passed to Smalltalk code. */ mst_Boolean _gst_report_errors = true; /* This is set to true by the parser or the compiler if an error (respectively, a parse error or a semantic error) is found, and avoids that _gst_execute_statements tries to execute the result of the compilation. */ mst_Boolean _gst_had_error = false; /* This is set to true by the parser if error recovery is going on. In this case ERROR_RECOVERY tokens are generated. */ mst_Boolean _gst_error_recovery = false; /* The location of the first error reported, stored here so that compilation primitives can pass them to Smalltalk code. */ char *_gst_first_error_str = NULL; char *_gst_first_error_file = NULL; int _gst_first_error_line = 0; /* Last returned token. */ static int last_token; /* Balance of parentheses. Used to turn a newline into a period. */ static int parenthesis_depth; /* Answer true if IC is a valid base-10 digit. */ static mst_Boolean is_digit (int ic); /* Answer true if C is a valid base-BASE digit. */ static mst_Boolean is_base_digit (int c, int base); /* Parse the fractional part of a Float constant. Store it in NUMPTR. Read numbers in base-BASE, the first one being C. Answer the scale (number of digits in numPtr). If LARGEINTEGER is not NULL, the digits are stored in an obstack, and LARGEINTEGER is set to true if numPtr does not have sufficient precision. */ static int scan_fraction (int c, mst_Boolean negative, unsigned base, uintptr_t *intNumPtr, struct real *numPtr, mst_Boolean *largeInteger); /* Parse a numeric constant and return it. Read numbers in base-BASE, the first one being C. If a - was parsed, NEGATIVE must be true so that the sign of the result is changed accordingly. If LARGEINTEGER is not NULL, the digits are stored in an obstack, and LARGEINTEGER is set to true if the return value does not have sufficient precision. */ static uintptr_t scan_digits (int c, mst_Boolean negative, unsigned base, struct real * n, mst_Boolean * largeInteger); /* Parse the large integer constant stored as base-BASE digits in the buffer maintained by str.c, adjusting the sign if NEGATIVE is true. Return an embryo of the LargeInteger object as a byte_object structure. */ static byte_object scan_large_integer (mst_Boolean negative, int base); /* Raise an error. */ static int invalid (int c, YYSTYPE * lvalp); /* Parse a comment. C is '"'. Return 0 to indicate the lexer that this lexeme must be ignored. */ static int comment (int c, YYSTYPE * lvalp); /* Parse a character literal. C is '$' */ static int char_literal (int c, YYSTYPE * lvalp); /* Remember the current balance of open/close parentheses, used to treat newlines as periods. */ static int scan_open_paren (int c, YYSTYPE * lvalp); /* Remember the current balance of open/close parentheses, used to treat newlines as periods. */ static int scan_close_paren (int c, YYSTYPE * lvalp); /* Remember the current balance of open/close parentheses, used to treat newlines as periods. */ static int scan_reset_paren (int c, YYSTYPE * lvalp); /* If the current balance of open/close parentheses is zero, and the last token was not a period or bang, treat the newline as a period. */ static int scan_newline (int c, YYSTYPE * lvalp); /* Parse a binary operator. C is the first symbol in the selector */ static int scan_bin_op (int c, YYSTYPE * lvalp); /* Actual work for scan_bin_op is done here. MAYBE_NUMBER is false if we cannot parse a negative number in this context. */ static int scan_bin_op_1 (int c, YYSTYPE * lvalp, mst_Boolean maybe_number); /* Parse a string literal. C is '\'' */ static int string_literal (int c, YYSTYPE * lvalp); /* Parse a number. C is the first digit. */ static int scan_number (int c, YYSTYPE * lvalp); /* Parse an identifier. C is the first letter. */ static int scan_ident (int c, YYSTYPE * lvalp); /* Try to parse an assignment operator or namespace separator. C is ':'. */ static int scan_colon (int c, YYSTYPE * lvalp); /* Try to parse a symbol constant, or return '#'. C is '#'. */ static int scan_symbol (int c, YYSTYPE * lvalp); /* Convert the digit C (if it is a valid base-BASE digit) to its value. Raise an error if it is invalid. */ static int digit_to_int (int c, int base); #ifdef LEXDEBUG static void print_token (int token, YYSTYPE *yylval); #endif typedef struct { int (*lexFunc) (int, YYSTYPE *); int retToken; int char_class; } lex_tab_elt; /* This macro is needed to properly handle 8-bit characters */ #define CHAR_TAB(x) ((x) < 128 ? char_table + (x) : char_table) static const lex_tab_elt char_table[128] = { /* 0 */ {invalid, 0, 0}, /* 1 */ {invalid, 0, 0}, /* 2 */ {invalid, 0, 0}, /* 3 */ {invalid, 0, 0}, /* 4 */ {invalid, 0, 0}, /* 5 */ {invalid, 0, 0}, /* 6 */ {invalid, 0, 0}, /* 7 */ {invalid, 0, 0}, /* 8 */ {invalid, 0, 0}, /* 9 */ {0, 0, WHITE_SPACE}, /* 10 */ {scan_newline, 0, 0}, /* 11 */ {invalid, 0, 0}, /* 12 */ {0, 0, WHITE_SPACE}, /* 13 */ {0, 0, WHITE_SPACE}, /* 14 */ {invalid, 0, 0}, /* 15 */ {invalid, 0, 0}, /* 16 */ {invalid, 0, 0}, /* 17 */ {invalid, 0, 0}, /* 18 */ {invalid, 0, 0}, /* 19 */ {invalid, 0, 0}, /* 20 */ {invalid, 0, 0}, /* 21 */ {invalid, 0, 0}, /* 22 */ {invalid, 0, 0}, /* 23 */ {invalid, 0, 0}, /* 24 */ {invalid, 0, 0}, /* 25 */ {invalid, 0, 0}, /* 26 */ {invalid, 0, 0}, /* 27 */ {invalid, 0, 0}, /* 28 */ {invalid, 0, 0}, /* 29 */ {invalid, 0, 0}, /* 30 */ {invalid, 0, 0}, /* 31 */ {invalid, 0, 0}, /* */ {0, 0, WHITE_SPACE}, /* ! */ {scan_reset_paren, 0, 0}, /* " */ {comment, 0, 0}, /* # */ {scan_symbol, 0, 0}, /* $ */ {char_literal, 0, ID_CHAR | SYMBOL_CHAR}, /* % */ {scan_bin_op, 0, BIN_OP_CHAR}, /* & */ {scan_bin_op, 0, BIN_OP_CHAR}, /* ' */ {string_literal, 0, 0}, /* ( */ {scan_open_paren, 0, 0}, /* ) */ {scan_close_paren, 0, 0}, /* * */ {scan_bin_op, 0, BIN_OP_CHAR}, /* + */ {scan_bin_op, 0, BIN_OP_CHAR}, /* , */ {scan_bin_op, 0, BIN_OP_CHAR}, /* - */ {scan_bin_op, 0, BIN_OP_CHAR}, /* . */ {0, '.', 0}, /* / */ {scan_bin_op, 0, BIN_OP_CHAR}, /* 0 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 1 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 2 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 3 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 4 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 5 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 6 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 7 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 8 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* 9 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR}, /* : */ {scan_colon, 0, SYMBOL_CHAR}, /* ; */ {0, ';', 0}, /* < */ {scan_bin_op, 0, BIN_OP_CHAR}, /* = */ {scan_bin_op, 0, BIN_OP_CHAR}, /* > */ {scan_bin_op, 0, BIN_OP_CHAR}, /* ? */ {scan_bin_op, 0, BIN_OP_CHAR}, /* @ */ {scan_bin_op, 0, BIN_OP_CHAR}, /* A */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* B */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* C */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* D */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* E */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* F */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* G */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* H */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* I */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* J */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* K */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* L */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* M */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* N */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* O */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* P */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* Q */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* R */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* S */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* T */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* U */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* V */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* W */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* X */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* Y */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* Z */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* [ */ {scan_open_paren, 0, 0}, /* \ */ {scan_bin_op, 0, BIN_OP_CHAR}, /* ] */ {scan_close_paren, 0, 0}, /* ^ */ {0, '^', 0}, /* _ */ {0, ASSIGNMENT, ID_CHAR | SYMBOL_CHAR}, /* ` */ {invalid, 0, 0}, /* a */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* b */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* c */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* d */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* e */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* f */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* g */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* h */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* i */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* j */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* k */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* l */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* m */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* n */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* o */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* p */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* q */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* r */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* s */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* t */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* u */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* v */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* w */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* x */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* y */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* z */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR}, /* { */ {scan_open_paren, 0, 0}, /* | */ {scan_bin_op, 0, BIN_OP_CHAR}, /* } */ {scan_close_paren, 0, 0}, /* ~ */ {scan_bin_op, 0, BIN_OP_CHAR}, /* ^? */ {invalid, 0, 0} }; #if defined(LEXDEBUG) static inline int yylex_internal (); int _gst_yylex (PTR lvalp, YYLTYPE *llocp) { int result; result = yylex_internal (lvalp, llocp); print_token (result, lvalp); return (result); } #define _gst_yylex yylex_internal #endif /* LEXDEBUG */ int _gst_yylex (PTR lvalp, YYLTYPE *llocp) { int ic, result; const lex_tab_elt *ct; while ((ic = _gst_next_char ()) != EOF) { ct = CHAR_TAB (ic); if ((ct->char_class & WHITE_SPACE) == 0) { *llocp = _gst_get_location (); assert (ct->lexFunc || ct->retToken); if (ct->lexFunc) result = (*ct->lexFunc) (ic, (YYSTYPE *) lvalp); else result = ct->retToken; if (result) { if (_gst_get_cur_stream_prompt ()) last_token = result; return (result); } } } *llocp = _gst_get_location (); return (EOF); } int invalid (int c, YYSTYPE * lvalp) { char cp[5]; if (c < ' ' || c == 127) { cp[0] = '^'; cp[1] = c ^ 64; /* uncontrolify */ cp[2] = '\0'; } else if (c & 128) sprintf (cp, "%#02x", c & 255); else { cp[0] = c; cp[1] = '\0'; } _gst_errorf ("Invalid character %s", cp); _gst_had_error = true; return (0); /* tell the lexer to ignore this */ } int scan_reset_paren (int c, YYSTYPE * lvalp) { if (_gst_get_cur_stream_prompt ()) parenthesis_depth = 0; return c; } int scan_open_paren (int c, YYSTYPE * lvalp) { if (_gst_get_cur_stream_prompt ()) parenthesis_depth++; return c; } int scan_close_paren (int c, YYSTYPE * lvalp) { if (_gst_get_cur_stream_prompt ()) parenthesis_depth--; return c; } int scan_newline (int c, YYSTYPE * lvalp) { if (_gst_get_cur_stream_prompt ()) { /* Newline is special-cased in the REPL. */ if (_gst_error_recovery) return ERROR_RECOVERY; if (parenthesis_depth == 0 && last_token != 0 && last_token != '.' && last_token != '!' && last_token != KEYWORD && last_token != BINOP && last_token != '|' && last_token != '<' && last_token != '>' && last_token != ';' && last_token != ASSIGNMENT && last_token != SCOPE_SEPARATOR) return ('.'); } return 0; } int comment (int c, YYSTYPE * lvalp) { int ic; do { ic = _gst_next_char (); if (ic == EOF) { _gst_errorf ("Unterminated comment, attempting recovery"); _gst_had_error = true; break; } } while (ic != c); return (0); } int char_literal (int c, YYSTYPE * lvalp) { int ic; ic = _gst_next_char (); if (ic == EOF) { _gst_errorf ("Unterminated character literal, attempting recovery"); _gst_unread_char (ic); _gst_had_error = true; return (0); } else { if (ic > 127) { _gst_errorf ("Invalid character literal, only character codes from 0 to 127 are valid"); _gst_had_error = true; } lvalp->ival = ic; return (CHAR_LITERAL); } } int scan_colon (int c, YYSTYPE * lvalp) { int ic; ic = _gst_next_char (); if (ic == '=') return (ASSIGNMENT); else if (ic == ':') return (SCOPE_SEPARATOR); else _gst_unread_char (ic); return (':'); } int scan_symbol (int c, YYSTYPE *lvalp) { int ic; ic = _gst_next_char (); if (ic == EOF) return '#'; /* Look for a shebang (#! /). */ if (ic == '!') { YYLTYPE loc = _gst_get_location (); if (loc.first_line == 1 && loc.first_column == 2) { while (((ic = _gst_next_char ()) != EOF) && ic != '\r' && ic != '\n') continue; return (SHEBANG); } } /* We can read a binary operator and return a SYMBOL_LITERAL,... */ if (CHAR_TAB (ic)->char_class & BIN_OP_CHAR) { scan_bin_op_1 (ic, lvalp, false); return SYMBOL_LITERAL; } if (ic == '\'') { string_literal (ic, lvalp); return SYMBOL_LITERAL; } /* ...else, we can absorb identifier characters and colons, but discard anything else. */ if ((CHAR_TAB (ic)->char_class & (DIGIT | SYMBOL_CHAR)) != SYMBOL_CHAR) { _gst_unread_char (ic); return '#'; } obstack_1grow (_gst_compilation_obstack, ic); while (((ic = _gst_next_char ()) != EOF) && (CHAR_TAB (ic)->char_class & SYMBOL_CHAR)) obstack_1grow (_gst_compilation_obstack, ic); _gst_unread_char (ic); obstack_1grow (_gst_compilation_obstack, '\0'); lvalp->sval = obstack_finish (_gst_compilation_obstack); return SYMBOL_LITERAL; } int scan_bin_op_1 (int c, YYSTYPE *lvalp, mst_Boolean maybe_number) { char buf[3]; int ic; buf[0] = c; ic = _gst_next_char (); if (ic != EOF && (CHAR_TAB (ic)->char_class & BIN_OP_CHAR)) { buf[1] = ic, buf[2] = 0; /* temptatively accumulate next char */ /* This may be a two-character binary operator, except if the second character is a - and is followed by a digit. */ if (ic == '-') { ic = _gst_next_char (); _gst_unread_char (ic); if (is_digit (ic)) { _gst_unread_char ('-'); buf[1] = '\0'; } } } else { _gst_unread_char (ic); buf[1] = 0; } lvalp->sval = xstrdup (buf); if ((buf[0] == '|' || buf[0] == '<' || buf[0] == '>' || buf[0] == '-') && buf[1] == '\0') return (buf[0]); else return (BINOP); } int scan_bin_op (int c, YYSTYPE *lvalp) { return scan_bin_op_1 (c, lvalp, true); } int string_literal (int c, YYSTYPE * lvalp) { int ic; for (;;) { ic = _gst_next_char (); if (ic == EOF) { _gst_errorf ("Unterminated string, attempting recovery"); _gst_had_error = true; break; } if (ic == c) { /* check for doubled delimiters */ ic = _gst_next_char (); if (ic != c) { _gst_unread_char (ic); break; } } obstack_1grow (_gst_compilation_obstack, ic); } obstack_1grow (_gst_compilation_obstack, '\0'); lvalp->sval = obstack_finish (_gst_compilation_obstack); return (STRING_LITERAL); } int scan_ident (int c, YYSTYPE * lvalp) { int ic, identType; obstack_1grow (_gst_compilation_obstack, c); identType = IDENTIFIER; while (((ic = _gst_next_char ()) != EOF) && (CHAR_TAB (ic)->char_class & ID_CHAR)) obstack_1grow (_gst_compilation_obstack, ic); /* Read a dot as '::' if followed by a letter. */ if (ic == '.') { ic = _gst_next_char (); _gst_unread_char (ic); if (ic != EOF && (CHAR_TAB (ic)->char_class & ID_CHAR)) { _gst_unread_char (':'); _gst_unread_char (':'); } else _gst_unread_char ('.'); } else if (ic == ':') { ic = _gst_next_char (); _gst_unread_char (ic); if (ic == ':' || ic == '=') /* foo:: and foo:= split before colon */ _gst_unread_char (':'); else { obstack_1grow (_gst_compilation_obstack, ':'); identType = KEYWORD; } } else _gst_unread_char (ic); obstack_1grow (_gst_compilation_obstack, '\0'); lvalp->sval = obstack_finish (_gst_compilation_obstack); return (identType); } /* TODO: We track the number in *three* formats: struct real, uintptr_t, * and just save the bytes for large integers. We should just save * the bytes and work on those. */ int scan_number (int c, YYSTYPE * lvalp) { OOP intNumOOP; int base, exponent, ic; uintptr_t intNum; struct real num, dummy; int floatExponent; mst_Boolean isNegative = false, largeInteger = false; int float_type = 0; base = 10; exponent = 0; ic = c; assert (ic != '-'); intNum = scan_digits (ic, false, 10, &num, &largeInteger); ic = _gst_next_char (); if (ic == 'r') { char *p = obstack_finish (_gst_compilation_obstack); obstack_free (_gst_compilation_obstack, p); if (intNum > 36 || largeInteger) { _gst_errorf ("Numeric base too large %d", base); _gst_had_error = true; } else base = intNum; ic = _gst_next_char (); /* Having to support things like 16r-123 is a pity :-) because we actually incorrectly accept -16r-0. */ if (ic == '-') { isNegative = true; ic = _gst_next_char (); } intNum = scan_digits (ic, isNegative, base, &num, &largeInteger); ic = _gst_next_char (); } if (ic == '.') { ic = _gst_next_char (); if (!is_base_digit (ic, base)) { /* OOPS...we gobbled the '.' by mistake...it was a statement boundary delimiter. We have an integer that we need to return, and need to push back both the . and the character that we just read. */ _gst_unread_char (ic); ic = '.'; } else { float_type = FLOATD_LITERAL; exponent = scan_fraction (ic, isNegative, base, &intNum, &num, &largeInteger); ic = _gst_next_char (); } } if (ic == 's') do { /* By default the same as the number of decimal points we used. */ floatExponent = -exponent; ic = _gst_next_char (); if (ic == EOF) ; else if (CHAR_TAB (ic)->char_class & DIGIT) { /* 123s4 format -- parse the exponent */ floatExponent = scan_digits (ic, false, 10, &dummy, NULL); } else if (CHAR_TAB (ic)->char_class & ID_CHAR) { /* 123stuvwxyz sends #stuvwxyz to 123!!! */ _gst_unread_char (ic); ic = 's'; break; } else _gst_unread_char (ic); if (largeInteger) { /* Make a LargeInteger constant and create an object out of it. */ byte_object bo = scan_large_integer (isNegative, base); gst_object result = instantiate_with (bo->class, bo->size, &intNumOOP); memcpy (result->data, bo->body, bo->size); } else intNumOOP = FROM_INT((intptr_t) (isNegative ? -intNum : intNum)); /* too much of a chore to create a Fraction, so we call-in. We lose the ability to create ScaledDecimals during the very first phases of bootstrapping, but who cares?... This is equivalent to (intNumOOP * (10 raisedToInteger: exponent) asScaledDecimal: floatExponent) */ lvalp->oval = _gst_msg_send (intNumOOP, _gst_as_scaled_decimal_radix_scale_symbol, FROM_INT (exponent), FROM_INT (base), FROM_INT ((int) floatExponent), NULL); /* incubator is set up by _gst_compile_method */ INC_ADD_OOP (lvalp->oval); MAKE_OOP_READONLY (lvalp->oval, true); return (SCALED_DECIMAL_LITERAL); } while (0); if (ic == 'e' || ic == 'd' || ic == 'q') { int exp_char = ic; switch (ic) { case 'e': float_type = FLOATE_LITERAL; break; case 'd': float_type = FLOATD_LITERAL; break; case 'q': float_type = FLOATQ_LITERAL; break; } ic = _gst_next_char (); if (ic == EOF) ; else if (ic == '-') { floatExponent = scan_digits (_gst_next_char (), true, 10, &dummy, NULL); exponent -= (int) floatExponent; } else if (CHAR_TAB (ic)->char_class & DIGIT) { floatExponent = scan_digits (ic, false, 10, &dummy, NULL); exponent += (int) floatExponent; } else if (CHAR_TAB (ic)->char_class & ID_CHAR) { /* 123def sends #def to 123!!! */ _gst_unread_char (ic); ic = exp_char; } else _gst_unread_char (ic); } else _gst_unread_char (ic); if (float_type) { char *p = obstack_finish (_gst_compilation_obstack); obstack_free (_gst_compilation_obstack, p); if (exponent) { struct real r; _gst_real_from_int (&r, base); _gst_real_powi (&r, &r, exponent < 0 ? -exponent : exponent); if (exponent < 0) _gst_real_div (&num, &num, &r); else _gst_real_mul (&num, &r); } lvalp->fval = _gst_real_get_ld (&num); if (isNegative) lvalp->fval = -lvalp->fval; return (float_type); } else if (largeInteger) { lvalp->boval = scan_large_integer (isNegative, base); return (LARGE_INTEGER_LITERAL); } else { char *p = obstack_finish (_gst_compilation_obstack); obstack_free (_gst_compilation_obstack, p); lvalp->ival = (intptr_t) (isNegative ? -intNum : intNum); return (INTEGER_LITERAL); } } uintptr_t scan_digits (int c, mst_Boolean negative, unsigned base, struct real * n, mst_Boolean * largeInteger) { uintptr_t result; mst_Boolean oneDigit = false; while (c == '_') c = _gst_next_char (); memset (n, 0, sizeof (*n)); for (result = 0.0; is_base_digit (c, base); ) { unsigned value = digit_to_int (c, base); if (largeInteger) { obstack_1grow (_gst_compilation_obstack, digit_to_int (c, base)); if (result > (negative /* We want (uintptr_t) -MIN_ST_INT, but it's the same. */ ? (uintptr_t) MIN_ST_INT - value : (uintptr_t) MAX_ST_INT - value) / base) *largeInteger = true; } _gst_real_mul_int (n, base); _gst_real_add_int (n, value); oneDigit = true; result *= base; result += value; do c = _gst_next_char (); while (c == '_'); } if (!oneDigit) { _gst_errorf ("Unexpected EOF while scanning number"); _gst_had_error = true; } _gst_unread_char (c); return (result); } int scan_fraction (int c, mst_Boolean negative, unsigned base, uintptr_t *intNumPtr, struct real *numPtr, mst_Boolean *largeInteger) { uintptr_t intNum; int scale; scale = 0; while (c == '_') c = _gst_next_char (); for (intNum = *intNumPtr; is_base_digit (c, base); ) { unsigned value = digit_to_int (c, base); if (largeInteger) { obstack_1grow (_gst_compilation_obstack, digit_to_int (c, base)); if (intNum > (negative /* We want (uintptr_t) -MIN_ST_INT, but it's the same. */ ? (uintptr_t) MIN_ST_INT - value : (uintptr_t) MAX_ST_INT - value) / base) *largeInteger = true; } _gst_real_mul_int (numPtr, base); _gst_real_add_int (numPtr, value); intNum *= base; intNum += value; scale--; do c = _gst_next_char (); while (c == '_'); } _gst_unread_char (c); *intNumPtr = intNum; return scale; } int digit_to_int (int c, int base) { if (c < '0' || (c > '9' && c < 'A') || c > 'Z') { _gst_errorf ("Invalid digit %c in number", c); _gst_had_error = true; return (0); } if (c >= 'A') c = c - 'A' + 10; else c -= '0'; if (c >= base) { _gst_errorf ("Digit '%c' too large for base %d", c, base); _gst_had_error = true; return (0); } return (c); } mst_Boolean is_base_digit (int c, int base) { if (c < '0' || (c > '9' && c < 'A') || c > 'Z') return (false); if (c >= 'A') c = c - 'A' + 10; else c -= '0'; return (c < base); } mst_Boolean is_digit (int ic) { return (ic != EOF && (CHAR_TAB (ic)->char_class & DIGIT) != 0); } byte_object scan_large_integer (mst_Boolean negative, int base) { int i; int size, digitsLeft; gst_uchar *digits, *result; byte_object bo; /* Copy the contents of the currently grown obstack on the stack. */ size = obstack_object_size (_gst_compilation_obstack); digits = (gst_uchar *) alloca (size); memcpy (digits, obstack_base (_gst_compilation_obstack), size); /* And reuse the area on the obstack for a struct byte_object. */ obstack_blank (_gst_compilation_obstack, sizeof (struct byte_object)); bo = (byte_object) obstack_finish (_gst_compilation_obstack); bo->class = negative ? _gst_large_negative_integer_class : _gst_large_positive_integer_class; result = bo->body; memset (result, 0, size); /* On each pass, multiply the previous partial result by the base, and sum each of the digits as they were retrieved by scan_digits. */ for (digitsLeft = size; digitsLeft--;) { int total, carry; total = result[0] * base + *digits++; carry = total >> 8; result[0] = (gst_uchar) total; for (i = 1; i < size; i++) { total = result[i] * base + carry; carry = total >> 8; result[i] = (gst_uchar) total; } } if (negative) { /* Do two's complement -- first invert, then increment with carry */ for (i = 0; i < size; i++) result[i] ^= 255; for (i = 0; (++result[i]) == 0; i++); /* Search where the number really ends -- discard trailing 111... bytes but remember, the most significant bit of the last digit must be 1! */ for (; size > 0 && result[size - 1] == 255; size--); if (result[size - 1] < 128) size++; } else { /* Search where the number really ends -- discard trailing 000... bytes but remember, the most significant bit of the last digit must be 0! */ for (; size > 0 && result[size - 1] == 0; size--); if (result[size - 1] > 127) size++; } /* Only now can we set the size! */ bo->size = size; return (bo); } void _gst_parse_stream (mst_Boolean method) { struct obstack thisObstack, *oldObstack; /* Allow re-entrancy by allocating a different obstack every time _gst_parse_stream is called */ oldObstack = _gst_compilation_obstack; _gst_compilation_obstack = &thisObstack; obstack_init (&thisObstack); { #ifdef NO_PARSE YYSTYPE yylval; while (_gst_yylex (&yylval)); #else /* !NO_PARSE */ _gst_had_error = false; if (method) { _gst_parse_method (); _gst_reset_compilation_category (); } else _gst_parse_chunks (); #endif /* !NO_PARSE */ } obstack_free (&thisObstack, NULL); _gst_compilation_obstack = oldObstack; } #ifdef LEXDEBUG void print_token (token, yylval) int token; YYSTYPE *yylval; { switch (token) { case 0: break; case '.': case '!': case ':': case '|': case '^': case '#': case ';': case '(': case ')': case '[': case ']': case '{': case '}': printf ("%c\n", token); break; case SCOPE_SEPARATOR: printf ("::\n"); break; case ASSIGNMENT: printf (":=\n"); break; case IDENTIFIER: printf ("IDENTIFIER: `%s'\n", yylval->sval); break; case KEYWORD: printf ("KEYWORD: `%s'\n", yylval->sval); break; case SYMBOL_LITERAL: printf ("SYMBOL_LITERAL: #'%s'\n", yylval->sval); break; case LARGE_INTEGER_LITERAL: printf ("LARGE_INTEGER_LITERAL\n"); case INTEGER_LITERAL: printf ("INTEGER_LITERAL: %ld\n", yylval->ival); break; case FLOATD_LITERAL: printf ("FLOATD_LITERAL: %g\n", (double) yylval->fval); break; case FLOATE_LITERAL: printf ("FLOATE_LITERAL: %g\n", (float) yylval->fval); break; case FLOATQ_LITERAL: printf ("FLOATQ_LITERAL: %Lg\n", yylval->fval); break; case CHAR_LITERAL: printf ("CHAR_LITERAL: %d", yylval->ival, if (yylval->ival >= 32 && yylval->ival <= 126) printf (" ($%c)", (char) yylval->ival); printf ("\n"); break; case STRING_LITERAL: printf ("STRING_LITERAL: '%s'\n", yylval->sval); break; case BINOP: printf ("BINOP: `%s'\n", yylval->sval); break; } } #endif void _gst_yyprint (FILE * file, int token, PTR lval) { YYSTYPE *yylval = (YYSTYPE *) lval; switch (token) { case IDENTIFIER: case BINOP: case KEYWORD: fprintf (file, ": `%s'", yylval->sval); break; case SYMBOL_LITERAL: fprintf (file, ": #'%s'", yylval->sval); break; case STRING_LITERAL: fprintf (file, ": '%s'", yylval->sval); break; case INTEGER_LITERAL: fprintf (file, ": %ld", yylval->ival); break; case FLOATD_LITERAL: fprintf (file, ": %g", (double) yylval->fval); break; case FLOATE_LITERAL: fprintf (file, ": %g", (float) yylval->fval); break; case FLOATQ_LITERAL: fprintf (file, ": %Lg", yylval->fval); break; case CHAR_LITERAL: fprintf (file, ": %d", yylval->ival); if (yylval->ival >= 32 && yylval->ival <= 126) fprintf (file, " ($%c)", (char) yylval->ival); fprintf (file, "\n"); break; default: break; } } mst_Boolean _gst_negate_yylval (int token, YYSTYPE *yylval) { switch (token) { case INTEGER_LITERAL: if (yylval->ival < 0) return false; yylval->ival = -yylval->ival; break; case FLOATD_LITERAL: case FLOATE_LITERAL: case FLOATQ_LITERAL: if (yylval->fval < 0) return false; yylval->fval = -yylval->fval; break; case SCALED_DECIMAL_LITERAL: { int sign; _gst_msg_sendf (&sign, "%i %o sign", yylval->oval); if (sign < 0) return false; _gst_msg_sendf (&yylval->oval, "%o %o negated", yylval->oval); INC_ADD_OOP (yylval->oval); MAKE_OOP_READONLY (yylval->oval, true); break; } case LARGE_INTEGER_LITERAL: { byte_object bo = yylval->boval; gst_uchar *digits = bo->body; int size = bo->size; int i; /* The input value must be positive. */ if (digits[size - 1] >= 128) return false; /* Do two's complement -- first invert, then increment with carry */ for (i = 0; i < size; i++) digits[i] ^= 255; for (i = 0; (++digits[i]) == 0; i++); /* Search where the number really ends -- discard trailing 111... bytes but remember, the most significant bit of the last digit must be 1! */ for (; size > 0 && digits[size - 1] == 255; size--); if (digits[size - 1] < 128) size++; assert (size <= bo->size); bo->size = size; bo->class = _gst_large_negative_integer_class; break; } default: abort (); } return true; }