1 /******************************** -*- C -*- ****************************
2 *
3 * Lexer Module.
4 *
5 *
6 ***********************************************************************/
7
8 /***********************************************************************
9 *
10 * Copyright 1988,89,90,91,92,94,95,99,2000,2001,2002,2003,
11 * 2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc.
12 * Written by Steve Byrne.
13 *
14 * This file is part of GNU Smalltalk.
15 *
16 * GNU Smalltalk is free software; you can redistribute it and/or modify it
17 * under the terms of the GNU General Public License as published by the Free
18 * Software Foundation; either version 2, or (at your option) any later
19 * version.
20 *
21 * Linking GNU Smalltalk statically or dynamically with other modules is
22 * making a combined work based on GNU Smalltalk. Thus, the terms and
23 * conditions of the GNU General Public License cover the whole
24 * combination.
25 *
26 * In addition, as a special exception, the Free Software Foundation
27 * give you permission to combine GNU Smalltalk with free software
28 * programs or libraries that are released under the GNU LGPL and with
29 * independent programs running under the GNU Smalltalk virtual machine.
30 *
31 * You may copy and distribute such a system following the terms of the
32 * GNU GPL for GNU Smalltalk and the licenses of the other code
33 * concerned, provided that you include the source code of that other
34 * code when and as the GNU GPL requires distribution of source code.
35 *
36 * Note that people who make modified versions of GNU Smalltalk are not
37 * obligated to grant this special exception for their modified
38 * versions; it is their choice whether to do so. The GNU General
39 * Public License gives permission to release a modified version without
40 * this exception; this exception also makes it possible to release a
41 * modified version which carries forward this exception.
42 *
43 * GNU Smalltalk is distributed in the hope that it will be useful, but WITHOUT
44 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
45 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
46 * more details.
47 *
48 * You should have received a copy of the GNU General Public License along with
49 * GNU Smalltalk; see the file COPYING. If not, write to the Free Software
50 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
51 *
52 ***********************************************************************/
53
54
55 #include "gstpriv.h"
56
57 #if defined(__FreeBSD__)
58 #include <floatingpoint.h>
59 #endif
60
61 /* Define this if you want the lexer to print all the tokens that it scans,
62 * before passing them to the parser.
63 */
64 /* #define LEXDEBUG */
65
66 /* Define this if you're debugging the lexer and you don't want the parser
67 * to be ran -- only lexical scanning will take place.
68 */
69 /* #define NO_PARSE */
70
71
72
73 #define WHITE_SPACE 1
74 #define DIGIT 2
75 #define ID_CHAR 4
76 #define BIN_OP_CHAR 8
77 #define SYMBOL_CHAR 16
78
79 /* The obstack containing parse tree nodes. */
80 struct obstack *_gst_compilation_obstack = NULL;
81
82 /* True if errors must be reported to the standard error, false if
83 errors should instead stored so that they are passed to Smalltalk
84 code. */
85 mst_Boolean _gst_report_errors = true;
86
87 /* This is set to true by the parser or the compiler if an error
88 (respectively, a parse error or a semantic error) is found, and
89 avoids that _gst_execute_statements tries to execute the result of
90 the compilation. */
91 mst_Boolean _gst_had_error = false;
92
93 /* This is set to true by the parser if error recovery is going on.
94 In this case ERROR_RECOVERY tokens are generated. */
95 mst_Boolean _gst_error_recovery = false;
96
97 /* The location of the first error reported, stored here so that
98 compilation primitives can pass them to Smalltalk code. */
99 char *_gst_first_error_str = NULL;
100 char *_gst_first_error_file = NULL;
101 int _gst_first_error_line = 0;
102
103 /* Last returned token. */
104 static int last_token;
105
106 /* Balance of parentheses. Used to turn a newline into a period. */
107 static int parenthesis_depth;
108
109 /* Answer true if IC is a valid base-10 digit. */
110 static mst_Boolean is_digit (int ic);
111
112 /* Answer true if C is a valid base-BASE digit. */
113 static mst_Boolean is_base_digit (int c,
114 int base);
115
116 /* Parse the fractional part of a Float constant. Store it in
117 NUMPTR. Read numbers in base-BASE, the first one being C. Answer the
118 scale (number of digits in numPtr). If LARGEINTEGER is not NULL,
119 the digits are stored in an obstack, and LARGEINTEGER is set to true
120 if numPtr does not have sufficient precision. */
121 static int scan_fraction (int c,
122 mst_Boolean negative,
123 unsigned base,
124 uintptr_t *intNumPtr,
125 struct real *numPtr,
126 mst_Boolean *largeInteger);
127
128 /* Parse a numeric constant and return it. Read numbers in
129 base-BASE, the first one being C. If a - was parsed, NEGATIVE
130 must be true so that the sign of the result is changed accordingly.
131 If LARGEINTEGER is not NULL, the digits are stored in an obstack,
132 and LARGEINTEGER is set to true if the return value does not have
133 sufficient precision. */
134 static uintptr_t scan_digits (int c,
135 mst_Boolean negative,
136 unsigned base,
137 struct real * n,
138 mst_Boolean * largeInteger);
139
140 /* Parse the large integer constant stored as base-BASE
141 digits in the buffer maintained by str.c, adjusting
142 the sign if NEGATIVE is true. Return an embryo of the
143 LargeInteger object as a byte_object structure. */
144 static byte_object scan_large_integer (mst_Boolean negative,
145 int base);
146
147 /* Raise an error. */
148 static int invalid (int c,
149 YYSTYPE * lvalp);
150
151 /* Parse a comment. C is '"'. Return 0 to indicate the lexer
152 that this lexeme must be ignored. */
153 static int comment (int c,
154 YYSTYPE * lvalp);
155
156 /* Parse a character literal. C is '$' */
157 static int char_literal (int c,
158 YYSTYPE * lvalp);
159
160 /* Remember the current balance of open/close parentheses, used to treat
161 newlines as periods. */
162 static int scan_open_paren (int c,
163 YYSTYPE * lvalp);
164
165 /* Remember the current balance of open/close parentheses, used to treat
166 newlines as periods. */
167 static int scan_close_paren (int c,
168 YYSTYPE * lvalp);
169
170 /* Remember the current balance of open/close parentheses, used to treat
171 newlines as periods. */
172 static int scan_reset_paren (int c,
173 YYSTYPE * lvalp);
174
175 /* If the current balance of open/close parentheses is zero, and the
176 last token was not a period or bang, treat the newline as a period. */
177 static int scan_newline (int c,
178 YYSTYPE * lvalp);
179
180 /* Parse a binary operator. C is the first symbol in the selector */
181 static int scan_bin_op (int c,
182 YYSTYPE * lvalp);
183
184 /* Actual work for scan_bin_op is done here. MAYBE_NUMBER is false if
185 we cannot parse a negative number in this context. */
186 static int scan_bin_op_1 (int c,
187 YYSTYPE * lvalp,
188 mst_Boolean maybe_number);
189
190
191 /* Parse a string literal. C is '\'' */
192 static int string_literal (int c,
193 YYSTYPE * lvalp);
194
195 /* Parse a number. C is the first digit. */
196 static int scan_number (int c,
197 YYSTYPE * lvalp);
198
199 /* Parse an identifier. C is the first letter. */
200 static int scan_ident (int c,
201 YYSTYPE * lvalp);
202
203 /* Try to parse an assignment operator or namespace separator. C is ':'. */
204 static int scan_colon (int c,
205 YYSTYPE * lvalp);
206
207 /* Try to parse a symbol constant, or return '#'. C is '#'. */
208 static int scan_symbol (int c,
209 YYSTYPE * lvalp);
210
211 /* Convert the digit C (if it is a valid base-BASE digit) to its
212 value. Raise an error if it is invalid. */
213 static int digit_to_int (int c,
214 int base);
215
216 #ifdef LEXDEBUG
217 static void print_token (int token,
218 YYSTYPE *yylval);
219 #endif
220
221 typedef struct
222 {
223 int (*lexFunc) (int,
224 YYSTYPE *);
225 int retToken;
226 int char_class;
227 }
228 lex_tab_elt;
229
230 /* This macro is needed to properly handle 8-bit characters */
231 #define CHAR_TAB(x) ((x) < 128 ? char_table + (x) : char_table)
232
233 static const lex_tab_elt char_table[128] = {
234 /* 0 */ {invalid, 0, 0},
235 /* 1 */ {invalid, 0, 0},
236 /* 2 */ {invalid, 0, 0},
237 /* 3 */ {invalid, 0, 0},
238 /* 4 */ {invalid, 0, 0},
239 /* 5 */ {invalid, 0, 0},
240 /* 6 */ {invalid, 0, 0},
241 /* 7 */ {invalid, 0, 0},
242 /* 8 */ {invalid, 0, 0},
243 /* 9 */ {0, 0, WHITE_SPACE},
244 /* 10 */ {scan_newline, 0, 0},
245 /* 11 */ {invalid, 0, 0},
246 /* 12 */ {0, 0, WHITE_SPACE},
247 /* 13 */ {0, 0, WHITE_SPACE},
248 /* 14 */ {invalid, 0, 0},
249 /* 15 */ {invalid, 0, 0},
250 /* 16 */ {invalid, 0, 0},
251 /* 17 */ {invalid, 0, 0},
252 /* 18 */ {invalid, 0, 0},
253 /* 19 */ {invalid, 0, 0},
254 /* 20 */ {invalid, 0, 0},
255 /* 21 */ {invalid, 0, 0},
256 /* 22 */ {invalid, 0, 0},
257 /* 23 */ {invalid, 0, 0},
258 /* 24 */ {invalid, 0, 0},
259 /* 25 */ {invalid, 0, 0},
260 /* 26 */ {invalid, 0, 0},
261 /* 27 */ {invalid, 0, 0},
262 /* 28 */ {invalid, 0, 0},
263 /* 29 */ {invalid, 0, 0},
264 /* 30 */ {invalid, 0, 0},
265 /* 31 */ {invalid, 0, 0},
266 /* */ {0, 0, WHITE_SPACE},
267 /* ! */ {scan_reset_paren, 0, 0},
268 /* " */ {comment, 0, 0},
269 /* # */ {scan_symbol, 0, 0},
270 /* $ */ {char_literal, 0, ID_CHAR | SYMBOL_CHAR},
271 /* % */ {scan_bin_op, 0, BIN_OP_CHAR},
272 /* & */ {scan_bin_op, 0, BIN_OP_CHAR},
273 /* ' */ {string_literal, 0, 0},
274 /* ( */ {scan_open_paren, 0, 0},
275 /* ) */ {scan_close_paren, 0, 0},
276 /* * */ {scan_bin_op, 0, BIN_OP_CHAR},
277 /* + */ {scan_bin_op, 0, BIN_OP_CHAR},
278 /* , */ {scan_bin_op, 0, BIN_OP_CHAR},
279 /* - */ {scan_bin_op, 0, BIN_OP_CHAR},
280 /* . */ {0, '.', 0},
281 /* / */ {scan_bin_op, 0, BIN_OP_CHAR},
282 /* 0 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
283 /* 1 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
284 /* 2 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
285 /* 3 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
286 /* 4 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
287 /* 5 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
288 /* 6 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
289 /* 7 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
290 /* 8 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
291 /* 9 */ {scan_number, 0, DIGIT | ID_CHAR | SYMBOL_CHAR},
292 /* : */ {scan_colon, 0, SYMBOL_CHAR},
293 /* ; */ {0, ';', 0},
294 /* < */ {scan_bin_op, 0, BIN_OP_CHAR},
295 /* = */ {scan_bin_op, 0, BIN_OP_CHAR},
296 /* > */ {scan_bin_op, 0, BIN_OP_CHAR},
297 /* ? */ {scan_bin_op, 0, BIN_OP_CHAR},
298 /* @ */ {scan_bin_op, 0, BIN_OP_CHAR},
299 /* A */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
300 /* B */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
301 /* C */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
302 /* D */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
303 /* E */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
304 /* F */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
305 /* G */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
306 /* H */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
307 /* I */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
308 /* J */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
309 /* K */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
310 /* L */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
311 /* M */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
312 /* N */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
313 /* O */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
314 /* P */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
315 /* Q */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
316 /* R */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
317 /* S */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
318 /* T */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
319 /* U */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
320 /* V */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
321 /* W */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
322 /* X */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
323 /* Y */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
324 /* Z */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
325 /* [ */ {scan_open_paren, 0, 0},
326 /* \ */ {scan_bin_op, 0, BIN_OP_CHAR},
327 /* ] */ {scan_close_paren, 0, 0},
328 /* ^ */ {0, '^', 0},
329 /* _ */ {0, ASSIGNMENT, ID_CHAR | SYMBOL_CHAR},
330 /* ` */ {invalid, 0, 0},
331 /* a */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
332 /* b */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
333 /* c */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
334 /* d */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
335 /* e */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
336 /* f */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
337 /* g */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
338 /* h */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
339 /* i */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
340 /* j */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
341 /* k */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
342 /* l */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
343 /* m */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
344 /* n */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
345 /* o */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
346 /* p */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
347 /* q */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
348 /* r */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
349 /* s */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
350 /* t */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
351 /* u */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
352 /* v */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
353 /* w */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
354 /* x */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
355 /* y */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
356 /* z */ {scan_ident, 0, ID_CHAR | SYMBOL_CHAR},
357 /* { */ {scan_open_paren, 0, 0},
358 /* | */ {scan_bin_op, 0, BIN_OP_CHAR},
359 /* } */ {scan_close_paren, 0, 0},
360 /* ~ */ {scan_bin_op, 0, BIN_OP_CHAR},
361 /* ^? */ {invalid, 0, 0}
362 };
363
364
365 #if defined(LEXDEBUG)
366 static inline int yylex_internal ();
367
368 int
_gst_yylex(PTR lvalp,YYLTYPE * llocp)369 _gst_yylex (PTR lvalp, YYLTYPE *llocp)
370 {
371 int result;
372
373 result = yylex_internal (lvalp, llocp);
374 print_token (result, lvalp);
375 return (result);
376 }
377
378 #define _gst_yylex yylex_internal
379 #endif /* LEXDEBUG */
380
381 int
_gst_yylex(PTR lvalp,YYLTYPE * llocp)382 _gst_yylex (PTR lvalp, YYLTYPE *llocp)
383 {
384 int ic, result;
385 const lex_tab_elt *ct;
386
387 while ((ic = _gst_next_char ()) != EOF)
388 {
389 ct = CHAR_TAB (ic);
390 if ((ct->char_class & WHITE_SPACE) == 0)
391 {
392 *llocp = _gst_get_location ();
393 assert (ct->lexFunc || ct->retToken);
394 if (ct->lexFunc)
395 result = (*ct->lexFunc) (ic, (YYSTYPE *) lvalp);
396 else
397 result = ct->retToken;
398
399 if (result)
400 {
401 if (_gst_get_cur_stream_prompt ())
402 last_token = result;
403 return (result);
404 }
405 }
406 }
407
408 *llocp = _gst_get_location ();
409 return (EOF);
410 }
411
412
413
414
415 int
invalid(int c,YYSTYPE * lvalp)416 invalid (int c,
417 YYSTYPE * lvalp)
418 {
419 char cp[5];
420
421 if (c < ' ' || c == 127)
422 {
423 cp[0] = '^';
424 cp[1] = c ^ 64; /* uncontrolify */
425 cp[2] = '\0';
426 }
427 else if (c & 128)
428 sprintf (cp, "%#02x", c & 255);
429 else
430 {
431 cp[0] = c;
432 cp[1] = '\0';
433 }
434
435 _gst_errorf ("Invalid character %s", cp);
436 _gst_had_error = true;
437 return (0); /* tell the lexer to ignore this */
438 }
439
440
441 int
scan_reset_paren(int c,YYSTYPE * lvalp)442 scan_reset_paren (int c,
443 YYSTYPE * lvalp)
444 {
445 if (_gst_get_cur_stream_prompt ())
446 parenthesis_depth = 0;
447 return c;
448 }
449
450 int
scan_open_paren(int c,YYSTYPE * lvalp)451 scan_open_paren (int c,
452 YYSTYPE * lvalp)
453 {
454 if (_gst_get_cur_stream_prompt ())
455 parenthesis_depth++;
456 return c;
457 }
458
459 int
scan_close_paren(int c,YYSTYPE * lvalp)460 scan_close_paren (int c,
461 YYSTYPE * lvalp)
462 {
463 if (_gst_get_cur_stream_prompt ())
464 parenthesis_depth--;
465 return c;
466 }
467
468 int
scan_newline(int c,YYSTYPE * lvalp)469 scan_newline (int c,
470 YYSTYPE * lvalp)
471 {
472 if (_gst_get_cur_stream_prompt ())
473 {
474 /* Newline is special-cased in the REPL. */
475 if (_gst_error_recovery)
476 return ERROR_RECOVERY;
477
478 if (parenthesis_depth == 0
479 && last_token != 0
480 && last_token != '.' && last_token != '!' && last_token != KEYWORD
481 && last_token != BINOP && last_token != '|' && last_token != '<'
482 && last_token != '>' && last_token != ';'
483 && last_token != ASSIGNMENT && last_token != SCOPE_SEPARATOR)
484 return ('.');
485 }
486
487 return 0;
488 }
489
490
491 int
comment(int c,YYSTYPE * lvalp)492 comment (int c,
493 YYSTYPE * lvalp)
494 {
495 int ic;
496
497 do
498 {
499 ic = _gst_next_char ();
500 if (ic == EOF)
501 {
502 _gst_errorf ("Unterminated comment, attempting recovery");
503 _gst_had_error = true;
504 break;
505 }
506 }
507 while (ic != c);
508 return (0);
509 }
510
511 int
char_literal(int c,YYSTYPE * lvalp)512 char_literal (int c,
513 YYSTYPE * lvalp)
514 {
515 int ic;
516
517 ic = _gst_next_char ();
518 if (ic == EOF)
519 {
520 _gst_errorf
521 ("Unterminated character literal, attempting recovery");
522 _gst_unread_char (ic);
523 _gst_had_error = true;
524 return (0);
525 }
526 else
527 {
528 if (ic > 127)
529 {
530 _gst_errorf
531 ("Invalid character literal, only character codes from 0 to 127 are valid");
532 _gst_had_error = true;
533 }
534 lvalp->ival = ic;
535 return (CHAR_LITERAL);
536 }
537 }
538
539 int
scan_colon(int c,YYSTYPE * lvalp)540 scan_colon (int c,
541 YYSTYPE * lvalp)
542 {
543 int ic;
544
545 ic = _gst_next_char ();
546 if (ic == '=')
547 return (ASSIGNMENT);
548 else if (ic == ':')
549 return (SCOPE_SEPARATOR);
550 else
551 _gst_unread_char (ic);
552
553 return (':');
554 }
555
556
557 int
scan_symbol(int c,YYSTYPE * lvalp)558 scan_symbol (int c,
559 YYSTYPE *lvalp)
560 {
561 int ic;
562
563 ic = _gst_next_char ();
564 if (ic == EOF)
565 return '#';
566
567 /* Look for a shebang (#! /). */
568 if (ic == '!')
569 {
570 YYLTYPE loc = _gst_get_location ();
571 if (loc.first_line == 1 && loc.first_column == 2)
572 {
573 while (((ic = _gst_next_char ()) != EOF)
574 && ic != '\r' && ic != '\n')
575 continue;
576 return (SHEBANG);
577 }
578 }
579
580 /* We can read a binary operator and return a SYMBOL_LITERAL,... */
581 if (CHAR_TAB (ic)->char_class & BIN_OP_CHAR)
582 {
583 scan_bin_op_1 (ic, lvalp, false);
584 return SYMBOL_LITERAL;
585 }
586
587 if (ic == '\'')
588 {
589 string_literal (ic, lvalp);
590 return SYMBOL_LITERAL;
591 }
592
593 /* ...else, we can absorb identifier characters and colons, but
594 discard anything else. */
595 if ((CHAR_TAB (ic)->char_class & (DIGIT | SYMBOL_CHAR)) != SYMBOL_CHAR)
596 {
597 _gst_unread_char (ic);
598 return '#';
599 }
600
601 obstack_1grow (_gst_compilation_obstack, ic);
602
603 while (((ic = _gst_next_char ()) != EOF)
604 && (CHAR_TAB (ic)->char_class & SYMBOL_CHAR))
605 obstack_1grow (_gst_compilation_obstack, ic);
606
607 _gst_unread_char (ic);
608 obstack_1grow (_gst_compilation_obstack, '\0');
609 lvalp->sval = obstack_finish (_gst_compilation_obstack);
610 return SYMBOL_LITERAL;
611 }
612
613
614 int
scan_bin_op_1(int c,YYSTYPE * lvalp,mst_Boolean maybe_number)615 scan_bin_op_1 (int c,
616 YYSTYPE *lvalp,
617 mst_Boolean maybe_number)
618 {
619 char buf[3];
620 int ic;
621
622 buf[0] = c;
623
624 ic = _gst_next_char ();
625 if (ic != EOF && (CHAR_TAB (ic)->char_class & BIN_OP_CHAR))
626 {
627 buf[1] = ic, buf[2] = 0; /* temptatively accumulate next char */
628
629 /* This may be a two-character binary operator, except if
630 the second character is a - and is followed by a digit. */
631 if (ic == '-')
632 {
633 ic = _gst_next_char ();
634 _gst_unread_char (ic);
635 if (is_digit (ic))
636 {
637 _gst_unread_char ('-');
638 buf[1] = '\0';
639 }
640 }
641 }
642 else
643 {
644 _gst_unread_char (ic);
645 buf[1] = 0;
646 }
647
648 lvalp->sval = xstrdup (buf);
649
650 if ((buf[0] == '|' || buf[0] == '<' || buf[0] == '>' || buf[0] == '-')
651 && buf[1] == '\0')
652 return (buf[0]);
653
654 else
655 return (BINOP);
656 }
657
658 int
scan_bin_op(int c,YYSTYPE * lvalp)659 scan_bin_op (int c,
660 YYSTYPE *lvalp)
661 {
662 return scan_bin_op_1 (c, lvalp, true);
663 }
664
665 int
string_literal(int c,YYSTYPE * lvalp)666 string_literal (int c,
667 YYSTYPE * lvalp)
668 {
669 int ic;
670
671 for (;;)
672 {
673 ic = _gst_next_char ();
674 if (ic == EOF)
675 {
676 _gst_errorf ("Unterminated string, attempting recovery");
677 _gst_had_error = true;
678 break;
679 }
680 if (ic == c)
681 {
682 /* check for doubled delimiters */
683 ic = _gst_next_char ();
684 if (ic != c)
685 {
686 _gst_unread_char (ic);
687 break;
688 }
689 }
690 obstack_1grow (_gst_compilation_obstack, ic);
691 }
692 obstack_1grow (_gst_compilation_obstack, '\0');
693 lvalp->sval = obstack_finish (_gst_compilation_obstack);
694 return (STRING_LITERAL);
695 }
696
697 int
scan_ident(int c,YYSTYPE * lvalp)698 scan_ident (int c,
699 YYSTYPE * lvalp)
700 {
701 int ic, identType;
702
703 obstack_1grow (_gst_compilation_obstack, c);
704
705 identType = IDENTIFIER;
706
707 while (((ic = _gst_next_char ()) != EOF)
708 && (CHAR_TAB (ic)->char_class & ID_CHAR))
709 obstack_1grow (_gst_compilation_obstack, ic);
710
711 /* Read a dot as '::' if followed by a letter. */
712 if (ic == '.')
713 {
714 ic = _gst_next_char ();
715 _gst_unread_char (ic);
716 if (ic != EOF && (CHAR_TAB (ic)->char_class & ID_CHAR))
717 {
718 _gst_unread_char (':');
719 _gst_unread_char (':');
720 }
721 else
722 _gst_unread_char ('.');
723 }
724
725 else if (ic == ':')
726 {
727 ic = _gst_next_char ();
728 _gst_unread_char (ic);
729 if (ic == ':' || ic == '=') /* foo:: and foo:= split before colon */
730 _gst_unread_char (':');
731 else
732 {
733 obstack_1grow (_gst_compilation_obstack, ':');
734 identType = KEYWORD;
735 }
736 }
737
738 else
739 _gst_unread_char (ic);
740
741 obstack_1grow (_gst_compilation_obstack, '\0');
742 lvalp->sval = obstack_finish (_gst_compilation_obstack);
743 return (identType);
744 }
745
746
747 /* TODO: We track the number in *three* formats: struct real, uintptr_t,
748 * and just save the bytes for large integers. We should just save
749 * the bytes and work on those. */
750
751 int
scan_number(int c,YYSTYPE * lvalp)752 scan_number (int c,
753 YYSTYPE * lvalp)
754 {
755 OOP intNumOOP;
756 int base, exponent, ic;
757 uintptr_t intNum;
758 struct real num, dummy;
759 int floatExponent;
760 mst_Boolean isNegative = false, largeInteger = false;
761 int float_type = 0;
762
763 base = 10;
764 exponent = 0;
765 ic = c;
766
767 assert (ic != '-');
768 intNum = scan_digits (ic, false, 10, &num, &largeInteger);
769 ic = _gst_next_char ();
770 if (ic == 'r')
771 {
772 char *p = obstack_finish (_gst_compilation_obstack);
773 obstack_free (_gst_compilation_obstack, p);
774
775 if (intNum > 36 || largeInteger)
776 {
777 _gst_errorf ("Numeric base too large %d", base);
778 _gst_had_error = true;
779 }
780 else
781 base = intNum;
782 ic = _gst_next_char ();
783
784 /* Having to support things like 16r-123 is a pity :-) because we
785 actually incorrectly accept -16r-0. */
786 if (ic == '-')
787 {
788 isNegative = true;
789 ic = _gst_next_char ();
790 }
791
792 intNum = scan_digits (ic, isNegative, base, &num, &largeInteger);
793 ic = _gst_next_char ();
794 }
795
796 if (ic == '.')
797 {
798 ic = _gst_next_char ();
799 if (!is_base_digit (ic, base))
800 {
801 /* OOPS...we gobbled the '.' by mistake...it was a statement
802 boundary delimiter. We have an integer that we need to
803 return, and need to push back both the . and the character
804 that we just read. */
805 _gst_unread_char (ic);
806 ic = '.';
807 }
808 else
809 {
810 float_type = FLOATD_LITERAL;
811 exponent = scan_fraction (ic, isNegative, base, &intNum, &num, &largeInteger);
812 ic = _gst_next_char ();
813 }
814 }
815
816 if (ic == 's')
817 do
818 {
819 /* By default the same as the number of decimal points
820 we used. */
821 floatExponent = -exponent;
822
823 ic = _gst_next_char ();
824 if (ic == EOF)
825 ;
826 else if (CHAR_TAB (ic)->char_class & DIGIT)
827 {
828 /* 123s4 format -- parse the exponent */
829 floatExponent = scan_digits (ic, false, 10, &dummy, NULL);
830 }
831 else if (CHAR_TAB (ic)->char_class & ID_CHAR)
832 {
833 /* 123stuvwxyz sends #stuvwxyz to 123!!! */
834 _gst_unread_char (ic);
835 ic = 's';
836 break;
837 }
838 else
839 _gst_unread_char (ic);
840
841 if (largeInteger)
842 {
843 /* Make a LargeInteger constant and create an object out of
844 it. */
845 byte_object bo = scan_large_integer (isNegative, base);
846 gst_object result = instantiate_with (bo->class, bo->size, &intNumOOP);
847 memcpy (result->data, bo->body, bo->size);
848 }
849 else
850 intNumOOP = FROM_INT((intptr_t) (isNegative ? -intNum : intNum));
851
852 /* too much of a chore to create a Fraction, so we call-in. We
853 lose the ability to create ScaledDecimals during the very
854 first phases of bootstrapping, but who cares?...
855
856 This is equivalent to
857 (intNumOOP * (10 raisedToInteger: exponent)
858 asScaledDecimal: floatExponent) */
859 lvalp->oval =
860 _gst_msg_send (intNumOOP, _gst_as_scaled_decimal_radix_scale_symbol,
861 FROM_INT (exponent),
862 FROM_INT (base),
863 FROM_INT ((int) floatExponent),
864 NULL);
865
866 /* incubator is set up by _gst_compile_method */
867 INC_ADD_OOP (lvalp->oval);
868 MAKE_OOP_READONLY (lvalp->oval, true);
869 return (SCALED_DECIMAL_LITERAL);
870 }
871 while (0);
872
873 if (ic == 'e' || ic == 'd' || ic == 'q')
874 {
875 int exp_char = ic;
876
877 switch (ic) {
878 case 'e': float_type = FLOATE_LITERAL; break;
879 case 'd': float_type = FLOATD_LITERAL; break;
880 case 'q': float_type = FLOATQ_LITERAL; break;
881 }
882
883 ic = _gst_next_char ();
884 if (ic == EOF)
885 ;
886 else if (ic == '-') {
887 floatExponent =
888 scan_digits (_gst_next_char (), true, 10, &dummy, NULL);
889 exponent -= (int) floatExponent;
890 }
891 else if (CHAR_TAB (ic)->char_class & DIGIT)
892 {
893 floatExponent = scan_digits (ic, false, 10, &dummy, NULL);
894 exponent += (int) floatExponent;
895 }
896 else if (CHAR_TAB (ic)->char_class & ID_CHAR)
897 {
898 /* 123def sends #def to 123!!! */
899 _gst_unread_char (ic);
900 ic = exp_char;
901 }
902 else
903 _gst_unread_char (ic);
904
905 }
906 else
907 _gst_unread_char (ic);
908
909 if (float_type)
910 {
911 char *p = obstack_finish (_gst_compilation_obstack);
912 obstack_free (_gst_compilation_obstack, p);
913
914 if (exponent)
915 {
916 struct real r;
917 _gst_real_from_int (&r, base);
918 _gst_real_powi (&r, &r, exponent < 0 ? -exponent : exponent);
919 if (exponent < 0)
920 _gst_real_div (&num, &num, &r);
921 else
922 _gst_real_mul (&num, &r);
923 }
924 lvalp->fval = _gst_real_get_ld (&num);
925 if (isNegative)
926 lvalp->fval = -lvalp->fval;
927 return (float_type);
928 }
929 else if (largeInteger)
930 {
931 lvalp->boval = scan_large_integer (isNegative, base);
932 return (LARGE_INTEGER_LITERAL);
933 }
934 else
935 {
936 char *p = obstack_finish (_gst_compilation_obstack);
937 obstack_free (_gst_compilation_obstack, p);
938 lvalp->ival = (intptr_t) (isNegative ? -intNum : intNum);
939 return (INTEGER_LITERAL);
940 }
941 }
942
943 uintptr_t
scan_digits(int c,mst_Boolean negative,unsigned base,struct real * n,mst_Boolean * largeInteger)944 scan_digits (int c,
945 mst_Boolean negative,
946 unsigned base,
947 struct real * n,
948 mst_Boolean * largeInteger)
949 {
950 uintptr_t result;
951 mst_Boolean oneDigit = false;
952
953 while (c == '_')
954 c = _gst_next_char ();
955
956 memset (n, 0, sizeof (*n));
957 for (result = 0.0; is_base_digit (c, base); )
958 {
959 unsigned value = digit_to_int (c, base);
960 if (largeInteger)
961 {
962 obstack_1grow (_gst_compilation_obstack, digit_to_int (c, base));
963 if (result >
964 (negative
965 /* We want (uintptr_t) -MIN_ST_INT, but it's the same. */
966 ? (uintptr_t) MIN_ST_INT - value
967 : (uintptr_t) MAX_ST_INT - value) / base)
968 *largeInteger = true;
969 }
970
971 _gst_real_mul_int (n, base);
972 _gst_real_add_int (n, value);
973 oneDigit = true;
974 result *= base;
975 result += value;
976 do
977 c = _gst_next_char ();
978 while (c == '_');
979 }
980
981 if (!oneDigit)
982 {
983 _gst_errorf ("Unexpected EOF while scanning number");
984 _gst_had_error = true;
985 }
986
987 _gst_unread_char (c);
988
989 return (result);
990 }
991
992 int
scan_fraction(int c,mst_Boolean negative,unsigned base,uintptr_t * intNumPtr,struct real * numPtr,mst_Boolean * largeInteger)993 scan_fraction (int c,
994 mst_Boolean negative,
995 unsigned base,
996 uintptr_t *intNumPtr,
997 struct real *numPtr,
998 mst_Boolean *largeInteger)
999 {
1000 uintptr_t intNum;
1001 int scale;
1002
1003 scale = 0;
1004
1005 while (c == '_')
1006 c = _gst_next_char ();
1007
1008 for (intNum = *intNumPtr; is_base_digit (c, base); )
1009 {
1010 unsigned value = digit_to_int (c, base);
1011 if (largeInteger)
1012 {
1013 obstack_1grow (_gst_compilation_obstack, digit_to_int (c, base));
1014 if (intNum >
1015 (negative
1016 /* We want (uintptr_t) -MIN_ST_INT, but it's the same. */
1017 ? (uintptr_t) MIN_ST_INT - value
1018 : (uintptr_t) MAX_ST_INT - value) / base)
1019 *largeInteger = true;
1020 }
1021
1022 _gst_real_mul_int (numPtr, base);
1023 _gst_real_add_int (numPtr, value);
1024 intNum *= base;
1025 intNum += value;
1026 scale--;
1027
1028 do
1029 c = _gst_next_char ();
1030 while (c == '_');
1031 }
1032
1033 _gst_unread_char (c);
1034
1035 *intNumPtr = intNum;
1036 return scale;
1037 }
1038
1039
1040 int
digit_to_int(int c,int base)1041 digit_to_int (int c,
1042 int base)
1043 {
1044 if (c < '0' || (c > '9' && c < 'A') || c > 'Z')
1045 {
1046 _gst_errorf ("Invalid digit %c in number", c);
1047 _gst_had_error = true;
1048 return (0);
1049 }
1050
1051 if (c >= 'A')
1052 c = c - 'A' + 10;
1053
1054 else
1055 c -= '0';
1056
1057 if (c >= base)
1058 {
1059 _gst_errorf ("Digit '%c' too large for base %d", c, base);
1060 _gst_had_error = true;
1061 return (0);
1062 }
1063
1064 return (c);
1065 }
1066
1067 mst_Boolean
is_base_digit(int c,int base)1068 is_base_digit (int c,
1069 int base)
1070 {
1071 if (c < '0' || (c > '9' && c < 'A') || c > 'Z')
1072 return (false);
1073
1074 if (c >= 'A')
1075 c = c - 'A' + 10;
1076
1077 else
1078 c -= '0';
1079
1080 return (c < base);
1081 }
1082
1083
1084 mst_Boolean
is_digit(int ic)1085 is_digit (int ic)
1086 {
1087 return (ic != EOF && (CHAR_TAB (ic)->char_class & DIGIT) != 0);
1088 }
1089
1090 byte_object
scan_large_integer(mst_Boolean negative,int base)1091 scan_large_integer (mst_Boolean negative,
1092 int base)
1093 {
1094 int i;
1095 int size, digitsLeft;
1096 gst_uchar *digits, *result;
1097 byte_object bo;
1098
1099 /* Copy the contents of the currently grown obstack on the stack. */
1100 size = obstack_object_size (_gst_compilation_obstack);
1101 digits = (gst_uchar *) alloca (size);
1102 memcpy (digits, obstack_base (_gst_compilation_obstack), size);
1103
1104 /* And reuse the area on the obstack for a struct byte_object. */
1105 obstack_blank (_gst_compilation_obstack, sizeof (struct byte_object));
1106 bo = (byte_object) obstack_finish (_gst_compilation_obstack);
1107
1108 bo->class =
1109 negative ? _gst_large_negative_integer_class :
1110 _gst_large_positive_integer_class;
1111 result = bo->body;
1112 memset (result, 0, size);
1113
1114 /* On each pass, multiply the previous partial result by the base,
1115 and sum each of the digits as they were retrieved by scan_digits.
1116 */
1117 for (digitsLeft = size; digitsLeft--;)
1118 {
1119 int total, carry;
1120
1121 total = result[0] * base + *digits++;
1122 carry = total >> 8;
1123 result[0] = (gst_uchar) total;
1124 for (i = 1; i < size; i++)
1125 {
1126 total = result[i] * base + carry;
1127 carry = total >> 8;
1128 result[i] = (gst_uchar) total;
1129 }
1130 }
1131
1132 if (negative)
1133 {
1134 /* Do two's complement -- first invert, then increment with carry
1135 */
1136 for (i = 0; i < size; i++)
1137 result[i] ^= 255;
1138
1139 for (i = 0; (++result[i]) == 0; i++);
1140
1141 /* Search where the number really ends -- discard trailing 111...
1142 bytes but remember, the most significant bit of the last digit
1143 must be 1! */
1144 for (; size > 0 && result[size - 1] == 255; size--);
1145 if (result[size - 1] < 128)
1146 size++;
1147 }
1148 else
1149 {
1150 /* Search where the number really ends -- discard trailing 000...
1151 bytes but remember, the most significant bit of the last digit
1152 must be 0! */
1153 for (; size > 0 && result[size - 1] == 0; size--);
1154 if (result[size - 1] > 127)
1155 size++;
1156 }
1157
1158 /* Only now can we set the size! */
1159 bo->size = size;
1160 return (bo);
1161 }
1162
1163
1164 void
_gst_parse_stream(mst_Boolean method)1165 _gst_parse_stream (mst_Boolean method)
1166 {
1167 struct obstack thisObstack, *oldObstack;
1168
1169 /* Allow re-entrancy by allocating a different obstack every time
1170 _gst_parse_stream is called */
1171 oldObstack = _gst_compilation_obstack;
1172 _gst_compilation_obstack = &thisObstack;
1173 obstack_init (&thisObstack);
1174
1175 {
1176 #ifdef NO_PARSE
1177 YYSTYPE yylval;
1178 while (_gst_yylex (&yylval));
1179 #else /* !NO_PARSE */
1180 _gst_had_error = false;
1181 if (method)
1182 {
1183 _gst_parse_method ();
1184 _gst_reset_compilation_category ();
1185 }
1186 else
1187 _gst_parse_chunks ();
1188 #endif /* !NO_PARSE */
1189 }
1190
1191 obstack_free (&thisObstack, NULL);
1192 _gst_compilation_obstack = oldObstack;
1193 }
1194
1195
1196
1197 #ifdef LEXDEBUG
1198 void
print_token(token,yylval)1199 print_token (token,
1200 yylval)
1201 int token;
1202 YYSTYPE *yylval;
1203 {
1204 switch (token)
1205 {
1206 case 0:
1207 break;
1208 case '.':
1209 case '!':
1210 case ':':
1211 case '|':
1212 case '^':
1213 case '#':
1214 case ';':
1215 case '(':
1216 case ')':
1217 case '[':
1218 case ']':
1219 case '{':
1220 case '}':
1221 printf ("%c\n", token);
1222 break;
1223 case SCOPE_SEPARATOR:
1224 printf ("::\n");
1225 break;
1226 case ASSIGNMENT:
1227 printf (":=\n");
1228 break;
1229 case IDENTIFIER:
1230 printf ("IDENTIFIER: `%s'\n", yylval->sval);
1231 break;
1232 case KEYWORD:
1233 printf ("KEYWORD: `%s'\n", yylval->sval);
1234 break;
1235 case SYMBOL_LITERAL:
1236 printf ("SYMBOL_LITERAL: #'%s'\n", yylval->sval);
1237 break;
1238 case LARGE_INTEGER_LITERAL:
1239 printf ("LARGE_INTEGER_LITERAL\n");
1240 case INTEGER_LITERAL:
1241 printf ("INTEGER_LITERAL: %ld\n", yylval->ival);
1242 break;
1243 case FLOATD_LITERAL:
1244 printf ("FLOATD_LITERAL: %g\n", (double) yylval->fval);
1245 break;
1246 case FLOATE_LITERAL:
1247 printf ("FLOATE_LITERAL: %g\n", (float) yylval->fval);
1248 break;
1249 case FLOATQ_LITERAL:
1250 printf ("FLOATQ_LITERAL: %Lg\n", yylval->fval);
1251 break;
1252 case CHAR_LITERAL:
1253 printf ("CHAR_LITERAL: %d", yylval->ival,
1254 if (yylval->ival >= 32 && yylval->ival <= 126)
1255 printf (" ($%c)", (char) yylval->ival);
1256 printf ("\n");
1257 break;
1258 case STRING_LITERAL:
1259 printf ("STRING_LITERAL: '%s'\n", yylval->sval);
1260 break;
1261 case BINOP:
1262 printf ("BINOP: `%s'\n", yylval->sval);
1263 break;
1264 }
1265 }
1266 #endif
1267
1268 void
1269 _gst_yyprint (FILE * file,
1270 int token,
1271 PTR lval)
1272 {
1273 YYSTYPE *yylval = (YYSTYPE *) lval;
1274
1275 switch (token)
1276 {
1277 case IDENTIFIER:
1278 case BINOP:
1279 case KEYWORD:
1280 fprintf (file, ": `%s'", yylval->sval);
1281 break;
1282 case SYMBOL_LITERAL:
1283 fprintf (file, ": #'%s'", yylval->sval);
1284 break;
1285 case STRING_LITERAL:
1286 fprintf (file, ": '%s'", yylval->sval);
1287 break;
1288 case INTEGER_LITERAL:
1289 fprintf (file, ": %ld", yylval->ival);
1290 break;
1291 case FLOATD_LITERAL:
1292 fprintf (file, ": %g", (double) yylval->fval);
1293 break;
1294 case FLOATE_LITERAL:
1295 fprintf (file, ": %g", (float) yylval->fval);
1296 break;
1297 case FLOATQ_LITERAL:
1298 fprintf (file, ": %Lg", yylval->fval);
1299 break;
1300 case CHAR_LITERAL:
1301 fprintf (file, ": %d", yylval->ival);
1302 if (yylval->ival >= 32 && yylval->ival <= 126)
1303 fprintf (file, " ($%c)", (char) yylval->ival);
1304 fprintf (file, "\n");
1305 break;
1306 default:
1307 break;
1308 }
1309 }
1310
1311 mst_Boolean
1312 _gst_negate_yylval (int token, YYSTYPE *yylval)
1313 {
1314 switch (token)
1315 {
1316 case INTEGER_LITERAL:
1317 if (yylval->ival < 0)
1318 return false;
1319 yylval->ival = -yylval->ival;
1320 break;
1321 case FLOATD_LITERAL:
1322 case FLOATE_LITERAL:
1323 case FLOATQ_LITERAL:
1324 if (yylval->fval < 0)
1325 return false;
1326 yylval->fval = -yylval->fval;
1327 break;
1328
1329 case SCALED_DECIMAL_LITERAL:
1330 {
1331 int sign;
1332 _gst_msg_sendf (&sign, "%i %o sign", yylval->oval);
1333 if (sign < 0)
1334 return false;
1335
1336 _gst_msg_sendf (&yylval->oval, "%o %o negated", yylval->oval);
1337 INC_ADD_OOP (yylval->oval);
1338 MAKE_OOP_READONLY (yylval->oval, true);
1339 break;
1340 }
1341
1342 case LARGE_INTEGER_LITERAL:
1343 {
1344 byte_object bo = yylval->boval;
1345 gst_uchar *digits = bo->body;
1346 int size = bo->size;
1347 int i;
1348
1349 /* The input value must be positive. */
1350 if (digits[size - 1] >= 128)
1351 return false;
1352
1353 /* Do two's complement -- first invert, then increment with carry */
1354 for (i = 0; i < size; i++)
1355 digits[i] ^= 255;
1356
1357 for (i = 0; (++digits[i]) == 0; i++);
1358
1359 /* Search where the number really ends -- discard trailing 111...
1360 bytes but remember, the most significant bit of the last digit
1361 must be 1! */
1362 for (; size > 0 && digits[size - 1] == 255; size--);
1363 if (digits[size - 1] < 128)
1364 size++;
1365
1366 assert (size <= bo->size);
1367 bo->size = size;
1368 bo->class = _gst_large_negative_integer_class;
1369 break;
1370 }
1371
1372 default:
1373 abort ();
1374 }
1375
1376 return true;
1377 }
1378