1 /* reader.c -*- mode:c; coding:utf-8; -*-
2 *
3 * Copyright (c) 2010-2021 Takashi Kato <ktakashi@ymail.com>
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $Id: $
29 */
30 #include <ctype.h>
31 #include <string.h>
32 #define LIBSAGITTARIUS_BODY
33 #include "sagittarius/private/reader.h"
34 #include "sagittarius/private/core.h"
35 #include "sagittarius/private/port.h"
36 #include "sagittarius/private/pair.h"
37 #include "sagittarius/private/symbol.h"
38 #include "sagittarius/private/hashtable.h"
39 #include "sagittarius/private/transcoder.h"
40 #include "sagittarius/private/compare.h"
41 #include "sagittarius/private/keyword.h"
42 #include "sagittarius/private/builtin-symbols.h"
43 #include "sagittarius/private/error.h"
44 #include "sagittarius/private/gloc.h"
45 #include "sagittarius/private/writer.h"
46 #include "sagittarius/private/unicode.h"
47 #include "sagittarius/private/values.h"
48 #include "sagittarius/private/vector.h"
49 #include "sagittarius/private/number.h"
50 #include "sagittarius/private/vm.h"
51 #include "sagittarius/private/bytevector.h"
52 #include "sagittarius/private/unicode.h"
53 #include "sagittarius/private/weak.h"
54 #include "sagittarius/private/writer.h"
55 #include "sagittarius/private/library.h"
56
57 #include "shortnames.incl"
58
59 static uint8_t CHAR_MAP[] = {
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62 /* ! " # $ % & ' ( ) * + , - . / */
63 0, 3, 4, 4, 3, 3, 3, 0, 4, 4, 3, 1, 0, 1, 1, 3,
64 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 3, 3, 3, 3,
66 /* @ A B C D E F G H I J K L M N O */
67 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
68 /* P Q R S T U V W X Y Z [ \ ] ^ _ */
69 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 0, 4, 3, 3,
70 /* ` a b c d e f g h i j k l m n o */
71 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
72 /* p q r s t u v w x y z { | } ~ ^? */
73 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 0,
74 };
75 #define CHAR_MAP_SYMBOL 0x01
76 #define CHAR_MAP_INITIAL 0x02
77 #define CHAR_MAP_DELIMITER 0x04
78
79 #define SYMBOL_CHARP(x) ((CHAR_MAP[x] & CHAR_MAP_SYMBOL) != 0)
80 #define INITIAL_CHARP(x) ((CHAR_MAP[x] & CHAR_MAP_INITIAL) != 0)
81 #define DELIMITER_CHARP(x) ((CHAR_MAP[x] & CHAR_MAP_DELIMITER) != 0)
82
83 static SgObject pair_infos[] = {
84 SG_UNDEF,
85 SG_UNDEF
86 };
87
88 #define SYM_CONST pair_infos[0]
89 #define SYM_SOURCE_INFO pair_infos[1]
90
convert_hex_char_to_int(SgChar c)91 static int convert_hex_char_to_int(SgChar c)
92 {
93 if ((c >= '0') & (c <= '9')) return c - '0';
94 else if ((c >= 'a') & (c <= 'f')) return c - 'a' + 10;
95 else if ((c >= 'A') & (c <= 'F')) return c - 'A' + 10;
96 return -1;
97 }
98
sref_print(SgObject obj,SgPort * port,SgWriteContext * ctx)99 static void sref_print(SgObject obj, SgPort *port, SgWriteContext *ctx)
100 {
101 SgSharedRef *ref = SG_SHAREDREF(obj);
102 Sg_Putuz(port, UC("#<shared-ref "));
103 if (SG_NUMBERP(ref->index)) {
104 Sg_Puts(port, Sg_NumberToString(ref->index, 10, FALSE));
105 } else {
106 Sg_Putuz(port, UC("???"));
107 }
108 Sg_Putc(port, '>');
109 }
110 SG_DEFINE_BUILTIN_CLASS_SIMPLE(Sg_SharedRefClass, sref_print);
111
112 /* TODO, we probably want to have slot access */
rctx_print(SgObject obj,SgPort * port,SgWriteContext * ctx)113 static void rctx_print(SgObject obj, SgPort *port, SgWriteContext *ctx)
114 {
115 Sg_PutuzUnsafe(port, UC("#<read-context>"));
116 }
117 SG_DEFINE_BUILTIN_CLASS_SIMPLE(Sg_ReadContextClass, rctx_print);
118
make_read_context()119 static SgReadContext * make_read_context()
120 {
121 SgReadContext *ctx = SG_NEW(SgReadContext);
122 SG_SET_CLASS(ctx, SG_CLASS_READ_CONTEXT);
123 return ctx;
124 }
125 static SgReadContext *DEFAULT_CONTEXT = NULL;
126
Sg_MakeDefaultReadContext()127 SgObject Sg_MakeDefaultReadContext()
128 {
129 return SG_OBJ(make_read_context());
130 }
131
Sg_MakeReadContextForLoad()132 SgObject Sg_MakeReadContextForLoad()
133 {
134 SgReadContext *ctx = make_read_context();
135 ctx->flags = SG_READ_SOURCE_INFO | SG_CHANGE_VM_MODE;
136 ctx->graph = SG_HASHTABLE(Sg_MakeHashTableSimple(SG_HASH_EQ, 1));
137 return SG_OBJ(ctx);
138 }
139
make_shared_ref(long mark)140 static SgSharedRef* make_shared_ref(long mark)
141 {
142 SgSharedRef *z = SG_NEW(SgSharedRef);
143 SG_SET_CLASS(z, SG_CLASS_SHARED_REF);
144 z->index = SG_MAKE_INT(mark);
145 return z;
146 }
147
148 /* ctx utility */
parsing_range(SgReadContext * ctx,long from,long to)149 static void parsing_range(SgReadContext *ctx, long from, long to)
150 {
151 ctx->parsingLineFrom = from;
152 ctx->parsingLineTo = to;
153 }
154
parsing_line(SgReadContext * ctx,long line)155 static void parsing_line(SgReadContext *ctx, long line)
156 {
157 parsing_range(ctx, line, line);
158 }
159
160
161 typedef struct
162 {
163 long value;
164 int present;
165 } dispmacro_param;
166
167 typedef SgObject (*macro_function)(SgPort *, SgChar, SgReadContext *);
168 typedef SgObject (*dispmacro_function)(SgPort *, SgChar,
169 dispmacro_param *, SgReadContext *);
170
171 typedef enum {
172 CT_ILLEGAL,
173 CT_CONSTITUENT,
174 CT_SINGLE_ESCAPE,
175 CT_WHITE_SPACE,
176 CT_TERM_MACRO,
177 CT_NON_TERM_MACRO
178 } char_type;
179
180 typedef struct
181 {
182 SgObject sfunc;
183 dispmacro_function cfunc;
184 } disptab_t;
185
186 /* we only support ASCII */
187 #define MAX_READTABLE_CHAR 128
188
189 typedef struct
190 {
191 char_type type;
192 SgObject sfunc;
193 macro_function cfunc;
194 disptab_t *disp;
195 } readtab_t;
196
197 struct readtable_rec_t
198 {
199 int insensitiveP;
200 SgObject (*symbol_reader)(SgPort *, SgChar, SgReadContext *,
201 SgObject *errorp);
202 readtab_t readtable[MAX_READTABLE_CHAR];
203 };
204
205 /* template readtables */
206 static readtable_t r6rs_read_table;
207 static readtable_t r7rs_read_table;
208 /* including r7rs #u8() */
209 static readtable_t compat_read_table;
210
211 static readtable_t* default_readtable(int copyP);
212
213 #define DEFAULT_TABLEP(port) \
214 (!SG_PORT_READTABLE(port) || \
215 SG_PORT_READTABLE(port) == default_readtable(FALSE))
216
217 #define ENSURE_COPIED_TABLE(port) \
218 do { \
219 if (DEFAULT_TABLEP(port)) { \
220 SG_PORT_READTABLE(port) = default_readtable(TRUE); \
221 } \
222 } while(0)
223
delimited(SgPort * p,SgChar c)224 static int delimited(SgPort *p, SgChar c)
225 {
226 readtable_t *table = SG_PORT_READTABLE(p);
227 if (c > 127 && Sg_Ucs4WhiteSpaceP(c)) return TRUE;
228 if (c > 127) return FALSE;
229 /* return DELIMITER_CHARP(c); */
230 if (!table) {
231 /* use default table */
232 table = default_readtable(FALSE);
233 }
234 switch (table->readtable[c].type) {
235 case CT_NON_TERM_MACRO:
236 case CT_CONSTITUENT:
237 /* is escape a delmiter? */
238 case CT_SINGLE_ESCAPE:
239 return FALSE;
240 default: return TRUE;
241 }
242 }
243
244 static SgObject read_expr4(SgPort *port, int flags, SgChar delim,
245 SgReadContext *ctx);
246 #define ACCEPT_EOF 1
247 #define ACCEPT_DOT 2
248
249 #define read_expr(p, c) read_expr4((p), 0, EOF, (c))
250 /* one char reader */
251 static SgObject read_open_paren(SgPort *port, SgChar c, SgReadContext *ctx);
252 static SgObject read_close_paren(SgPort *port, SgChar c, SgReadContext *ctx);
253 static SgObject read_double_quote(SgPort *port, SgChar c, SgReadContext *ctx);
254 static SgObject read_quote(SgPort *port, SgChar c, SgReadContext *ctx);
255 static SgObject read_quasiquote(SgPort *port, SgChar c, SgReadContext *ctx);
256 static SgObject read_unquote(SgPort *port, SgChar c, SgReadContext *ctx);
257 static SgObject read_colon(SgPort *port, SgChar c, SgReadContext *ctx);
258 static SgObject read_vertical_bar(SgPort *port, SgChar c, SgReadContext *ctx);
259 static SgObject read_open_bracket(SgPort *port, SgChar c, SgReadContext *ctx);
260 static SgObject read_close_bracket(SgPort *port, SgChar c, SgReadContext *ctx);
261 static SgObject read_semicolon(SgPort *port, SgChar c, SgReadContext *ctx);
262 /* two chars */
263 /* list of two char syntax
264 #-',`!|uvtTfFobBOdDxXiIeE(\;=#
265 */
266 static SgObject read_hash_quote(SgPort *port, SgChar c,
267 dispmacro_param *param, SgReadContext *ctx);
268 static SgObject read_hash_quasiquote(SgPort *port, SgChar c,
269 dispmacro_param *param,
270 SgReadContext *ctx);
271 static SgObject read_hash_unquote(SgPort *port, SgChar c,
272 dispmacro_param *param, SgReadContext *ctx);
273 static SgObject read_hash_bang(SgPort *port, SgChar c, dispmacro_param *param,
274 SgReadContext *ctx);
275 static SgObject read_hash_bar(SgPort *port, SgChar c, dispmacro_param *param,
276 SgReadContext *ctx);
277 static SgObject read_hash_v(SgPort *port, SgChar c, dispmacro_param *param,
278 SgReadContext *ctx);
279 static SgObject read_hash_u(SgPort *port, SgChar c, dispmacro_param *param,
280 SgReadContext *ctx);
281 static SgObject read_hash_t(SgPort *port, SgChar c, dispmacro_param *param,
282 SgReadContext *ctx);
283 static SgObject read_hash_f(SgPort *port, SgChar c, dispmacro_param *param,
284 SgReadContext *ctx);
285 static SgObject read_hash_b(SgPort *port, SgChar c, dispmacro_param *param,
286 SgReadContext *ctx);
287 static SgObject read_hash_o(SgPort *port, SgChar c, dispmacro_param *param,
288 SgReadContext *ctx);
289 static SgObject read_hash_d(SgPort *port, SgChar c, dispmacro_param *param,
290 SgReadContext *ctx);
291 static SgObject read_hash_x(SgPort *port, SgChar c, dispmacro_param *param,
292 SgReadContext *ctx);
293 static SgObject read_hash_i(SgPort *port, SgChar c, dispmacro_param *param,
294 SgReadContext *ctx);
295 static SgObject read_hash_e(SgPort *port, SgChar c, dispmacro_param *param,
296 SgReadContext *ctx);
297 static SgObject read_hash_open_paren(SgPort *port, SgChar c,
298 dispmacro_param *param,
299 SgReadContext *ctx);
300 static SgObject read_hash_semicolon(SgPort *port, SgChar c,
301 dispmacro_param *param, SgReadContext *ctx);
302 static SgObject read_hash_escape(SgPort *port, SgChar c, dispmacro_param *param,
303 SgReadContext *ctx);
304 static SgObject read_hash_equal(SgPort *port, SgChar c, dispmacro_param *param,
305 SgReadContext *ctx);
306 static SgObject read_hash_hash(SgPort *port, SgChar c, dispmacro_param *param,
307 SgReadContext *ctx);
308 static SgObject read_hash_less(SgPort *port, SgChar c, dispmacro_param *param,
309 SgReadContext *ctx);
310 static SgObject read_hash_colon(SgPort *port, SgChar c, dispmacro_param *param,
311 SgReadContext *ctx);
312
313 /* mode */
314 static SgObject read_r6rs_symbol(SgPort *port, SgChar c, SgReadContext *ctx,
315 SgObject *errorp);
316 static SgObject read_compatible_symbol(SgPort *port, SgChar c,
317 SgReadContext *ctx, SgObject *errorp);
318 /* utility */
319 static SgObject read_symbol_or_number(SgPort *port, SgChar c,
320 readtable_t *table, SgReadContext *ctx);
321 static SgObject macro_reader(SgPort *port, SgChar c, readtab_t *tab,
322 SgReadContext *ctx);
323 static SgObject dispmacro_reader(SgPort *port, SgChar c, SgReadContext *ctx);
324
325 static void add_read_table(readtable_t *src, readtable_t *dst);
326 /* TODO use this in lexical_error */
lexical_error_msg(SgPort * port,SgReadContext * ctx,const SgChar * fmt,...)327 static SgObject lexical_error_msg(SgPort * port, SgReadContext *ctx,
328 const SgChar *fmt, ...)
329 {
330 va_list ap;
331 SgObject msg, line, file;
332
333 va_start(ap, fmt);
334 msg = Sg_Vsprintf(fmt, ap, TRUE);
335 va_end(ap);
336
337 file = Sg_FileName(port);
338 if (ctx->parsingLineFrom == ctx->parsingLineTo) {
339 line = Sg_Sprintf(UC("file %S, line %d"),
340 file,
341 ctx->parsingLineFrom);
342 } else {
343 line = Sg_Sprintf(UC("file %S, line %d-%d"),
344 file,
345 ctx->parsingLineFrom, ctx->parsingLineTo);
346 }
347 return Sg_Sprintf(UC("%A (%A)"), msg, line);
348 }
349
lexical_error(SgPort * port,SgReadContext * ctx,const SgChar * fmt,...)350 static void lexical_error(SgPort * port, SgReadContext *ctx,
351 const SgChar *fmt, ...)
352 {
353 va_list ap;
354 SgObject msg, line, file;
355
356 va_start(ap, fmt);
357 msg = Sg_Vsprintf(fmt, ap, TRUE);
358 va_end(ap);
359
360 file = Sg_FileName(port);
361 if (ctx->parsingLineFrom == ctx->parsingLineTo) {
362 line = Sg_Sprintf(UC("file %S, line %d"),
363 file,
364 ctx->parsingLineFrom);
365 } else {
366 line = Sg_Sprintf(UC("file %S, line %d-%d"),
367 file,
368 ctx->parsingLineFrom, ctx->parsingLineTo);
369 }
370 Sg_ReadError(UC("%A (%A)"), msg, line);
371 }
372
read_thing(SgPort * port,SgReadContext * ctx,SgChar * buf,size_t size,SgChar initial)373 static long read_thing(SgPort *port, SgReadContext *ctx, SgChar *buf,
374 size_t size, SgChar initial)
375 {
376 long i = 0;
377 if (initial != -1) {
378 buf[i++] = initial;
379 }
380 while (i < (long)size) {
381 SgChar c = Sg_PeekcUnsafe(port);
382 if (c == EOF || delimited(port, c)) {
383 buf[i] = 0;
384 return i;
385 }
386 Sg_GetcUnsafe(port);
387 buf[i++] = c;
388 }
389 lexical_error(port, ctx,
390 UC("token buffer overflow while reading identifier, %s ..."),
391 buf);
392 return -1; /* dummy */
393 }
394
read_hex_scalar_value(SgPort * port,SgReadContext * ctx)395 static SgChar read_hex_scalar_value(SgPort *port, SgReadContext *ctx)
396 {
397 int n;
398 SgChar ucs4 = 0, c = Sg_GetcUnsafe(port);
399 if (c == EOF) {
400 lexical_error(port, ctx,
401 UC("unexpected end-of-file while reading hex scalar value"));
402 }
403 if (delimited(port, c)) {
404 lexical_error(port, ctx,
405 UC("expected hex digit, but got %c, while reading hex scalar value"), c);
406 }
407 Sg_UngetcUnsafe(port, c);
408
409 while (TRUE) {
410 c = Sg_GetcUnsafe(port);
411 if (c == EOF || delimited(port, c)) {
412 Sg_UngetcUnsafe(port, c);
413 return Sg_EnsureUcs4(ucs4);
414 }
415 n = convert_hex_char_to_int(c);
416 if (n < 0) {
417 lexical_error(port, ctx,
418 UC("expected hex digit, but got %c, while reading hex scalar value"), c);
419 }
420 ucs4 = (ucs4 << 4) + n;
421 if (ucs4 > 0x10ffff) {
422 lexical_error(port, ctx, UC("hex scalar value out of range"));
423 }
424 }
425 }
426
read_escape(SgPort * port,SgReadContext * ctx)427 static SgChar read_escape(SgPort *port, SgReadContext *ctx)
428 {
429 SgChar c = Sg_GetcUnsafe(port);
430 switch (c) {
431 case 'x':
432 c = Sg_GetcUnsafe(port);
433 if (c == EOF) {
434 lexical_error(port, ctx,
435 UC("unexpected end-of-file while reading escape sequence"));
436 }
437 Sg_UngetcUnsafe(port, c);
438 c = read_hex_scalar_value(port, ctx);
439 if (Sg_GetcUnsafe(port) != ';') {
440 lexical_error(port, ctx,
441 UC("inline hex escape missing terminating semi-colon"));
442 }
443 return c;
444 case 'a': return 0x0007;
445 case 'b': return 0x0008;
446 case 't': return 0x0009;
447 case 'n': return 0x000A;
448 case 'v': return 0x000B;
449 case 'f': return 0x000C;
450 case 'r': return 0x000D;
451 case '"': return 0x0022;
452 case '\\': return 0x005C;
453 /* R7RS */
454 case '|': return 0x007C;
455 case EOF:
456 lexical_error(port, ctx,
457 UC("unexpected end-of-file while reading escape sequence"));
458 default:
459 if (ctx->escapedp) return c;
460 lexical_error(port, ctx,
461 UC("invalid escape sequence, \\%c"), c);
462 }
463
464 return -1; /* dummy */
465 }
466
467 typedef int (*read_helper)(SgPort *, SgReadContext *, SgChar *,
468 int, SgChar, SgObject, readtable_t *table,
469 SgObject *errorp);
470 /* convenient macro, used both symbol and string reader*/
471 #define append_char(buf, p, c, i) \
472 do { \
473 if (p) { \
474 Sg_PutcUnsafe(p, (c)); \
475 } else { \
476 (buf)[(i)++] = (c); \
477 } \
478 } while (0)
479
480 #define READ_SYMBOL_MAX_SIZE 256
481
read_symbol_generic(SgPort * port,SgChar initial,read_helper helper,SgReadContext * ctx,SgObject * errorp)482 static SgObject read_symbol_generic(SgPort *port, SgChar initial,
483 read_helper helper,
484 SgReadContext *ctx, SgObject *errorp)
485 {
486 SgChar buf[READ_SYMBOL_MAX_SIZE], c;
487 int i = 0;
488 readtable_t *table = Sg_PortReadTable(port);
489 SgObject out = NULL;
490
491 if (initial > 0) {
492 if (initial > 127) {
493 Sg_EnsureUcs4(initial);
494 if (i == 0) {
495 if (Sg_Ucs4ConstituentP(initial)) {
496 append_char(buf, out, initial, i);
497 goto next;
498 }
499 } else {
500 if (Sg_Ucs4SubsequentP(initial)) {
501 append_char(buf, out, initial, i);
502 goto next;
503 }
504 }
505 lexical_error(port, ctx,
506 UC("invalid character %U during reading identifier"),
507 initial);
508 } else {
509 i = helper(port, ctx, buf, i, initial, out, table, errorp);
510 }
511 }
512 next:
513 while (out != NULL || i < array_sizeof(buf)) {
514 c = Sg_PeekcUnsafe(port);
515 if (c == EOF || delimited(port, c)) {
516 if (out) {
517 return Sg_GetStringFromStringPort(out);
518 } else {
519 SgObject s;
520 buf[i] = 0;
521 s = Sg_MakeString(buf, SG_LITERAL_STRING, i);
522 if (table->insensitiveP) {
523 s = Sg_StringFoldCase(SG_STRING(s));
524 }
525 return s;
526 }
527 }
528 Sg_GetcUnsafe(port);
529 if (c == '\\') {
530 /* TODO how should I treat '\\' during reading symbol */
531 c = Sg_GetcUnsafe(port);
532 if (c == 'x') {
533 Sg_UngetcUnsafe(port, c);
534 append_char(buf, out, read_escape(port, ctx), i);
535 continue;
536 }
537 lexical_error(port, ctx,
538 UC("invalid character '\\' during reading identifier"));
539 }
540 if (c > 127) {
541 Sg_EnsureUcs4(c);
542 if (i == 0) {
543 if (Sg_Ucs4ConstituentP(c)) {
544 append_char(buf, out, c, i);
545 continue;
546 }
547 } else {
548 if (Sg_Ucs4SubsequentP(c)) {
549 append_char(buf, out, c, i);
550 continue;
551 }
552 }
553 lexical_error(port, ctx,
554 UC("invalid character %U during reading identifier"), c);
555 }
556 i = helper(port, ctx, buf, i, c, out, table, errorp);
557 }
558 out = Sg_ConvertToStringOutputPort(buf, i);
559 goto next;
560 /* lexical_error(port, ctx, */
561 /* UC("token buffer overflow during reading identifier")); */
562 return SG_UNDEF;
563 #undef check_range
564 }
565
read_r6rs_symbol_helper(SgPort * port,SgReadContext * ctx,SgChar * buf,int i,SgChar c,SgObject out,readtable_t * table,SgObject * errorp)566 static int read_r6rs_symbol_helper(SgPort *port, SgReadContext *ctx,
567 SgChar *buf, int i, SgChar c,
568 SgObject out,
569 readtable_t *table, SgObject *errorp)
570 {
571 if (c > 127) {
572 if (buf != NULL) append_char(buf, out, c, i);
573 return i;
574 }
575 if (i == 0) {
576 if (INITIAL_CHARP(c)) {
577 if (buf != NULL) append_char(buf, out, c, i);
578 return i;
579 }
580 } else {
581 if (SYMBOL_CHARP(c)) {
582 if (buf != NULL) append_char(buf, out, c, i);
583 return i;
584 }
585 }
586 if (errorp) {
587 if (buf != NULL) append_char(buf, out, c, i);
588 *errorp =
589 lexical_error_msg(port, ctx,
590 UC("invalid character %U while reading identifier"), c);
591 return i;
592 } else {
593 lexical_error(port, ctx,
594 UC("invalid character %U while reading identifier"), c);
595 }
596 return -1; /* dummy */
597 }
598
read_r6rs_symbol(SgPort * port,SgChar initial,SgReadContext * ctx,SgObject * errorp)599 SgObject read_r6rs_symbol(SgPort *port, SgChar initial, SgReadContext *ctx,
600 SgObject *errorp)
601 {
602 return read_symbol_generic(port, initial, read_r6rs_symbol_helper, ctx,
603 errorp);
604 }
605
read_compat_symbol_helper(SgPort * port,SgReadContext * ctx,SgChar * buf,int i,SgChar c,SgObject out,readtable_t * table,SgObject * errorp)606 static int read_compat_symbol_helper(SgPort *port, SgReadContext *ctx,
607 SgChar *buf, int i, SgChar c,
608 SgObject out,
609 readtable_t *table, SgObject *errorp)
610 {
611 if (!delimited(port, c)) {
612 append_char(buf, out, c, i);
613 return i;
614 }
615 if (errorp) {
616 append_char(buf, out, c, i);
617 *errorp =
618 lexical_error_msg(port, ctx,
619 UC("invalid character %U while reading identifier"), c);
620 return i;
621 } else {
622 lexical_error(port, ctx,
623 UC("invalid character %U while reading identifier"), c);
624 }
625 return -1; /* dummy */
626 }
627
read_compatible_symbol(SgPort * port,SgChar initial,SgReadContext * ctx,SgObject * errorp)628 SgObject read_compatible_symbol(SgPort *port, SgChar initial,
629 SgReadContext *ctx, SgObject *errorp)
630 {
631 return read_symbol_generic(port, initial, read_compat_symbol_helper, ctx,
632 errorp);
633 }
634
read_escaped_symbol(SgPort * port,SgReadContext * ctx)635 static SgObject read_escaped_symbol(SgPort *port, SgReadContext *ctx)
636 {
637 SgObject str = read_compatible_symbol(port, -1, ctx, NULL);
638 return Sg_Intern(str);
639 }
640
read_symbol_or_number(SgPort * port,SgChar c,readtable_t * table,SgReadContext * ctx)641 SgObject read_symbol_or_number(SgPort *port, SgChar c,
642 readtable_t *table, SgReadContext *ctx)
643 {
644 SgObject str, num, tmp;
645 SgObject error = SG_FALSE;
646 str = table->symbol_reader(port, c, ctx, &error);
647 if (table->symbol_reader == read_r6rs_symbol) {
648 tmp = str;
649 } else {
650 /* R7RS requires capital NaN or InF */
651 tmp = Sg_StringDownCase(str);
652 }
653 num = Sg_StringToNumber(tmp, 10, TRUE);
654 if (!SG_FALSEP(num)) return num;
655
656 /* check special case first */
657 if (SG_STRING_SIZE(str) == 1 && SG_STRING_VALUE_AT(str, 0) == '.')
658 return SG_SYMBOL_DOT;
659 /* well for now we do not check */
660 if (table->symbol_reader == read_r6rs_symbol &&
661 SG_STRING_VALUE_AT(str, 0) < 128) {
662 int i;
663 if (SG_STRING_SIZE(str) == 1 &&
664 (SG_STRING_VALUE_AT(str, 0)=='+' || SG_STRING_VALUE_AT(str, 0)=='-')) {
665 return Sg_Intern(str);
666 }
667 if (ustrcmp(SG_STRING_VALUE(str), "...") == 0) return SG_SYMBOL_ELLIPSIS;
668 if (SG_STRING_SIZE(str) >= 2 &&
669 SG_STRING_VALUE_AT(str, 0) == '-'
670 && SG_STRING_VALUE_AT(str, 1) == '>') {
671 for (i = 2; i < SG_STRING_SIZE(str); i++) {
672 SgChar c = SG_STRING_VALUE_AT(str, i);
673 if (c > 127) continue;
674 if (SYMBOL_CHARP(c)) continue;
675 lexical_error(port, ctx, UC("invalid lexical syntax %A"), str);
676 }
677 return Sg_Intern(str);
678 }
679 }
680 /* if there is an lexical error after all checks, then raise it */
681 if (!SG_FALSEP(error)) Sg_ReadError(UC("%A"), error);
682 return Sg_Intern(str);
683
684 }
685
macro_reader(SgPort * port,SgChar c,readtab_t * tab,SgReadContext * ctx)686 SgObject macro_reader(SgPort *port, SgChar c, readtab_t *tab,
687 SgReadContext *ctx)
688 {
689 if (tab[c].cfunc) return (*tab[c].cfunc)(port, c, ctx);
690 if (SG_EQ(tab[c].sfunc, SG_UNBOUND))
691 lexical_error(port, ctx, UC("'%c' is not a macro char"), c);
692 /* FIXME: we should eliminate this type of dispatch */
693 if (SG_PROCEDURE_REQUIRED(tab[c].sfunc) > 2) {
694 return Sg_Apply3(tab[c].sfunc, port, SG_MAKE_CHAR(c), ctx);
695 } else {
696 return Sg_Apply2(tab[c].sfunc, port, SG_MAKE_CHAR(c));
697 }
698 }
699
read_list_int(SgPort * port,SgChar closer,SgReadContext * ctx,long start_line)700 static SgObject read_list_int(SgPort *port, SgChar closer, SgReadContext *ctx,
701 long start_line)
702 {
703 SgObject start = SG_NIL, last = SG_NIL, item;
704 item = read_expr4(port, ACCEPT_EOF, closer, ctx);
705 if (SG_EQ(item, SG_EOF)) goto eoferr;
706 /* return '() */
707 if (!ctx->escapedp && SG_EQ(item, SG_SYMBOL_RPAREN)) return start;
708
709 SG_APPEND1(start, last, item);
710
711 for (;;) {
712 ctx->escapedp = FALSE;
713 item = read_expr4(port, ACCEPT_EOF | ACCEPT_DOT, closer, ctx);
714 if (SG_EQ(item, SG_EOF)) goto eoferr;
715 if (!ctx->escapedp && SG_EQ(item, SG_SYMBOL_RPAREN)) return start;
716 if (!ctx->escapedp && SG_EQ(item, SG_SYMBOL_DOT)) {
717 SG_SET_CDR(last, read_expr(port, ctx));
718 item = read_expr4(port, ACCEPT_EOF, closer, ctx);
719 if (!SG_EQ(item, SG_SYMBOL_RPAREN)) {
720 parsing_range(ctx, start_line, Sg_LineNo(port));
721 lexical_error(port, ctx, UC("bad dot syntax"));
722 }
723 if (SG_EQ(item, SG_EOF)) goto eoferr;
724 return start;
725 }
726 SG_APPEND1(start, last, item);
727 }
728 eoferr:
729 parsing_range(ctx, start_line, Sg_LineNo(port));
730 lexical_error(port, ctx, UC("unexpected end-of-file while reading a list"));
731 return SG_UNDEF; /* dummy */
732 }
733
read_list(SgPort * port,SgChar closer,SgReadContext * ctx)734 static SgObject read_list(SgPort *port, SgChar closer, SgReadContext *ctx)
735 {
736 long line = Sg_LineNo(port);
737 SgObject r = read_list_int(port, closer, ctx, line);
738 if (SG_PAIRP(r) && line >= 0) {
739 SgVM *vm = Sg_VM();
740 if (!SG_VM_IS_SET_FLAG(vm, SG_NO_DEBUG_INFO)) {
741 SgObject info = Sg_FileName(port);
742 if (!SG_FALSEP(info) && ctx->flags & SG_READ_SOURCE_INFO) {
743 r = Sg_SetPairAnnotation(r, SYM_SOURCE_INFO,
744 Sg_Cons(info, SG_MAKE_INT(line)));
745 }
746 }
747 }
748 return r;
749 }
750
read_open_paren(SgPort * port,SgChar c,SgReadContext * ctx)751 SgObject read_open_paren(SgPort *port, SgChar c, SgReadContext *ctx)
752 {
753 return read_list(port, ')', ctx);
754 }
755
read_close_paren(SgPort * port,SgChar c,SgReadContext * ctx)756 SgObject read_close_paren(SgPort *port, SgChar c, SgReadContext *ctx)
757 {
758 lexical_error(port, ctx, UC("unexpected close paren ')'"));
759 return SG_UNDEF; /* dummy */
760 }
761
read_open_bracket(SgPort * port,SgChar c,SgReadContext * ctx)762 SgObject read_open_bracket(SgPort *port, SgChar c, SgReadContext *ctx)
763 {
764 return read_list(port, ']', ctx);
765 }
766
read_close_bracket(SgPort * port,SgChar c,SgReadContext * ctx)767 SgObject read_close_bracket(SgPort *port, SgChar c, SgReadContext *ctx)
768 {
769 lexical_error(port, ctx, UC("unexpected close bracket ']'"));
770 return SG_UNDEF; /* dummy */
771 }
772
read_double_quote(SgPort * port,SgChar c,SgReadContext * ctx)773 SgObject read_double_quote(SgPort *port, SgChar c, SgReadContext *ctx)
774 {
775 SgChar buf[READ_STRING_MAX_SIZE];
776 int i = 0;
777 SgObject out = NULL;
778
779 #define handle_linefeed(c, hndl) \
780 switch (c) { \
781 case CR: \
782 (c) = Sg_GetcUnsafe(port); \
783 if ((c) != LF && (c) != NEL) Sg_UngetcUnsafe(port, c); \
784 case LF: case NEL: case LS: \
785 hndl; \
786 }
787
788 next:
789 while (out != NULL || i < array_sizeof(buf)) {
790 SgChar c = Sg_GetcUnsafe(port);
791 if (c == EOF)
792 lexical_error(port, ctx,
793 UC("unexpected end-of-file while reading string"));
794
795 handle_linefeed(c, { append_char(buf, out, LF, i); continue; });
796
797 if (c == '"') {
798 if (out) {
799 return Sg_GetStringFromStringPort(out);
800 } else {
801 buf[i] = 0;
802 return Sg_MakeString(buf, SG_LITERAL_STRING, i);
803 }
804 }
805 if (c == '\\') {
806 c = Sg_GetcUnsafe(port);
807 if (Sg_Ucs4IntralineWhiteSpaceP(c)) {
808 do {
809 c = Sg_GetcUnsafe(port);
810 if (c == EOF) {
811 lexical_error(port, ctx,
812 UC("unexpected end-of-file while"
813 " reading intraline whitespace"));
814 }
815 } while (Sg_Ucs4IntralineWhiteSpaceP(c));
816 /* internal line feed is LF*/
817 handle_linefeed(c, break; default:
818 lexical_error(port, ctx,
819 UC("unexpected character %U while"
820 " reading intraline whitespace"), c)
821 );
822 do { c = Sg_GetcUnsafe(port); } while (Sg_Ucs4IntralineWhiteSpaceP(c));
823 Sg_UngetcUnsafe(port, c);
824 continue;
825 }
826 handle_linefeed(c,
827 {
828 do { c = Sg_GetcUnsafe(port); }
829 while (Sg_Ucs4IntralineWhiteSpaceP(c));
830 Sg_UngetcUnsafe(port, c);
831 continue;
832 }
833 );
834 Sg_UngetcUnsafe(port, c);
835 c = read_escape(port, ctx);
836 append_char(buf, out, c, i);
837 continue;
838 } else {
839 append_char(buf, out, c, i);
840 }
841 }
842 out = Sg_ConvertToStringOutputPort(buf, i);
843 goto next;
844 /* lexical_error(port, ctx, UC("token buffer overflow while reading string")); */
845 return SG_UNDEF; /* dummy */
846 }
847
read_quote(SgPort * port,SgChar c,SgReadContext * ctx)848 SgObject read_quote(SgPort *port, SgChar c, SgReadContext *ctx)
849 {
850 SgObject o = read_expr(port, ctx);
851 if (SG_EQ(o, SG_EOF)) {
852 lexical_error(port, ctx,
853 UC("unexpected end-of-file following quotation-mark(')"));
854 }
855 if (SG_PAIRP(o)) {
856 o = Sg_AddConstantLiteral(o);
857 }
858 return SG_LIST2(SG_SYMBOL_QUOTE, o);
859 }
860
read_quasiquote(SgPort * port,SgChar c,SgReadContext * ctx)861 SgObject read_quasiquote(SgPort *port, SgChar c, SgReadContext *ctx)
862 {
863 SgObject o = read_expr(port, ctx);
864 if (SG_EQ(o, SG_EOF)) {
865 lexical_error(port, ctx,
866 UC("unexpected end-of-file following grave-accent(`)"));
867 }
868 return SG_LIST2(SG_SYMBOL_QUASIQUOTE, o);
869 }
870
read_unquote(SgPort * port,SgChar c,SgReadContext * ctx)871 SgObject read_unquote(SgPort *port, SgChar c, SgReadContext *ctx)
872 {
873 c = Sg_GetcUnsafe(port);
874 if (c == EOF) {
875 lexical_error(port, ctx, UC("unexpected end-of-file following comma(,)"));
876 }
877 if (c == '@')
878 return SG_LIST2(SG_SYMBOL_UNQUOTE_SPLICING, read_expr(port, ctx));
879 Sg_UngetcUnsafe(port, c);
880 return SG_LIST2(SG_SYMBOL_UNQUOTE, read_expr(port, ctx));
881 }
882
read_quoted_symbol(SgPort * port,SgReadContext * ctx,int interned)883 static SgObject read_quoted_symbol(SgPort *port, SgReadContext *ctx,
884 int interned)
885 {
886 SgChar buf[SYMBOL_MAX_SIZE];
887 int i = 0;
888 /* TODO flag check */
889 ctx->escapedp = TRUE;
890 while (i < array_sizeof(buf)) {
891 SgChar c = Sg_GetcUnsafe(port);
892 if (c == EOF) {
893 lexical_error(port, ctx,
894 UC("unexpected end-of-file while reading quoted symbol"));
895 }
896 if (c == '|') {
897 SgChar *real = SG_NEW_ATOMIC2(SgChar *, (i + 1) * sizeof(SgChar));
898 buf[i] = 0;
899 memcpy(real, buf, (i + 1) * sizeof(SgChar));
900 return Sg_MakeSymbol(Sg_MakeString(real, SG_LITERAL_STRING, i),
901 interned);
902 }
903 if (c == '\\') {
904 c = read_escape(port, ctx);
905 }
906 buf[i++] = c;
907 }
908 lexical_error(port, ctx,
909 UC("token buffer overflow while reading quoted symbol"));
910 return SG_UNDEF; /* dummy */
911 }
912
read_colon(SgPort * port,SgChar c,SgReadContext * ctx)913 SgObject read_colon(SgPort *port, SgChar c, SgReadContext *ctx)
914 {
915 int c2 = Sg_GetcUnsafe(port);
916 SgObject name;
917 SgChar buf[SYMBOL_MAX_SIZE]; /* too small? */
918
919 if (c2 == '|') {
920 name = read_quoted_symbol(port, ctx, FALSE);
921 return Sg_MakeKeyword(SG_SYMBOL(name)->name);
922 } else {
923 long size;
924 Sg_UngetcUnsafe(port, c2);
925 size = read_thing(port, ctx, buf, array_sizeof(buf), -1);
926 buf[size] = 0;
927 return Sg_MakeKeyword(Sg_MakeString(buf, SG_LITERAL_STRING, size));
928 }
929 }
930
read_vertical_bar(SgPort * port,SgChar c,SgReadContext * ctx)931 SgObject read_vertical_bar(SgPort *port, SgChar c, SgReadContext *ctx)
932 {
933 return read_quoted_symbol(port, ctx, TRUE);
934 }
935
read_semicolon(SgPort * port,SgChar c,SgReadContext * ctx)936 SgObject read_semicolon(SgPort *port, SgChar c, SgReadContext *ctx)
937 {
938 while ((c = Sg_GetcUnsafe(port)) != EOF) {
939 if (c == LF) return NULL;
940 }
941 return SG_EOF;
942 }
943
read_hash_quote(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)944 SgObject read_hash_quote(SgPort *port, SgChar c,
945 dispmacro_param *param, SgReadContext *ctx)
946 {
947 return SG_LIST2(SG_SYMBOL_SYNTAX, read_expr(port, ctx));
948 }
949
read_hash_quasiquote(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)950 SgObject read_hash_quasiquote(SgPort *port, SgChar c,
951 dispmacro_param *param, SgReadContext *ctx)
952 {
953 return SG_LIST2(SG_SYMBOL_QUASISYNTAX, read_expr(port, ctx));
954 }
955
read_hash_unquote(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)956 SgObject read_hash_unquote(SgPort *port, SgChar c,
957 dispmacro_param *param, SgReadContext *ctx)
958 {
959 c = Sg_GetcUnsafe(port);
960 if (c == EOF) {
961 lexical_error(port, ctx,
962 UC("unexpected end-of-file following sharp-comma(,)"));
963 }
964 if (c == '@') {
965 return SG_LIST2(SG_SYMBOL_UNSYNTAX_SPLICING, read_expr(port, ctx));
966 }
967 Sg_UngetcUnsafe(port, c);
968 return SG_LIST2(SG_SYMBOL_UNSYNTAX, read_expr(port, ctx));
969 }
970
construct_lib_name(SgObject s)971 static SgObject construct_lib_name(SgObject s)
972 {
973 SgObject h = SG_NIL, t = SG_NIL;
974 int i, prev;
975 for (i = 0, prev = 0; i < SG_STRING_SIZE(s); i++) {
976 if (SG_STRING_VALUE_AT(s, i) == '/') {
977 SG_APPEND1(h, t, Sg_Intern(Sg_Substring(s, prev, i)));
978 prev = i + 1;
979 }
980 }
981 if (i != prev) {
982 SG_APPEND1(h, t, Sg_Intern(Sg_Substring(s, prev, i)));
983 }
984 return h;
985 }
986
987 /* FIXME: we may want to add more directive on runtime (like Gauche) */
Sg_ApplyDirective(SgPort * port,SgObject desc,SgReadContext * ctx)988 SgObject Sg_ApplyDirective(SgPort *port, SgObject desc, SgReadContext *ctx)
989 {
990 SgString *tag = SG_SYMBOL(desc)->name;
991
992 if (!ctx) {
993 ctx = DEFAULT_CONTEXT;
994 }
995 /* a bit of optimisation... */
996 switch (SG_STRING_VALUE_AT(tag, 0)) {
997 case 'c':
998 if (ustrcmp(tag->value, "compatible") == 0) {
999 SgVM *vm = Sg_VM();
1000 if (ctx->flags & SG_CHANGE_VM_MODE) {
1001 SG_VM_SET_FLAG(vm, SG_COMPATIBLE_MODE);
1002 SG_VM_UNSET_FLAG(vm, SG_R6RS_MODE);
1003 SG_VM_UNSET_FLAG(vm, SG_R7RS_MODE);
1004 SG_VM_SET_FLAG(vm, SG_ALLOW_OVERWRITE);
1005 SG_VM_UNSET_FLAG(vm, SG_ERROR_UNBOUND);
1006 }
1007 Sg_SetPortReadTable(port, Sg_CopyReadTable(&compat_read_table));
1008 return desc;
1009 }
1010 if (ustrcmp(tag->value, "core") == 0) {
1011 SgVM *vm = Sg_VM();
1012 if (ctx->flags & SG_CHANGE_VM_MODE) {
1013 SG_VM_UNSET_FLAG(vm, SG_COMPATIBLE_MODE);
1014 SG_VM_UNSET_FLAG(vm, SG_R6RS_MODE);
1015 SG_VM_UNSET_FLAG(vm, SG_R7RS_MODE);
1016 SG_VM_UNSET_FLAG(vm, SG_ALLOW_OVERWRITE);
1017 SG_VM_UNSET_FLAG(vm, SG_ERROR_UNBOUND);
1018 }
1019 Sg_SetPortReadTable(port, Sg_CopyReadTable(&compat_read_table));
1020 return desc;
1021 }
1022 if (ustrcmp(tag->value, "cache") == 0) {
1023 if (ctx->flags & SG_CHANGE_VM_MODE) {
1024 SG_VM_UNSET_FLAG(Sg_VM(), SG_DISABLE_CACHE);
1025 }
1026 return desc;
1027 }
1028 break;
1029 case 'd':
1030 if (ustrcmp(tag->value, "deprecated") == 0) {
1031 Sg_Warn(UC("deprecated file is being loaded %S"), Sg_FileName(port));
1032 return desc;
1033 }
1034 break;
1035 case 'f':
1036 if (ustrcmp(tag->value, "fold-case") == 0) {
1037 ENSURE_COPIED_TABLE(port);
1038 SG_PORT_READTABLE(port)->insensitiveP = TRUE;
1039 /* we need to preserve for include-ci with #!fold-case */
1040 ctx->flags |= SG_READ_NO_CASE;
1041 ctx->flags &= ~SG_READ_CASE;
1042 return desc;
1043 }
1044 break;
1045 case 'n':
1046 if (ustrcmp(tag->value, "no-overwrite") == 0) {
1047 if (ctx->flags & SG_CHANGE_VM_MODE) {
1048 SG_VM_UNSET_FLAG(Sg_VM(), SG_ALLOW_OVERWRITE);
1049 }
1050 return desc;
1051 }
1052 if (ustrcmp(tag->value, "no-fold-case") == 0) {
1053 ENSURE_COPIED_TABLE(port);
1054 SG_PORT_READTABLE(port)->insensitiveP = FALSE;
1055 ctx->flags &= ~SG_READ_NO_CASE;
1056 ctx->flags |= SG_READ_CASE;
1057 return desc;
1058 }
1059 if (ustrcmp(tag->value, "nocache") == 0) {
1060 if (ctx->flags & SG_CHANGE_VM_MODE) {
1061 SG_VM_SET_FLAG(Sg_VM(), SG_DISABLE_CACHE);
1062 }
1063 return desc;
1064 }
1065 if (ustrcmp(tag->value, "noinlineasm") == 0) {
1066 if (ctx->flags & SG_CHANGE_VM_MODE) {
1067 SG_VM_SET_FLAG(Sg_VM(), SG_NO_INLINE_ASM);
1068 }
1069 return desc;
1070 }
1071 if (ustrcmp(tag->value, "noinlinelocal") == 0) {
1072 if (ctx->flags & SG_CHANGE_VM_MODE) {
1073 SG_VM_SET_FLAG(Sg_VM(), SG_NO_INLINE_LOCAL);
1074 }
1075 return desc;
1076 }
1077 if (ustrcmp(tag->value, "nolambdalifting") == 0) {
1078 if (ctx->flags & SG_CHANGE_VM_MODE) {
1079 SG_VM_SET_FLAG(Sg_VM(), SG_NO_LAMBDA_LIFT);
1080 }
1081 return desc;
1082 }
1083 if (ustrcmp(tag->value, "nooptimization") == 0) {
1084 SgVM *vm = Sg_VM();
1085 if (ctx->flags & SG_CHANGE_VM_MODE) {
1086 SG_VM_SET_FLAG(vm, SG_NO_INLINE_ASM);
1087 SG_VM_SET_FLAG(vm, SG_NO_INLINE_LOCAL);
1088 SG_VM_SET_FLAG(vm, SG_NO_LAMBDA_LIFT);
1089 }
1090 return desc;
1091 }
1092 if (ustrcmp(tag->value, "nobacktrace") == 0) {
1093 SgVM *vm = Sg_VM();
1094 if (ctx->flags & SG_CHANGE_VM_MODE) {
1095 SG_VM_SET_FLAG(vm, SG_NO_DEBUG_INFO);
1096 }
1097 return desc;
1098 }
1099 if (ustrncmp(tag->value, "nounbound", 7) == 0) {
1100 SgVM *vm = Sg_VM();
1101 if (ctx->flags & SG_CHANGE_VM_MODE) {
1102 SG_VM_SET_FLAG(vm, SG_ERROR_UNBOUND);
1103 }
1104 return desc;
1105 }
1106 break;
1107 case 'o':
1108 if (ustrcmp(tag->value, "overwrite") == 0) {
1109 if (ctx->flags & SG_CHANGE_VM_MODE) {
1110 SG_VM_SET_FLAG(Sg_VM(), SG_ALLOW_OVERWRITE);
1111 }
1112 return desc;
1113 }
1114 break;
1115 case 'r':
1116 if (ustrcmp(tag->value, "r6rs") == 0) {
1117 SgVM *vm = Sg_VM();
1118 if (ctx->flags & SG_CHANGE_VM_MODE) {
1119 SG_VM_SET_FLAG(vm, SG_R6RS_MODE);
1120 SG_VM_UNSET_FLAG(vm, SG_ALLOW_OVERWRITE);
1121 SG_VM_UNSET_FLAG(vm, SG_R7RS_MODE);
1122 SG_VM_UNSET_FLAG(vm, SG_COMPATIBLE_MODE);
1123 SG_VM_SET_FLAG(vm, SG_ERROR_UNBOUND);
1124 }
1125 Sg_SetPortReadTable(port, Sg_CopyReadTable(&r6rs_read_table));
1126 return desc;
1127 }
1128 if (ustrcmp(tag->value, "r7rs") == 0) {
1129 SgVM *vm = Sg_VM();
1130 if (ctx->flags & SG_CHANGE_VM_MODE) {
1131 SG_VM_SET_FLAG(vm, SG_R7RS_MODE);
1132 SG_VM_UNSET_FLAG(vm, SG_ALLOW_OVERWRITE);
1133 SG_VM_UNSET_FLAG(vm, SG_COMPATIBLE_MODE);
1134 SG_VM_UNSET_FLAG(vm, SG_R6RS_MODE);
1135 /* TODO should we? */
1136 SG_VM_UNSET_FLAG(vm, SG_ERROR_UNBOUND);
1137 }
1138 Sg_SetPortReadTable(port, Sg_CopyReadTable(&r7rs_read_table));
1139 return desc;
1140 }
1141 if (ustrncmp(tag->value, "reader=", 7) == 0) {
1142 SgObject name = construct_lib_name(Sg_Substring(tag, 7, -1));
1143 SgObject lib = Sg_FindLibrary(name, FALSE);
1144 /* should we raise error or not? */
1145 if (SG_FALSEP(lib)) {
1146 lexical_error(port, ctx, UC("no library named %S"), name);
1147 return desc;
1148 }
1149 if (!SG_FALSEP(SG_LIBRARY_READER(lib))) {
1150 SG_PORT_READER(port) = SG_LIBRARY_READER(lib);
1151 }
1152 /* to let replaced reader read next expression, otherwise current
1153 reader keep reading the next one.
1154 */
1155 return SG_UNDEF;
1156 }
1157 /* for portability with other implementation */
1158 if (ustrncmp(tag->value, "read-macro=", 11) == 0) {
1159 SgObject name = construct_lib_name(Sg_Substring(tag, 11, -1));
1160 SgObject lib = Sg_FindLibrary(name, FALSE);
1161 /* should we raise error or not? */
1162 if (SG_FALSEP(lib)) {
1163 lexical_error(port, ctx, UC("no library named %S"), name);
1164 return desc;
1165 }
1166 if (SG_LIBRARY_READTABLE(lib)) {
1167 ENSURE_COPIED_TABLE(port);
1168 add_read_table(SG_LIBRARY_READTABLE(lib), Sg_PortReadTable(port));
1169 }
1170 return desc;
1171 }
1172 break;
1173 case 'u':
1174 if (ustrncmp(tag->value, "unbound", 7) == 0) {
1175 SgVM *vm = Sg_VM();
1176 if (ctx->flags & SG_CHANGE_VM_MODE) {
1177 SG_VM_UNSET_FLAG(vm, SG_ERROR_UNBOUND);
1178 }
1179 return desc;
1180 }
1181 break;
1182 default: break;
1183 }
1184 return desc; /* for convenience */
1185 }
1186
1187 /*
1188 TODO: some SRFIs (as far as I know only 105) requires speciall sh-bang
1189 it's better to have hook or something.
1190 */
read_hash_bang(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1191 SgObject read_hash_bang(SgPort *port, SgChar c, dispmacro_param *param,
1192 SgReadContext *ctx)
1193 {
1194 SgChar c2 = Sg_GetcUnsafe(port);
1195 /* if it starts with '#!/' or '#! ' we assume it's a script */
1196 if (c2 == '/' || c2 == ' ') {
1197 for (;;) {
1198 c2 = Sg_GetcUnsafe(port);
1199 /* internal eol-style is LF */
1200 if (c2 == LF) return NULL;
1201 if (c2 == EOF) return SG_EOF;
1202 }
1203 /* NOTREACHED */
1204 } else {
1205 readtable_t *table = Sg_PortReadTable(port);
1206 SgObject desc = read_symbol_or_number(port, c2, table, ctx);
1207 if (SG_SYMBOLP(desc)) {
1208 Sg_ApplyDirective(port, desc, ctx);
1209 }
1210 return NULL;
1211 }
1212 }
read_hash_bar(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1213 SgObject read_hash_bar(SgPort *port, SgChar c, dispmacro_param *param,
1214 SgReadContext *ctx)
1215 {
1216 SgChar c1, c2;
1217 int nest = 0;
1218
1219 seek_c1:
1220 c1 = Sg_GetcUnsafe(port);
1221
1222 seek_c2:
1223 c2 = Sg_GetcUnsafe(port);
1224 if (c2 == EOF) {
1225 lexical_error(port, ctx,
1226 UC("unexpected end-of-file while reading comments"));
1227 }
1228 if (c1 == '|' && c2 == '#') {
1229 if (nest == 0) return NULL;
1230 nest -= 1;
1231 goto seek_c1;
1232 }
1233 if (c1 == '#' && c2 == '|') {
1234 nest += 1;
1235 goto seek_c1;
1236 }
1237 c1 = c2;
1238 if (c1 == '|' || c1 == '#') goto seek_c2;
1239 goto seek_c1;
1240 }
1241
read_bytevector(SgPort * port,SgChar * buf,SgReadContext * ctx)1242 static SgObject read_bytevector(SgPort *port, SgChar *buf, SgReadContext *ctx)
1243 {
1244 #define CAST_FIXNUM_TO_U8(DATUM, REF) \
1245 ((SG_INT_VALUE(datum) >= 0 && SG_INT_VALUE(datum) <= UINT8_MAX) \
1246 ? (*ref = (SG_INT_VALUE(datum) & 0xFF), TRUE) : FALSE)
1247 #define READ_BVECTOR(s_type, c_type, s_type_test, c_type_test) \
1248 do { \
1249 if (ustrcmp(buf, s_type) == 0) { \
1250 long m = (long)(n * sizeof(c_type)); \
1251 long i; \
1252 SgByteVector *bvector = Sg_MakeByteVector(m, 0); \
1253 for (i = 0; i < m; i += (long)sizeof(c_type)) { \
1254 SgObject datum = SG_CAR(lst); \
1255 if ( s_type_test (datum)) { \
1256 c_type * ref = (c_type *)&bvector->elements[i]; \
1257 if ( c_type_test (datum, ref)) { \
1258 lst = SG_CDR(lst); \
1259 continue; \
1260 } \
1261 } \
1262 lexical_error(port, ctx, UC("expected " s_type ", but got %S"), \
1263 SG_CAR(lst)); \
1264 } \
1265 bvector = Sg_AddConstantLiteral(bvector); \
1266 return bvector; \
1267 } \
1268 } while (0)
1269
1270 long line_begin = Sg_LineNo(port);
1271 long n;
1272 SgObject lst = read_list(port, ')', ctx);
1273 parsing_range(ctx, line_begin, Sg_LineNo(port));
1274 n = Sg_Length(lst);
1275 READ_BVECTOR("u8", uint8_t, SG_INTP, CAST_FIXNUM_TO_U8);
1276 lexical_error(port, ctx, UC("invalid lexical syntax #v%s ..."), buf);
1277 #undef CAST_FIXNUM_TO_U8
1278 #undef READ_BVECTOR
1279 return SG_UNDEF; /* dummy */
1280 }
1281
read_hash_v(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1282 SgObject read_hash_v(SgPort *port, SgChar c, dispmacro_param *param,
1283 SgReadContext *ctx)
1284 {
1285 SgChar buf[16] = {0};
1286 int i;
1287 for (i = 0; i < 2; i++) {
1288 buf[i] = Sg_GetcUnsafe(port);
1289 if (buf[i] == EOF || delimited(port, buf[i])) break;
1290 }
1291 if (i != 2) {
1292 lexical_error(port, ctx, UC("invalid lexical syntax #v%s%A ..."), buf,
1293 SG_MAKE_CHAR(c));
1294 }
1295 c = Sg_GetcUnsafe(port);
1296 if (c == '(') {
1297 return read_bytevector(port, buf, ctx);
1298 }
1299 lexical_error(port, ctx, UC("invalid lexical syntax #v%s%A ..."), buf,
1300 SG_MAKE_CHAR(c));
1301 return SG_UNDEF; /* dummy */
1302 }
1303
read_hash_u(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1304 SgObject read_hash_u(SgPort *port, SgChar c, dispmacro_param *param,
1305 SgReadContext *ctx)
1306 {
1307 SgChar buf[16] = {0};
1308 buf[0] = 'u';
1309 buf[1] = Sg_GetcUnsafe(port);
1310 if (buf[1] != '8') {
1311 lexical_error(port, ctx, UC("invalid lexical syntax #%s ..."), buf);
1312 }
1313 c = Sg_GetcUnsafe(port);
1314 if (c == '(') {
1315 return read_bytevector(port, buf, ctx);
1316 }
1317 lexical_error(port, ctx, UC("invalid lexical syntax #%s%A ..."), buf,
1318 SG_MAKE_CHAR(c));
1319 return SG_UNDEF; /* dummy */
1320 }
1321
read_hash_t(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1322 SgObject read_hash_t(SgPort *port, SgChar c, dispmacro_param *param,
1323 SgReadContext *ctx)
1324 {
1325 SgChar c2 = Sg_GetcUnsafe(port);
1326 readtable_t *table = Sg_PortReadTable(port);
1327 if (c2 == EOF || delimited(port, c2)) {
1328 Sg_UngetcUnsafe(port, c2);
1329 return SG_TRUE;
1330 }
1331 /* R7RS allow #true so we need to check */
1332 if ((c == 't' && c2 == 'r') ||
1333 (table->insensitiveP && (c2 == 'r' || c2 == 'R'))) {
1334 SgObject rest = read_compatible_symbol(port, c2, ctx, NULL);
1335 if (ustrcmp(SG_STRING_VALUE(rest), "rue") == 0) {
1336 return SG_TRUE;
1337 }
1338 }
1339 lexical_error(port, ctx, UC("invalid lexical syntax #%S%S"),
1340 SG_MAKE_CHAR(c), SG_MAKE_CHAR(c2));
1341 return SG_UNDEF; /* dummy */
1342 }
1343
read_hash_f(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1344 SgObject read_hash_f(SgPort *port, SgChar c, dispmacro_param *param,
1345 SgReadContext *ctx)
1346 {
1347 SgChar c2 = Sg_GetcUnsafe(port);
1348 readtable_t *table = Sg_PortReadTable(port);
1349 if (c2 == EOF || delimited(port, c2)) {
1350 Sg_UngetcUnsafe(port, c2);
1351 return SG_FALSE;
1352 }
1353 /* R7RS allow #false so we need to check */
1354 if ((c == 'f' && c2 == 'a') ||
1355 (table->insensitiveP && (c2 == 'a' || c2 == 'A'))) {
1356 SgObject rest = read_compatible_symbol(port, c2, ctx, NULL);
1357 if (ustrcmp(SG_STRING_VALUE(rest), "alse") == 0) {
1358 return SG_FALSE;
1359 }
1360 }
1361 lexical_error(port, ctx, UC("invalid lexical syntax #%S%S"),
1362 SG_MAKE_CHAR(c), SG_MAKE_CHAR(c2));
1363 return SG_UNDEF; /* dummy */
1364 }
1365
read_prefixed_number(SgChar initial,SgPort * port,SgReadContext * ctx)1366 static SgObject read_prefixed_number(SgChar initial, SgPort *port,
1367 SgReadContext *ctx)
1368 {
1369 SgChar buf[4096];
1370 SgString *str;
1371 SgObject num;
1372 SgChar c;
1373 int offset = 0;
1374 buf[offset++] = '#';
1375 /* check if it's #%c#%c type or not */
1376 c = Sg_PeekcUnsafe(port);
1377 if (c == '#') {
1378 buf[offset++] = initial;
1379 buf[offset++] = c;
1380 Sg_GetcUnsafe(port); /* discard */
1381 initial = Sg_GetcUnsafe(port);
1382 }
1383 read_thing(port, ctx, buf + offset, array_sizeof(buf) - offset, initial);
1384 str = Sg_HeapString(buf);
1385 num = Sg_StringToNumber(str, 10, TRUE);
1386 if (SG_FALSEP(num)) {
1387 lexical_error(port, ctx,
1388 UC("invalid lexical syntax %s while reading number"), buf);
1389 }
1390 return num;
1391 }
1392
read_hash_b(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1393 SgObject read_hash_b(SgPort *port, SgChar c, dispmacro_param *param,
1394 SgReadContext *ctx)
1395 {
1396 return read_prefixed_number(c, port, ctx);
1397 }
1398
read_hash_o(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1399 SgObject read_hash_o(SgPort *port, SgChar c, dispmacro_param *param,
1400 SgReadContext *ctx)
1401 {
1402 return read_prefixed_number(c, port, ctx);
1403 }
read_hash_d(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1404 SgObject read_hash_d(SgPort *port, SgChar c, dispmacro_param *param,
1405 SgReadContext *ctx)
1406 {
1407 return read_prefixed_number(c, port, ctx);
1408 }
read_hash_x(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1409 SgObject read_hash_x(SgPort *port, SgChar c, dispmacro_param *param,
1410 SgReadContext *ctx)
1411 {
1412 return read_prefixed_number(c, port, ctx);
1413 }
read_hash_i(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1414 SgObject read_hash_i(SgPort *port, SgChar c, dispmacro_param *param,
1415 SgReadContext *ctx)
1416 {
1417 return read_prefixed_number(c, port, ctx);
1418 }
read_hash_e(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1419 SgObject read_hash_e(SgPort *port, SgChar c, dispmacro_param *param,
1420 SgReadContext *ctx)
1421 {
1422 return read_prefixed_number(c, port, ctx);
1423 }
read_hash_open_paren(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1424 SgObject read_hash_open_paren(SgPort *port, SgChar c, dispmacro_param *param,
1425 SgReadContext *ctx)
1426 {
1427 SgObject v = Sg_ListToVector(read_list(port, ')', ctx), 0, -1);
1428 v = Sg_AddConstantLiteral(v);
1429 return v;
1430 }
1431
read_hash_semicolon(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1432 SgObject read_hash_semicolon(SgPort *port, SgChar c,
1433 dispmacro_param *param, SgReadContext *ctx)
1434 {
1435 read_expr(port, ctx);
1436 return NULL;
1437 }
1438
1439 static const struct {
1440 const char* name;
1441 int code;
1442 } s_char_name[] = {
1443 { "nul", 0x0000 },
1444 { "null", 0x0000 },
1445 { "alarm", 0x0007 },
1446 { "backspace", 0x0008 },
1447 { "tab", 0x0009 },
1448 { "linefeed", 0x000A },
1449 { "newline", 0x000A },
1450 { "vtab", 0x000B },
1451 { "page", 0x000C },
1452 { "return", 0x000D },
1453 { "esc", 0x001B },
1454 { "escape", 0x001B },
1455 { "space", 0x0020 },
1456 { "delete", 0x007F }
1457 };
1458
read_hash_escape(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1459 SgObject read_hash_escape(SgPort *port, SgChar c, dispmacro_param *param,
1460 SgReadContext *ctx)
1461 {
1462 c = Sg_GetcUnsafe(port);
1463 if (c == 'x') {
1464 c = Sg_PeekcUnsafe(port);
1465 if (c == EOF || delimited(port, c)) return SG_MAKE_CHAR('x');
1466 return SG_MAKE_CHAR(read_hex_scalar_value(port, ctx));
1467 } else {
1468 SgChar buf[16];
1469 int i;
1470 if (c == '(') {
1471 c = Sg_PeekcUnsafe(port);
1472 if (c == EOF || delimited(port, c)) return SG_MAKE_CHAR('(');
1473 read_thing(port, ctx, buf, array_sizeof(buf), -1);
1474 lexical_error(port, ctx, UC("invalid lexical syntax #\\(%s"), buf);
1475 }
1476 Sg_UngetcUnsafe(port, c);
1477 read_thing(port, ctx, buf, array_sizeof(buf), -1);
1478 if (buf[0] == 0) {
1479 c = Sg_GetcUnsafe(port);
1480 if (c == EOF) {
1481 lexical_error(port, ctx,
1482 UC("unexpected end-of-file while reading character"));
1483 }
1484 return SG_MAKE_CHAR(c);
1485 }
1486 if (buf[1] == 0) return SG_MAKE_CHAR(buf[0]);
1487 for (i = 0; i < array_sizeof(s_char_name); i++) {
1488 if (ustrcmp(buf, s_char_name[i].name) == 0)
1489 return SG_MAKE_CHAR(s_char_name[i].code);
1490 }
1491 /* I assume this is not happen */
1492 lexical_error(port, ctx, UC("invalid lexical syntax #\\%s"), buf);
1493 }
1494 return SG_UNDEF; /* dummy */
1495 }
1496
read_hash_equal(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1497 SgObject read_hash_equal(SgPort *port, SgChar c, dispmacro_param *param,
1498 SgReadContext *ctx)
1499 {
1500 if (!ctx->graph) {
1501 lexical_error(port, ctx, UC("invalid lexical syntax #="));
1502 }
1503 if (param->present) {
1504 SgObject obj = read_expr(port, ctx);
1505 intptr_t mark = param->value;
1506 if (SG_EOFP(obj)) {
1507 lexical_error(port, ctx,
1508 UC("unexpected end-of-file while reading tag #%ld="), mark);
1509 }
1510 if (SG_UNDEFP(Sg_HashTableRef(ctx->graph, SG_MAKE_INT(mark), SG_UNDEF))) {
1511 Sg_HashTableSet(ctx->graph, SG_MAKE_INT(mark), obj, 0);
1512 return obj;
1513 }
1514 lexical_error(port, ctx, UC("duplicate tag #%ld="), mark);
1515 } else {
1516 lexical_error(port, ctx,
1517 UC("invalid lexical syntax %S"), SG_MAKE_CHAR(c));
1518 }
1519 return SG_UNDEF; /* dummy */
1520 }
1521
read_hash_hash(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1522 SgObject read_hash_hash(SgPort *port, SgChar c, dispmacro_param *param,
1523 SgReadContext *ctx)
1524 {
1525 if (param->present) {
1526 long mark = param->value;
1527 SgSharedRef *ref = make_shared_ref(mark);
1528 ctx->graphRef = TRUE;
1529 return SG_OBJ(ref);
1530 } else {
1531 lexical_error(port, ctx,
1532 UC("invalid lexical syntax %S"), SG_MAKE_CHAR(c));
1533 }
1534 return SG_UNDEF; /* dummy */
1535 }
1536
read_hash_less(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1537 SgObject read_hash_less(SgPort *port, SgChar c, dispmacro_param *param,
1538 SgReadContext *ctx)
1539 {
1540 /* #<(library) ...> imports readtables */
1541 SgObject libs = read_list(port, '>', ctx);
1542 SgObject cp;
1543 SG_FOR_EACH(cp, libs) {
1544 SgObject name = SG_CAR(cp);
1545 if (SG_PAIRP(name)) {
1546 SgObject lib = Sg_FindLibrary(name, FALSE);
1547 if (SG_FALSEP(lib)) {
1548 lexical_error(port, ctx, UC("no library named %S"), name);
1549 return NULL;
1550 }
1551 if (SG_LIBRARY_READTABLE(lib)) {
1552 ENSURE_COPIED_TABLE(port);
1553 add_read_table(SG_LIBRARY_READTABLE(lib), Sg_PortReadTable(port));
1554 }
1555 } else {
1556 lexical_error(port, ctx,
1557 UC("library name required but got %S"), name);
1558 return NULL;
1559 }
1560 }
1561 return NULL;
1562 }
1563
read_hash_colon(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1564 SgObject read_hash_colon(SgPort *port, SgChar c, dispmacro_param *param,
1565 SgReadContext *ctx)
1566 {
1567 /* TODO how to handle |? */
1568 SgString *s = read_compatible_symbol(port, -1, ctx, NULL);
1569 return Sg_MakeSymbol(s, FALSE);
1570 }
1571
dispmacro_reader(SgPort * port,SgChar c,SgReadContext * ctx)1572 SgObject dispmacro_reader(SgPort *port, SgChar c, SgReadContext *ctx)
1573 {
1574 readtable_t *table;
1575 disptab_t *disptab;
1576 if (c >= MAX_READTABLE_CHAR) {
1577 lexical_error(port, ctx, UC("macro char %S is out of range"),
1578 SG_MAKE_CHAR(c));
1579 }
1580 table = Sg_PortReadTable(port);
1581 disptab = table->readtable[c].disp;
1582 if (!disptab) {
1583 lexical_error(port, ctx,
1584 UC("%S is not a dispatch macro character"), SG_MAKE_CHAR(c));
1585 } else {
1586 dispmacro_param param;
1587 SgChar c2 = Sg_GetcUnsafe(port);
1588 if (c2 >= '0' && c2 <= '9') {
1589 param.present = TRUE;
1590 param.value = c2 - '0';
1591 while (1) {
1592 c2 = Sg_GetcUnsafe(port);
1593 if (c2 < '0' || c2 > '9') break;
1594 param.value = param.value * 10 + c2 - '0';
1595 if (param.value < 0 || (long)param.value > SG_INT_MAX) {
1596 lexical_error(port, ctx,
1597 UC("invalid object tag, value out of range"));
1598 }
1599 }
1600 } else {
1601 param.present = FALSE;
1602 }
1603 if (c2 == EOF) {
1604 lexical_error(port, ctx, UC("imcoplete dispatch macro"));
1605 }
1606 if (c2 >= MAX_READTABLE_CHAR) {
1607 lexical_error(port, ctx, UC("macro sub char %S is out of range"),
1608 SG_MAKE_CHAR(c2));
1609 }
1610 if (disptab[c2].cfunc) {
1611 return (*disptab[c2].cfunc)(port, c2, ¶m, ctx);
1612 }
1613 if (SG_EQ(disptab[c2].sfunc, SG_UNBOUND)) {
1614 lexical_error(port, ctx, UC("%S is not a dispatch macro sub character"),
1615 SG_MAKE_CHAR(c2));
1616 }
1617 /* FIXME: we should eliminate this type of dispatch. */
1618 if (SG_PROCEDURE_REQUIRED(disptab[c2].sfunc) > 3) {
1619 return Sg_Apply4(disptab[c2].sfunc, port, SG_MAKE_CHAR(c2),
1620 param.present ? SG_MAKE_INT(param.value) : SG_FALSE,
1621 ctx);
1622 } else {
1623 return Sg_Apply3(disptab[c2].sfunc, port, SG_MAKE_CHAR(c2),
1624 param.present ? SG_MAKE_INT(param.value) : SG_FALSE);
1625 }
1626 }
1627 return SG_UNDEF; /* dummy */
1628 }
1629
read_expr4(SgPort * port,int flags,SgChar delim,SgReadContext * ctx)1630 SgObject read_expr4(SgPort *port, int flags, SgChar delim, SgReadContext *ctx)
1631 {
1632 SgChar c;
1633 readtable_t *table;
1634 SgObject item;
1635 SgVM *vm = Sg_VM();
1636 while (1) {
1637 top:
1638 /* when previous execution was (values), then valuesCount = 0 and
1639 this skips all read expression. to prevent that we need to
1640 set it 1 here.
1641 this call must return a value anyway :)
1642 */
1643 vm->valuesCount = 1;
1644 c = Sg_GetcUnsafe(port);
1645 /* we ignore unicode space for now. */
1646 if (c == EOF) {
1647 if (flags & ACCEPT_EOF)
1648 return SG_EOF;
1649 lexical_error(port, ctx, UC("unexpected end-of-file"));
1650 }
1651 if (c > 127 && Sg_Ucs4WhiteSpaceP(c)) goto top;
1652 /* for reading list it does not matter either ')' or ']'
1653 if we got delmiter we can simply return RPAREN*/
1654 if (c == delim) return SG_SYMBOL_RPAREN;
1655
1656 parsing_line(ctx, Sg_LineNo(port));
1657 /* need to be re-get for #!read-macro */
1658 table = Sg_PortReadTable(port);
1659 if (c < 128 && isdigit(c)) {
1660 return read_symbol_or_number(port, c, table, ctx);
1661 }
1662
1663 if (c < MAX_READTABLE_CHAR) {
1664 /* lookup readtable */
1665 switch (table->readtable[c].type) {
1666 case CT_WHITE_SPACE: goto top;
1667 case CT_SINGLE_ESCAPE:
1668 /* TODO how to treat? */
1669 Sg_UngetcUnsafe(port, c);
1670 return read_escaped_symbol(port, ctx);
1671 case CT_TERM_MACRO:
1672 case CT_NON_TERM_MACRO: {
1673 SgObject o = macro_reader(port, c, table->readtable, ctx);
1674 /* if the (values) is the result of reader then we ignore the result */
1675 if (o && vm->valuesCount) return o;
1676 break;
1677 }
1678 case CT_ILLEGAL:
1679 lexical_error(port, ctx,
1680 UC("invalid character %U during reading identifier"), c);
1681 break;
1682 default:
1683 goto read_sym_or_num;
1684 }
1685 } else {
1686 goto read_sym_or_num;
1687 }
1688 }
1689 read_sym_or_num:
1690 item = read_symbol_or_number(port, c, table, ctx);
1691 if (!ctx->escapedp && SG_EQ(item, SG_SYMBOL_DOT)) {
1692 if (flags & ACCEPT_DOT) return item;
1693 lexical_error(port, ctx, UC("misplaced dot('.')"));
1694 }
1695 return item;
1696 }
lookup_graph(SgPort * port,SgReadContext * ctx,SgSharedRef * ref)1697 static SgObject lookup_graph(SgPort *port, SgReadContext *ctx,
1698 SgSharedRef *ref)
1699 {
1700 SgObject obj = Sg_HashTableRef(ctx->graph, ref->index, SG_UNDEF);
1701 if (SG_SHAREDREF_P(obj)) return lookup_graph(port, ctx, SG_SHAREDREF(obj));
1702 if (obj != SG_UNDEF) return obj;
1703 lexical_error(port, ctx, UC("attempt to reference undefined tag #%A#"),
1704 ref->index);
1705 return SG_UNDEF; /* dummy */
1706 }
1707
link_graph(SgPort * port,SgReadContext * ctx,SgObject obj)1708 static void link_graph(SgPort *port, SgReadContext *ctx, SgObject obj)
1709 {
1710 if (SG_PAIRP(obj)) {
1711 if (SG_SHAREDREF_P(SG_CAR(obj))) {
1712 SG_SET_CAR(obj, lookup_graph(port, ctx, SG_SHAREDREF(SG_CAR(obj))));
1713 } else {
1714 link_graph(port, ctx, SG_CAR(obj));
1715 }
1716 if (SG_SHAREDREF_P(SG_CDR(obj))) {
1717 SG_SET_CDR(obj, lookup_graph(port, ctx, SG_SHAREDREF(SG_CDR(obj))));
1718 } else {
1719 link_graph(port, ctx, SG_CDR(obj));
1720 }
1721 return;
1722 }
1723 if (SG_VECTORP(obj)) {
1724 long n = SG_VECTOR_SIZE(obj), i;
1725 for (i = 0; i < n; i++) {
1726 if (SG_SHAREDREF_P(SG_VECTOR_ELEMENT(obj, i))) {
1727 SG_VECTOR_ELEMENT(obj, i)
1728 = lookup_graph(port, ctx, SG_SHAREDREF(SG_VECTOR_ELEMENT(obj, i)));
1729 } else {
1730 link_graph(port, ctx, SG_VECTOR_ELEMENT(obj, i));
1731 }
1732 }
1733 return;
1734 }
1735 }
1736
Sg_ReadWithContext(SgObject port,SgReadContext * ctx)1737 SgObject Sg_ReadWithContext(SgObject port, SgReadContext *ctx)
1738 {
1739 SgObject obj;
1740 /* extends_loading_table(port); */
1741 if (ctx->graph) {
1742 /* clear it */
1743 Sg_HashCoreClear(SG_HASHTABLE_CORE(ctx->graph), 0);
1744 }
1745 /* we only set if the flag is explicitly set.
1746 this makes #!fold-case one time only. */
1747 if (ctx->flags & SG_READ_NO_CASE ||
1748 ctx->flags & SG_READ_CASE) {
1749 ENSURE_COPIED_TABLE(port);
1750 /* one or the other */
1751 SG_PORT_READTABLE(port)->insensitiveP = (ctx->flags & SG_READ_NO_CASE);
1752 }
1753
1754 ctx->firstLine = Sg_LineNo(port);
1755 obj = read_expr4(port, ACCEPT_EOF, EOF, ctx);
1756 if (!ctx->escapedp && SG_EQ(obj, SG_SYMBOL_DOT)) {
1757 lexical_error(port, ctx, UC("misplaced dot('.')"));
1758 }
1759 if (ctx->graph && ctx->graphRef) link_graph(port, ctx, obj);
1760 parsing_range(ctx, ctx->firstLine, Sg_LineNo(port));
1761 return obj;
1762 }
1763
Sg_Read(SgObject port,int readSharedObject)1764 SgObject Sg_Read(SgObject port, int readSharedObject)
1765 {
1766 SgReadContext ctx = SG_STATIC_READ_CONTEXT;
1767 ASSERT(SG_PORTP(port));
1768 /* ASSERT(SG_TEXTUAL_PORTP(port)); */
1769 /* make read context for shared object */
1770 if (readSharedObject) {
1771 SgHashTable graph;
1772 Sg_InitHashTableSimple(&graph, SG_HASH_EQ, 1);
1773 ctx.graph = &graph;
1774 }
1775 ctx.graphRef = FALSE;
1776 ctx.flags = SG_READ_SOURCE_INFO;
1777 return Sg_ReadWithContext(SG_PORT(port), &ctx);
1778 }
1779
Sg_ReadDelimitedList(SgObject port,SgChar delim,int sharedP)1780 SgObject Sg_ReadDelimitedList(SgObject port, SgChar delim, int sharedP)
1781 {
1782 SgObject obj;
1783 SgReadContext ctx = SG_STATIC_READ_CONTEXT;
1784 ASSERT(SG_PORTP(port));
1785
1786 /* extends_loading_table(port); */
1787 /* make read context for shared object */
1788 if (sharedP) {
1789 SgHashTable graph;
1790 Sg_InitHashTableSimple(&graph, SG_HASH_EQ, 1);
1791 ctx.graph = &graph;
1792 }
1793 ctx.graphRef = FALSE;
1794 ctx.firstLine = Sg_LineNo(port);
1795 obj = read_list(port, delim, &ctx);
1796 if (ctx.graph && ctx.graphRef) link_graph(port, &ctx, obj);
1797
1798 return obj;
1799 }
1800
1801 static disptab_t* alloc_disptab();
1802
make_readtable(int init)1803 static readtable_t* make_readtable(int init)
1804 {
1805 readtable_t *tab = SG_NEW(readtable_t);
1806 tab->insensitiveP = FALSE;
1807 if (init) {
1808 readtab_t *r = tab->readtable;
1809 int i;
1810 for (i = 0; i <= ' '; i++) {
1811 r[i].type = CT_ILLEGAL;
1812 r[i].cfunc = NULL;
1813 r[i].sfunc = SG_UNBOUND;
1814 r[i].disp = NULL;
1815 }
1816 for (; i < MAX_READTABLE_CHAR; i++) {
1817 r[i].type = CT_CONSTITUENT;
1818 r[i].cfunc = NULL;
1819 r[i].sfunc = SG_UNBOUND;
1820 r[i].disp = NULL;
1821 }
1822 }
1823 return tab;
1824 }
1825
add_read_table(readtable_t * src,readtable_t * dst)1826 void add_read_table(readtable_t *src, readtable_t *dst)
1827 {
1828 int i;
1829 readtab_t *sr = src->readtable;
1830 readtab_t *dr = dst->readtable;
1831 for (i = 0; i < MAX_READTABLE_CHAR; i++) {
1832 if (sr[i].disp) {
1833 int j;
1834 disptab_t *d = dr[i].disp;
1835 /* TODO should not be overwritten? */
1836 dr[i] = sr[i];
1837 if (!d) dr[i].disp = alloc_disptab();
1838 else dr[i].disp = d;
1839
1840 for (j = 0; j < MAX_READTABLE_CHAR; j++) {
1841 if (!SG_UNBOUNDP(sr[i].disp[j].sfunc) || sr[i].disp[j].cfunc) {
1842 dr[i].disp[j] = sr[i].disp[j];
1843 }
1844 }
1845 } else if (sr[i].cfunc) {
1846 dr[i] = sr[i];
1847 } else if (!SG_UNBOUNDP(sr[i].sfunc)) {
1848 dr[i] = sr[i];
1849 }
1850 }
1851 }
1852
default_readtable(int copyP)1853 static readtable_t* default_readtable(int copyP)
1854 {
1855 readtable_t* table;
1856 if (SG_VM_IS_SET_FLAG(Sg_VM(), SG_R6RS_MODE)) {
1857 table = &r6rs_read_table;
1858 } else if (SG_VM_IS_SET_FLAG(Sg_VM(), SG_R7RS_MODE)) {
1859 table = &r7rs_read_table;
1860 } else {
1861 table = &compat_read_table;
1862 }
1863 if (copyP) {
1864 return Sg_CopyReadTable(table);
1865 } else {
1866 return table;
1867 }
1868 }
1869
Sg_DefaultReadTable()1870 readtable_t* Sg_DefaultReadTable()
1871 {
1872 return default_readtable(TRUE);
1873 }
1874
Sg_SetPortReadTable(SgPort * port,readtable_t * table)1875 void Sg_SetPortReadTable(SgPort *port, readtable_t *table)
1876 {
1877 SG_PORT_READTABLE(port) = table;
1878 }
1879
Sg_PortReadTable(SgPort * port)1880 readtable_t* Sg_PortReadTable(SgPort *port)
1881 {
1882 readtable_t* table = SG_PORT_READTABLE(port);
1883 if (table) return table;
1884 return default_readtable(FALSE);
1885 }
1886
Sg_EnsureCopiedReadTable(SgPort * port)1887 readtable_t* Sg_EnsureCopiedReadTable(SgPort *port)
1888 {
1889 ENSURE_COPIED_TABLE(port);
1890 return SG_PORT_READTABLE(port);
1891 }
1892
Sg_CopyReadTable(readtable_t * src)1893 readtable_t* Sg_CopyReadTable(readtable_t *src)
1894 {
1895 readtable_t *newr = make_readtable(FALSE);
1896 readtab_t *newt = newr->readtable, *oldt = src->readtable;
1897 int i;
1898 *newr = *src;
1899 for (i = 0; i < MAX_READTABLE_CHAR; i++) {
1900 if (oldt[i].disp) {
1901 newt[i].disp = alloc_disptab();
1902 memcpy(newt[i].disp, oldt[i].disp,
1903 sizeof(*(newt[i].disp)) * MAX_READTABLE_CHAR);
1904 }
1905 }
1906 return newr;
1907 }
Sg_PortCaseInsensitiveP(SgPort * port)1908 int Sg_PortCaseInsensitiveP(SgPort *port)
1909 {
1910 if (SG_PORT_READTABLE(port)) {
1911 return SG_PORT_READTABLE(port)->insensitiveP;
1912 }
1913 return FALSE;
1914 }
1915
1916 static SgInternalMutex obtable_mutax;
1917 static SgHashTable *obtable = NULL;
1918
Sg_ConstantLiteralP(SgObject o)1919 int Sg_ConstantLiteralP(SgObject o)
1920 {
1921 SgObject e;
1922 if (SG_PAIRP(o)) {
1923 /* simple check */
1924 return !SG_FALSEP(Sg_GetPairAnnotation(o, SYM_CONST));
1925 } else if (SG_VECTORP(o)) {
1926 /* again simple check */
1927 return SG_LITERAL_VECTORP(o);
1928 }
1929 e = Sg_HashTableRef(obtable, o, SG_UNBOUND);
1930 if (SG_UNBOUNDP(e)) return FALSE;
1931 /* constant literal must satisfy eq? */
1932 return e == o;
1933 }
1934
Sg_AddConstantLiteral(SgObject o)1935 SgObject Sg_AddConstantLiteral(SgObject o)
1936 {
1937 SgObject e;
1938 Sg_LockMutex(&obtable_mutax);
1939 e = Sg_HashTableRef(obtable, o, SG_UNBOUND);
1940 if (SG_UNBOUNDP(e)) {
1941 Sg_HashTableSet(obtable, o, o, SG_HASH_NO_OVERWRITE);
1942 /* TODO after CLOS, we should not use header bits */
1943 if (SG_VECTORP(o)) {
1944 SG_VECTOR_SET_LITERAL(o);
1945 }
1946 if (SG_BVECTORP(o)) {
1947 SG_BVECTOR_SET_LITERAL(o);
1948 }
1949 if (SG_PAIRP(o)) {
1950 /* do the cdr parts. */
1951 Sg_SetPairAnnotation(o, SYM_CONST, SG_TRUE);
1952 if (SG_PAIRP(SG_CAR(o))) {
1953 SG_SET_CAR(o, Sg_AddConstantLiteral(SG_CAR(o)));
1954 }
1955 if (SG_PAIRP(SG_CDR(o))) {
1956 SG_SET_CDR(o, Sg_AddConstantLiteral(SG_CDR(o)));
1957 }
1958 }
1959 } else {
1960 o = e;
1961 }
1962 Sg_UnlockMutex(&obtable_mutax);
1963 return o;
1964 }
1965
Sg_DelimitedCharP(SgChar c,SgPort * p)1966 int Sg_DelimitedCharP(SgChar c, SgPort *p)
1967 {
1968 return delimited(p, c);
1969 }
1970
1971 #define SCHEME_OBJ(NAME) SG_CPP_CAT(NAME, _stub)
1972 #define STUB_NAME(NAME) SG_CPP_CAT(NAME, stub)
1973
1974 /* initialize */
1975 #define DEFINE_MACRO_STUB(FN, NAME) \
1976 static SgObject STUB_NAME(FN) (SgObject *args, int argc, void *data_) \
1977 { \
1978 SgReadContext ctx = SG_STATIC_READ_CONTEXT; \
1979 SgPort *p; \
1980 SgChar c; \
1981 SgObject r; \
1982 if (argc != 2) { \
1983 Sg_WrongNumberOfArgumentsAtLeastViolation(SG_INTERN(NAME), \
1984 2, argc, SG_NIL); \
1985 } \
1986 if (!SG_PORTP(args[0])) { \
1987 Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME), \
1988 SG_MAKE_STRING("port"), \
1989 args[0], SG_NIL); \
1990 } \
1991 if (!SG_CHARP(args[1])) { \
1992 Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME), \
1993 SG_MAKE_STRING("char"), \
1994 args[1], SG_NIL); \
1995 } \
1996 p = SG_PORT(args[0]); \
1997 c = SG_CHAR_VALUE(args[1]); \
1998 r = (FN)(p, c, &ctx); \
1999 if (r) return r; \
2000 else return SG_UNDEF; \
2001 } \
2002 SG_DEFINE_SUBR(SCHEME_OBJ(FN), 2, 0, STUB_NAME(FN), SG_FALSE, NULL)
2003
2004 #define DEFINE_DISPMACRO_STUB(FN, NAME) \
2005 static SgObject STUB_NAME(FN) \
2006 (SgObject *args, int argc, void *data_) \
2007 { \
2008 SgReadContext ctx = SG_STATIC_READ_CONTEXT; \
2009 SgObject param_scm, r; \
2010 SgPort *p; \
2011 SgChar c; \
2012 dispmacro_param param; \
2013 if (argc != 3) { \
2014 Sg_WrongNumberOfArgumentsAtLeastViolation(SG_INTERN(NAME),\
2015 3, argc, SG_NIL);\
2016 } \
2017 if (!SG_PORTP(args[0])) { \
2018 Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME), \
2019 SG_MAKE_STRING("port"), \
2020 args[0], SG_NIL); \
2021 } \
2022 if (!SG_CHARP(args[1])) { \
2023 Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME), \
2024 SG_MAKE_STRING("char"), \
2025 args[1], SG_NIL); \
2026 } \
2027 p = SG_PORT(args[0]); \
2028 c = SG_CHAR_VALUE(args[1]); \
2029 param_scm = args[2]; \
2030 if (SG_FALSEP(param_scm)) { \
2031 param.present = FALSE; \
2032 param.value = 0; \
2033 } else if (SG_INTP(param_scm)) { \
2034 param.present = TRUE; \
2035 param.value = SG_INT_VALUE(param_scm); \
2036 } else { \
2037 Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME), \
2038 SG_MAKE_STRING("fixnum"), \
2039 param_scm, SG_NIL); \
2040 return SG_UNDEF; \
2041 } \
2042 r = (FN)(p, c, ¶m, &ctx); \
2043 if (r) return r; \
2044 else return SG_UNDEF; \
2045 } \
2046 SG_DEFINE_SUBR(SCHEME_OBJ(FN), 3, 0, STUB_NAME(FN), SG_FALSE, NULL)
2047
2048 DEFINE_MACRO_STUB(read_vertical_bar, "|-reader");
2049 DEFINE_MACRO_STUB(read_double_quote, "\"-reader");
2050 DEFINE_MACRO_STUB(read_quote, "'-reader");
2051 DEFINE_MACRO_STUB(read_open_paren, "(-reader");
2052 DEFINE_MACRO_STUB(read_close_paren, ")-reader");
2053 DEFINE_MACRO_STUB(read_open_bracket, "[-reader");
2054 DEFINE_MACRO_STUB(read_close_bracket, "]-reader");
2055 DEFINE_MACRO_STUB(read_semicolon, ";-reader");
2056 DEFINE_MACRO_STUB(read_quasiquote, "`-reader");
2057 DEFINE_MACRO_STUB(read_unquote, ",-reader");
2058 DEFINE_MACRO_STUB(read_colon, ":-reader");
2059 DEFINE_MACRO_STUB(dispmacro_reader, "dispatch-macro-reader");
2060
2061 DEFINE_DISPMACRO_STUB(read_hash_quote, "#'-reader");
2062 DEFINE_DISPMACRO_STUB(read_hash_quasiquote, "#`-reader");
2063 DEFINE_DISPMACRO_STUB(read_hash_unquote, "#,-reader");
2064 DEFINE_DISPMACRO_STUB(read_hash_bang, "#!-reader");
2065 DEFINE_DISPMACRO_STUB(read_hash_v, "#v-reader");
2066 DEFINE_DISPMACRO_STUB(read_hash_u, "#u-reader");
2067 DEFINE_DISPMACRO_STUB(read_hash_t, "#t-reader");
2068 DEFINE_DISPMACRO_STUB(read_hash_f, "#f-reader");
2069 DEFINE_DISPMACRO_STUB(read_hash_b, "#b-reader");
2070 DEFINE_DISPMACRO_STUB(read_hash_o, "#o-reader");
2071 DEFINE_DISPMACRO_STUB(read_hash_d, "#d-reader");
2072 DEFINE_DISPMACRO_STUB(read_hash_x, "#x-reader");
2073 DEFINE_DISPMACRO_STUB(read_hash_i, "#i-reader");
2074 DEFINE_DISPMACRO_STUB(read_hash_e, "#e-reader");
2075 DEFINE_DISPMACRO_STUB(read_hash_open_paren, "#(-reader");
2076 DEFINE_DISPMACRO_STUB(read_hash_semicolon, "#;-reader");
2077 DEFINE_DISPMACRO_STUB(read_hash_bar, "#|-reader");
2078 DEFINE_DISPMACRO_STUB(read_hash_escape, "#\\-reader");
2079 DEFINE_DISPMACRO_STUB(read_hash_equal, "#=-reader");
2080 DEFINE_DISPMACRO_STUB(read_hash_hash, "##-reader");
2081 DEFINE_DISPMACRO_STUB(read_hash_less, "#<-reader");
2082 DEFINE_DISPMACRO_STUB(read_hash_colon, "#:-reader");
2083
2084
Sg_GetMacroCharacter(SgChar c,readtable_t * table)2085 SgObject Sg_GetMacroCharacter(SgChar c, readtable_t *table)
2086 {
2087 ASSERT(table);
2088 if (c < MAX_READTABLE_CHAR) {
2089 readtab_t *r = &table->readtable[c];
2090 SgObject term;
2091 if (r->type == CT_NON_TERM_MACRO) term = SG_TRUE;
2092 else if (r->type == CT_TERM_MACRO) term = SG_FALSE;
2093 else return Sg_Values2(SG_FALSE, SG_FALSE);
2094 return Sg_Values2(SG_UNBOUNDP(r->sfunc) ? SG_UNBOUND : r->sfunc, term);
2095 }
2096 return Sg_Values2(SG_FALSE, SG_FALSE);
2097 }
2098
2099 #define macro_function_item(name) { (name), SG_OBJ(&(SCHEME_OBJ(name))) }
get_macro_function(SgObject fn)2100 static macro_function get_macro_function(SgObject fn)
2101 {
2102 static const struct {
2103 macro_function f;
2104 SgObject s;
2105 } x[] = {
2106 macro_function_item(read_vertical_bar),
2107 macro_function_item(read_double_quote),
2108 macro_function_item(read_quote),
2109 macro_function_item(read_open_paren),
2110 macro_function_item(read_close_paren),
2111 macro_function_item(read_open_bracket),
2112 macro_function_item(read_close_bracket),
2113 macro_function_item(read_semicolon),
2114 macro_function_item(read_quasiquote),
2115 macro_function_item(read_unquote),
2116 macro_function_item(read_colon),
2117 macro_function_item(dispmacro_reader)
2118 };
2119 int i;
2120 for (i = 0; i < array_sizeof(x); i++) {
2121 if (SG_EQ(fn, x[i].s)) return x[i].f;
2122 }
2123 return NULL;
2124 }
2125
Sg_SetMacroCharacter(SgChar c,SgObject proc,int nontermP,readtable_t * table)2126 void Sg_SetMacroCharacter(SgChar c, SgObject proc, int nontermP,
2127 readtable_t *table)
2128 {
2129 ASSERT(table);
2130 if (!isdigit(c) && c < MAX_READTABLE_CHAR) {
2131 readtab_t *r = &table->readtable[c];
2132 r->type = nontermP ? CT_NON_TERM_MACRO : CT_TERM_MACRO;
2133 r->sfunc = proc;
2134 r->cfunc = get_macro_function(proc);
2135 if (r->disp) r->disp = 0;
2136 } else {
2137 Sg_ImplementationRestrictionViolation
2138 (SG_INTERN("set-macro-character"),
2139 SG_MAKE_STRING("non ascii char is not supported"),
2140 SG_MAKE_CHAR(c));
2141 }
2142 }
2143
Sg_GetDispatchMacroCharacter(SgChar c,SgChar subc,readtable_t * table)2144 SgObject Sg_GetDispatchMacroCharacter(SgChar c, SgChar subc, readtable_t *table)
2145 {
2146 ASSERT(table);
2147 if (c < MAX_READTABLE_CHAR) {
2148 readtab_t *r = &table->readtable[c];
2149 if (!r->disp) {
2150 Sg_AssertionViolation
2151 (SG_INTERN("get-dispatch-macro-character"),
2152 SG_MAKE_STRING("given character is not dispatch macro character"),
2153 SG_MAKE_CHAR(c));
2154 }
2155 return SG_UNBOUNDP(r->disp[subc].sfunc) ? SG_UNBOUND : r->disp[subc].sfunc;
2156 }
2157 return SG_FALSE;
2158 }
2159
get_dispatch_macro_function(SgObject fn)2160 static dispmacro_function get_dispatch_macro_function(SgObject fn)
2161 {
2162 static const struct {
2163 dispmacro_function f;
2164 SgObject s;
2165 } x[] = {
2166 macro_function_item(read_hash_quote),
2167 macro_function_item(read_hash_quasiquote),
2168 macro_function_item(read_hash_unquote),
2169 macro_function_item(read_hash_bang),
2170 macro_function_item(read_hash_v),
2171 macro_function_item(read_hash_u),
2172 macro_function_item(read_hash_t),
2173 macro_function_item(read_hash_f),
2174 macro_function_item(read_hash_b),
2175 macro_function_item(read_hash_o),
2176 macro_function_item(read_hash_d),
2177 macro_function_item(read_hash_x),
2178 macro_function_item(read_hash_i),
2179 macro_function_item(read_hash_e),
2180 macro_function_item(read_hash_open_paren),
2181 macro_function_item(read_hash_semicolon),
2182 macro_function_item(read_hash_bar),
2183 macro_function_item(read_hash_escape),
2184 macro_function_item(read_hash_equal),
2185 macro_function_item(read_hash_hash),
2186 macro_function_item(read_hash_less),
2187 macro_function_item(read_hash_colon)
2188 };
2189 int i;
2190 for (i = 0; i < array_sizeof(x); i++) {
2191 if (SG_EQ(fn, x[i].s)) return x[i].f;
2192 }
2193 return NULL;
2194 }
2195
Sg_MakeDispatchMacroCharacter(SgChar c,int nontermP,readtable_t * table)2196 int Sg_MakeDispatchMacroCharacter(SgChar c, int nontermP, readtable_t *table)
2197 {
2198 ASSERT(table);
2199 if (!isdigit(c) && c < MAX_READTABLE_CHAR) {
2200 readtab_t *r = &table->readtable[c];
2201 if (!r->disp) r->disp = alloc_disptab();
2202 r->type = nontermP ? CT_NON_TERM_MACRO : CT_TERM_MACRO;
2203 r->sfunc = SG_OBJ(&SCHEME_OBJ(dispmacro_reader));
2204 r->cfunc = dispmacro_reader;
2205 } else {
2206 Sg_ImplementationRestrictionViolation
2207 (SG_INTERN("make-dispatch-macro-character"),
2208 SG_MAKE_STRING("non ascii char is not supported"),
2209 SG_MAKE_CHAR(c));
2210 }
2211 return TRUE;
2212 }
2213
Sg_SetDispatchMacroCharacter(SgChar c,SgChar subc,SgObject proc,readtable_t * table)2214 void Sg_SetDispatchMacroCharacter(SgChar c, SgChar subc, SgObject proc,
2215 readtable_t *table)
2216 {
2217 ASSERT(table);
2218 if (!isdigit(c) && !isdigit(subc) &&
2219 c < MAX_READTABLE_CHAR && subc < MAX_READTABLE_CHAR) {
2220 readtab_t *r = &table->readtable[c];
2221 if (!r->disp) {
2222 Sg_AssertionViolation
2223 (SG_INTERN("set-dispatch-macro-character"),
2224 SG_MAKE_STRING("given character is not dispatch macro character"),
2225 SG_MAKE_CHAR(c));
2226 }
2227 r->disp[subc].sfunc = proc;
2228 r->disp[subc].cfunc = get_dispatch_macro_function(proc);
2229 } else {
2230 Sg_ImplementationRestrictionViolation
2231 (SG_INTERN("set-dispatch-macro-character"),
2232 SG_MAKE_STRING("non ascii char is not supported"),
2233 SG_LIST2(SG_MAKE_CHAR(c), SG_MAKE_CHAR(subc)));
2234 }
2235 }
2236
Sg_EnsureLibraryReadTable(SgLibrary * library)2237 void Sg_EnsureLibraryReadTable(SgLibrary *library)
2238 {
2239 if (!SG_LIBRARY_READTABLE(library)) {
2240 SG_LIBRARY_READTABLE(library) = make_readtable(TRUE);
2241 }
2242 }
2243
2244 #define SET_READ_MACRO(R, CH, FN, TYPE) \
2245 ((R)[(CH)].type = (TYPE), \
2246 (R)[(CH)].cfunc = (FN), \
2247 (R)[(CH)].sfunc = SG_OBJ(&SG_CPP_CAT(FN, _stub)))
2248 #define SET_TERM_MACRO(R, CH, FN) SET_READ_MACRO(R, CH, FN, CT_TERM_MACRO)
2249 #define SET_NONTERM_MACRO(R, CH, FN) \
2250 SET_READ_MACRO(R, CH, FN, CT_NON_TERM_MACRO)
2251 #define SET_DISP_MACRO(R, CH, FN) \
2252 ((R)[(CH)].cfunc = (FN), \
2253 (R)[(CH)].sfunc = SG_OBJ(&SG_CPP_CAT(FN, _stub)))
2254
alloc_disptab()2255 disptab_t* alloc_disptab()
2256 {
2257 disptab_t * d = SG_NEW_ARRAY(disptab_t, MAX_READTABLE_CHAR);
2258 int i;
2259 for (i = 0; i < MAX_READTABLE_CHAR; i++) {
2260 d[i].cfunc = 0;
2261 d[i].sfunc = SG_UNBOUND;
2262 }
2263 return d;
2264 }
2265
2266 #define INIT_COMPAT 0
2267 #define INIT_R6RS 1
2268 #define INIT_R7RS 2
2269
init_readtable(readtable_t * table,int type)2270 static void init_readtable(readtable_t *table, int type)
2271 {
2272 int i;
2273 readtab_t *r = table->readtable;
2274 disptab_t *d = alloc_disptab();
2275 for (i = 0; i <= ' '; i++) {
2276 r[i].type = CT_ILLEGAL;
2277 r[i].cfunc = NULL;
2278 r[i].sfunc = SG_UNBOUND;
2279 r[i].disp = NULL;
2280 }
2281 for (; i < MAX_READTABLE_CHAR; i++) {
2282 r[i].type = CT_CONSTITUENT;
2283 r[i].cfunc = NULL;
2284 r[i].sfunc = SG_UNBOUND;
2285 r[i].disp = NULL;
2286 }
2287 r['\t'].type = CT_WHITE_SPACE;
2288 r['\n'].type = CT_WHITE_SPACE;
2289 r['\v'].type = CT_WHITE_SPACE;
2290 r['\f'].type = CT_WHITE_SPACE;
2291 r['\r'].type = CT_WHITE_SPACE;
2292 r[' '].type = CT_WHITE_SPACE;
2293 r['\\'].type = CT_SINGLE_ESCAPE;
2294
2295 SET_TERM_MACRO(r, '"', read_double_quote);
2296 SET_TERM_MACRO(r, '(', read_open_paren);
2297 SET_TERM_MACRO(r, ')', read_close_paren);
2298 SET_TERM_MACRO(r, '[', read_open_bracket);
2299 SET_TERM_MACRO(r, ']', read_close_bracket);
2300 SET_TERM_MACRO(r, ';', read_semicolon);
2301
2302
2303 r['#'].disp = d;
2304
2305 SET_DISP_MACRO(d, '\'', read_hash_quote);
2306 SET_DISP_MACRO(d, '`', read_hash_quasiquote);
2307 SET_DISP_MACRO(d, ',', read_hash_unquote);
2308 SET_DISP_MACRO(d, '!', read_hash_bang);
2309 /* strict mode things */
2310 if (type == INIT_R6RS) {
2311 SET_DISP_MACRO(d, 'v', read_hash_v);
2312 SET_TERM_MACRO(r, '#', dispmacro_reader);
2313 /* tread these as nonterm and let validator raise an error
2314 for "foo'bar" case.
2315 */
2316 SET_NONTERM_MACRO(r, '\'', read_quote);
2317 SET_NONTERM_MACRO(r, '`', read_quasiquote);
2318 SET_NONTERM_MACRO(r, ',', read_unquote);
2319
2320 table->symbol_reader = read_r6rs_symbol;
2321 } else {
2322 SET_NONTERM_MACRO(r, '#', dispmacro_reader);
2323 SET_DISP_MACRO(d, 'u', read_hash_u);
2324 SET_NONTERM_MACRO(r, '|', read_vertical_bar);
2325 SET_TERM_MACRO(r, '\'', read_quote);
2326 SET_TERM_MACRO(r, '`', read_quasiquote);
2327 SET_TERM_MACRO(r, ',', read_unquote);
2328
2329 table->symbol_reader = read_compatible_symbol;
2330 }
2331
2332 /* only compat mode */
2333 if (type == INIT_COMPAT) {
2334 /* #:a is only for compat mode */
2335 SET_DISP_MACRO(d, ':', read_hash_colon);
2336 SET_NONTERM_MACRO(r, ':', read_colon);
2337 /* well it's set only R6RS above but compat mode should read this as well */
2338 SET_DISP_MACRO(d, 'v', read_hash_v);
2339 }
2340 SET_DISP_MACRO(d, 't', read_hash_t);
2341 SET_DISP_MACRO(d, 'T', read_hash_t);
2342 SET_DISP_MACRO(d, 'f', read_hash_f);
2343 SET_DISP_MACRO(d, 'F', read_hash_f);
2344 SET_DISP_MACRO(d, 'b', read_hash_b);
2345 SET_DISP_MACRO(d, 'B', read_hash_b);
2346 SET_DISP_MACRO(d, 'o', read_hash_o);
2347 SET_DISP_MACRO(d, 'O', read_hash_o);
2348 SET_DISP_MACRO(d, 'd', read_hash_d);
2349 SET_DISP_MACRO(d, 'D', read_hash_d);
2350 SET_DISP_MACRO(d, 'x', read_hash_x);
2351 SET_DISP_MACRO(d, 'X', read_hash_x);
2352 SET_DISP_MACRO(d, 'i', read_hash_i);
2353 SET_DISP_MACRO(d, 'I', read_hash_i);
2354 SET_DISP_MACRO(d, 'e', read_hash_e);
2355 SET_DISP_MACRO(d, 'E', read_hash_e);
2356 SET_DISP_MACRO(d, '(', read_hash_open_paren);
2357 SET_DISP_MACRO(d, ';', read_hash_semicolon);
2358 SET_DISP_MACRO(d, '|', read_hash_bar);
2359 SET_DISP_MACRO(d, '\\', read_hash_escape);
2360 SET_DISP_MACRO(d, '=', read_hash_equal);
2361 SET_DISP_MACRO(d, '#', read_hash_hash);
2362 SET_DISP_MACRO(d, '<', read_hash_less);
2363 }
2364
Sg__InitReader()2365 void Sg__InitReader()
2366 {
2367 init_readtable(&r6rs_read_table, INIT_R6RS);
2368 init_readtable(&r7rs_read_table, INIT_R7RS);
2369 init_readtable(&compat_read_table, INIT_COMPAT);
2370
2371 Sg_InitMutex(&obtable_mutax, TRUE);
2372 obtable = Sg_MakeHashTableSimple(SG_HASH_EQUAL, 4096);
2373
2374 #define SET_READER_NAME(fn, name) \
2375 (SG_PROCEDURE_NAME(&(SCHEME_OBJ(fn))) = SG_MAKE_STRING(name))
2376 SET_READER_NAME(read_vertical_bar, "|-reader");
2377 SET_READER_NAME(read_double_quote, "\"-reader");
2378 SET_READER_NAME(read_quote, "'-reader");
2379 SET_READER_NAME(read_open_paren, "(-reader");
2380 SET_READER_NAME(read_close_paren, ")-reader");
2381 SET_READER_NAME(read_open_bracket, "[-reader");
2382 SET_READER_NAME(read_close_bracket, "]-reader");
2383 SET_READER_NAME(read_semicolon, ";-reader");
2384 SET_READER_NAME(read_quasiquote, "`-reader");
2385 SET_READER_NAME(read_unquote, ",-reader");
2386 SET_READER_NAME(read_colon, ":-reader");
2387 SET_READER_NAME(dispmacro_reader, "dispatch-macro-reader");
2388
2389 SET_READER_NAME(read_hash_quote, "#'-reader");
2390 SET_READER_NAME(read_hash_quasiquote, "#`-reader");
2391 SET_READER_NAME(read_hash_unquote, "#,-reader");
2392 SET_READER_NAME(read_hash_bang, "#!-reader");
2393 SET_READER_NAME(read_hash_v, "#v-reader");
2394 SET_READER_NAME(read_hash_u, "#u-reader");
2395 SET_READER_NAME(read_hash_t, "#t-reader");
2396 SET_READER_NAME(read_hash_f, "#f-reader");
2397 SET_READER_NAME(read_hash_b, "#b-reader");
2398 SET_READER_NAME(read_hash_o, "#o-reader");
2399 SET_READER_NAME(read_hash_d, "#d-reader");
2400 SET_READER_NAME(read_hash_x, "#x-reader");
2401 SET_READER_NAME(read_hash_i, "#i-reader");
2402 SET_READER_NAME(read_hash_e, "#e-reader");
2403 SET_READER_NAME(read_hash_open_paren, "#(-reader");
2404 SET_READER_NAME(read_hash_semicolon, "#;-reader");
2405 SET_READER_NAME(read_hash_bar, "#|-reader");
2406 SET_READER_NAME(read_hash_escape, "#\\-reader");
2407 SET_READER_NAME(read_hash_equal, "#=-reader");
2408 SET_READER_NAME(read_hash_hash, "##-reader");
2409 SET_READER_NAME(read_hash_less, "#<-reader");
2410 SET_READER_NAME(read_hash_colon, "#:-reader");
2411
2412 }
2413
Sg__InitReaderClass()2414 void Sg__InitReaderClass()
2415 {
2416 SgLibrary *lib = Sg_FindLibrary(SG_INTERN("(sagittarius clos)"), TRUE);
2417 #define CINIT(cl, nam) \
2418 Sg_InitStaticClassWithMeta(cl, UC(nam), lib, NULL, SG_FALSE, NULL, 0)
2419
2420 /* shared-ref */
2421 CINIT(SG_CLASS_SHARED_REF, "<shared-ref>");
2422 /* for now no slot def */
2423 CINIT(SG_CLASS_READ_CONTEXT, "<read-context>");
2424
2425 DEFAULT_CONTEXT = Sg_MakeDefaultReadContext();
2426 SYM_CONST = SG_INTERN("const");
2427 SYM_SOURCE_INFO = SG_INTERN("source-info");
2428 }
2429
2430 /*
2431 end of file
2432 Local Variables:
2433 coding: utf-8-unix
2434 End:
2435 */
2436