1 /* reader.c                                        -*- mode:c; coding:utf-8; -*-
2  *
3  *   Copyright (c) 2010-2021  Takashi Kato <ktakashi@ymail.com>
4  *
5  *   Redistribution and use in source and binary forms, with or without
6  *   modification, are permitted provided that the following conditions
7  *   are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in the
14  *      documentation and/or other materials provided with the distribution.
15  *
16  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *  $Id: $
29  */
30 #include <ctype.h>
31 #include <string.h>
32 #define LIBSAGITTARIUS_BODY
33 #include "sagittarius/private/reader.h"
34 #include "sagittarius/private/core.h"
35 #include "sagittarius/private/port.h"
36 #include "sagittarius/private/pair.h"
37 #include "sagittarius/private/symbol.h"
38 #include "sagittarius/private/hashtable.h"
39 #include "sagittarius/private/transcoder.h"
40 #include "sagittarius/private/compare.h"
41 #include "sagittarius/private/keyword.h"
42 #include "sagittarius/private/builtin-symbols.h"
43 #include "sagittarius/private/error.h"
44 #include "sagittarius/private/gloc.h"
45 #include "sagittarius/private/writer.h"
46 #include "sagittarius/private/unicode.h"
47 #include "sagittarius/private/values.h"
48 #include "sagittarius/private/vector.h"
49 #include "sagittarius/private/number.h"
50 #include "sagittarius/private/vm.h"
51 #include "sagittarius/private/bytevector.h"
52 #include "sagittarius/private/unicode.h"
53 #include "sagittarius/private/weak.h"
54 #include "sagittarius/private/writer.h"
55 #include "sagittarius/private/library.h"
56 
57 #include "shortnames.incl"
58 
59 static uint8_t CHAR_MAP[] = {
60     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
61     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
62  /*     !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /  */
63     0,  3,  4,  4,  3,  3,  3,  0,  4,  4,  3,  1,  0,  1,  1,  3,
64  /* 0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?  */
65     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  3,  4,  3,  3,  3,  3,
66  /* @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O  */
67     1,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
68  /* P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _  */
69     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  0,  4,  3,  3,
70  /* `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o  */
71     0,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
72  /* p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~   ^? */
73     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  0,  0,  0,  3,  0,
74 };
75 #define CHAR_MAP_SYMBOL    0x01
76 #define CHAR_MAP_INITIAL   0x02
77 #define CHAR_MAP_DELIMITER 0x04
78 
79 #define SYMBOL_CHARP(x)    ((CHAR_MAP[x] & CHAR_MAP_SYMBOL) != 0)
80 #define INITIAL_CHARP(x)   ((CHAR_MAP[x] & CHAR_MAP_INITIAL) != 0)
81 #define DELIMITER_CHARP(x) ((CHAR_MAP[x] & CHAR_MAP_DELIMITER) != 0)
82 
83 static SgObject pair_infos[] = {
84   SG_UNDEF,
85   SG_UNDEF
86 };
87 
88 #define SYM_CONST       pair_infos[0]
89 #define SYM_SOURCE_INFO pair_infos[1]
90 
convert_hex_char_to_int(SgChar c)91 static int convert_hex_char_to_int(SgChar c)
92 {
93   if ((c >= '0') & (c <= '9')) return c - '0';
94   else if ((c >= 'a') & (c <= 'f')) return c - 'a' + 10;
95   else if ((c >= 'A') & (c <= 'F')) return c - 'A' + 10;
96   return -1;
97 }
98 
sref_print(SgObject obj,SgPort * port,SgWriteContext * ctx)99 static void sref_print(SgObject obj, SgPort *port, SgWriteContext *ctx)
100 {
101   SgSharedRef *ref = SG_SHAREDREF(obj);
102   Sg_Putuz(port, UC("#<shared-ref "));
103   if (SG_NUMBERP(ref->index)) {
104     Sg_Puts(port, Sg_NumberToString(ref->index, 10, FALSE));
105   } else {
106     Sg_Putuz(port, UC("???"));
107   }
108   Sg_Putc(port, '>');
109 }
110 SG_DEFINE_BUILTIN_CLASS_SIMPLE(Sg_SharedRefClass, sref_print);
111 
112 /* TODO, we probably want to have slot access */
rctx_print(SgObject obj,SgPort * port,SgWriteContext * ctx)113 static void rctx_print(SgObject obj, SgPort *port, SgWriteContext *ctx)
114 {
115   Sg_PutuzUnsafe(port, UC("#<read-context>"));
116 }
117 SG_DEFINE_BUILTIN_CLASS_SIMPLE(Sg_ReadContextClass, rctx_print);
118 
make_read_context()119 static SgReadContext * make_read_context()
120 {
121   SgReadContext *ctx = SG_NEW(SgReadContext);
122   SG_SET_CLASS(ctx, SG_CLASS_READ_CONTEXT);
123   return ctx;
124 }
125 static SgReadContext *DEFAULT_CONTEXT = NULL;
126 
Sg_MakeDefaultReadContext()127 SgObject Sg_MakeDefaultReadContext()
128 {
129   return SG_OBJ(make_read_context());
130 }
131 
Sg_MakeReadContextForLoad()132 SgObject Sg_MakeReadContextForLoad()
133 {
134   SgReadContext *ctx = make_read_context();
135   ctx->flags = SG_READ_SOURCE_INFO | SG_CHANGE_VM_MODE;
136   ctx->graph = SG_HASHTABLE(Sg_MakeHashTableSimple(SG_HASH_EQ, 1));
137   return SG_OBJ(ctx);
138 }
139 
make_shared_ref(long mark)140 static SgSharedRef* make_shared_ref(long mark)
141 {
142   SgSharedRef *z = SG_NEW(SgSharedRef);
143   SG_SET_CLASS(z, SG_CLASS_SHARED_REF);
144   z->index = SG_MAKE_INT(mark);
145   return z;
146 }
147 
148 /* ctx utility */
parsing_range(SgReadContext * ctx,long from,long to)149 static void parsing_range(SgReadContext *ctx, long from, long to)
150 {
151   ctx->parsingLineFrom = from;
152   ctx->parsingLineTo = to;
153 }
154 
parsing_line(SgReadContext * ctx,long line)155 static void parsing_line(SgReadContext *ctx, long line)
156 {
157   parsing_range(ctx, line, line);
158 }
159 
160 
161 typedef struct
162 {
163   long value;
164   int present;
165 } dispmacro_param;
166 
167 typedef SgObject (*macro_function)(SgPort *, SgChar, SgReadContext *);
168 typedef SgObject (*dispmacro_function)(SgPort *, SgChar,
169 				       dispmacro_param *, SgReadContext *);
170 
171 typedef enum {
172   CT_ILLEGAL,
173   CT_CONSTITUENT,
174   CT_SINGLE_ESCAPE,
175   CT_WHITE_SPACE,
176   CT_TERM_MACRO,
177   CT_NON_TERM_MACRO
178 } char_type;
179 
180 typedef struct
181 {
182   SgObject sfunc;
183   dispmacro_function cfunc;
184 } disptab_t;
185 
186 /* we only support ASCII */
187 #define MAX_READTABLE_CHAR 128
188 
189 typedef struct
190 {
191   char_type type;
192   SgObject  sfunc;
193   macro_function cfunc;
194   disptab_t *disp;
195 } readtab_t;
196 
197 struct readtable_rec_t
198 {
199   int      insensitiveP;
200   SgObject (*symbol_reader)(SgPort *, SgChar, SgReadContext *,
201 			    SgObject *errorp);
202   readtab_t readtable[MAX_READTABLE_CHAR];
203 };
204 
205 /* template readtables */
206 static readtable_t r6rs_read_table;
207 static readtable_t r7rs_read_table;
208 /* including r7rs #u8() */
209 static readtable_t compat_read_table;
210 
211 static readtable_t* default_readtable(int copyP);
212 
213 #define DEFAULT_TABLEP(port)				\
214   (!SG_PORT_READTABLE(port) ||				\
215    SG_PORT_READTABLE(port) == default_readtable(FALSE))
216 
217 #define ENSURE_COPIED_TABLE(port)				\
218   do {								\
219     if (DEFAULT_TABLEP(port)) {					\
220       SG_PORT_READTABLE(port) = default_readtable(TRUE);	\
221     }								\
222   } while(0)
223 
delimited(SgPort * p,SgChar c)224 static int delimited(SgPort *p, SgChar c)
225 {
226   readtable_t *table = SG_PORT_READTABLE(p);
227   if (c > 127 && Sg_Ucs4WhiteSpaceP(c)) return TRUE;
228   if (c > 127) return FALSE;
229   /* return DELIMITER_CHARP(c); */
230   if (!table) {
231     /* use default table */
232     table = default_readtable(FALSE);
233   }
234   switch (table->readtable[c].type) {
235   case CT_NON_TERM_MACRO:
236   case CT_CONSTITUENT:
237     /* is escape a delmiter? */
238   case CT_SINGLE_ESCAPE:
239     return FALSE;
240   default: return TRUE;
241   }
242 }
243 
244 static SgObject read_expr4(SgPort *port, int flags, SgChar delim,
245 			   SgReadContext *ctx);
246 #define ACCEPT_EOF 1
247 #define ACCEPT_DOT 2
248 
249 #define read_expr(p, c) read_expr4((p), 0, EOF, (c))
250 /* one char reader */
251 static SgObject read_open_paren(SgPort *port, SgChar c, SgReadContext *ctx);
252 static SgObject read_close_paren(SgPort *port, SgChar c, SgReadContext *ctx);
253 static SgObject read_double_quote(SgPort *port, SgChar c, SgReadContext *ctx);
254 static SgObject read_quote(SgPort *port, SgChar c, SgReadContext *ctx);
255 static SgObject read_quasiquote(SgPort *port, SgChar c, SgReadContext *ctx);
256 static SgObject read_unquote(SgPort *port, SgChar c, SgReadContext *ctx);
257 static SgObject read_colon(SgPort *port, SgChar c, SgReadContext *ctx);
258 static SgObject read_vertical_bar(SgPort *port, SgChar c, SgReadContext *ctx);
259 static SgObject read_open_bracket(SgPort *port, SgChar c, SgReadContext *ctx);
260 static SgObject read_close_bracket(SgPort *port, SgChar c, SgReadContext *ctx);
261 static SgObject read_semicolon(SgPort *port, SgChar c, SgReadContext *ctx);
262 /* two chars */
263 /* list of two char syntax
264    #-',`!|uvtTfFobBOdDxXiIeE(\;=#
265  */
266 static SgObject read_hash_quote(SgPort *port, SgChar c,
267 				dispmacro_param *param, SgReadContext *ctx);
268 static SgObject read_hash_quasiquote(SgPort *port, SgChar c,
269 				     dispmacro_param *param,
270 				     SgReadContext *ctx);
271 static SgObject read_hash_unquote(SgPort *port, SgChar c,
272 				  dispmacro_param *param, SgReadContext *ctx);
273 static SgObject read_hash_bang(SgPort *port, SgChar c, dispmacro_param *param,
274 			       SgReadContext *ctx);
275 static SgObject read_hash_bar(SgPort *port, SgChar c, dispmacro_param *param,
276 			      SgReadContext *ctx);
277 static SgObject read_hash_v(SgPort *port, SgChar c, dispmacro_param *param,
278 			    SgReadContext *ctx);
279 static SgObject read_hash_u(SgPort *port, SgChar c, dispmacro_param *param,
280 			    SgReadContext *ctx);
281 static SgObject read_hash_t(SgPort *port, SgChar c, dispmacro_param *param,
282 			    SgReadContext *ctx);
283 static SgObject read_hash_f(SgPort *port, SgChar c, dispmacro_param *param,
284 			    SgReadContext *ctx);
285 static SgObject read_hash_b(SgPort *port, SgChar c, dispmacro_param *param,
286 			    SgReadContext *ctx);
287 static SgObject read_hash_o(SgPort *port, SgChar c, dispmacro_param *param,
288 			    SgReadContext *ctx);
289 static SgObject read_hash_d(SgPort *port, SgChar c, dispmacro_param *param,
290 			    SgReadContext *ctx);
291 static SgObject read_hash_x(SgPort *port, SgChar c, dispmacro_param *param,
292 			    SgReadContext *ctx);
293 static SgObject read_hash_i(SgPort *port, SgChar c, dispmacro_param *param,
294 			    SgReadContext *ctx);
295 static SgObject read_hash_e(SgPort *port, SgChar c, dispmacro_param *param,
296 			    SgReadContext *ctx);
297 static SgObject read_hash_open_paren(SgPort *port, SgChar c,
298 				     dispmacro_param *param,
299 				     SgReadContext *ctx);
300 static SgObject read_hash_semicolon(SgPort *port, SgChar c,
301 				    dispmacro_param *param, SgReadContext *ctx);
302 static SgObject read_hash_escape(SgPort *port, SgChar c, dispmacro_param *param,
303 				 SgReadContext *ctx);
304 static SgObject read_hash_equal(SgPort *port, SgChar c, dispmacro_param *param,
305 				SgReadContext *ctx);
306 static SgObject read_hash_hash(SgPort *port, SgChar c, dispmacro_param *param,
307 			       SgReadContext *ctx);
308 static SgObject read_hash_less(SgPort *port, SgChar c, dispmacro_param *param,
309 			       SgReadContext *ctx);
310 static SgObject read_hash_colon(SgPort *port, SgChar c, dispmacro_param *param,
311 				SgReadContext *ctx);
312 
313 /* mode  */
314 static SgObject read_r6rs_symbol(SgPort *port, SgChar c, SgReadContext *ctx,
315 				 SgObject *errorp);
316 static SgObject read_compatible_symbol(SgPort *port, SgChar c,
317 				       SgReadContext *ctx, SgObject *errorp);
318 /* utility */
319 static SgObject read_symbol_or_number(SgPort *port, SgChar c,
320 				      readtable_t *table, SgReadContext *ctx);
321 static SgObject macro_reader(SgPort *port, SgChar c, readtab_t *tab,
322 			     SgReadContext *ctx);
323 static SgObject dispmacro_reader(SgPort *port, SgChar c, SgReadContext *ctx);
324 
325 static void add_read_table(readtable_t *src, readtable_t *dst);
326 /* TODO use this in lexical_error */
lexical_error_msg(SgPort * port,SgReadContext * ctx,const SgChar * fmt,...)327 static SgObject lexical_error_msg(SgPort * port, SgReadContext *ctx,
328 				  const SgChar *fmt, ...)
329 {
330   va_list ap;
331   SgObject msg, line, file;
332 
333   va_start(ap, fmt);
334   msg = Sg_Vsprintf(fmt, ap, TRUE);
335   va_end(ap);
336 
337   file = Sg_FileName(port);
338   if (ctx->parsingLineFrom == ctx->parsingLineTo) {
339     line = Sg_Sprintf(UC("file %S, line %d"),
340 		      file,
341 		      ctx->parsingLineFrom);
342   } else {
343     line = Sg_Sprintf(UC("file %S, line %d-%d"),
344 		      file,
345 		      ctx->parsingLineFrom, ctx->parsingLineTo);
346   }
347   return Sg_Sprintf(UC("%A (%A)"), msg, line);
348 }
349 
lexical_error(SgPort * port,SgReadContext * ctx,const SgChar * fmt,...)350 static void lexical_error(SgPort * port, SgReadContext *ctx,
351 			  const SgChar *fmt, ...)
352 {
353   va_list ap;
354   SgObject msg, line, file;
355 
356   va_start(ap, fmt);
357   msg = Sg_Vsprintf(fmt, ap, TRUE);
358   va_end(ap);
359 
360   file = Sg_FileName(port);
361   if (ctx->parsingLineFrom == ctx->parsingLineTo) {
362     line = Sg_Sprintf(UC("file %S, line %d"),
363 		      file,
364 		      ctx->parsingLineFrom);
365   } else {
366     line = Sg_Sprintf(UC("file %S, line %d-%d"),
367 		      file,
368 		      ctx->parsingLineFrom, ctx->parsingLineTo);
369   }
370   Sg_ReadError(UC("%A (%A)"), msg, line);
371 }
372 
read_thing(SgPort * port,SgReadContext * ctx,SgChar * buf,size_t size,SgChar initial)373 static long read_thing(SgPort *port, SgReadContext *ctx, SgChar *buf,
374 		       size_t size, SgChar initial)
375 {
376   long i = 0;
377   if (initial != -1) {
378     buf[i++] = initial;
379   }
380   while (i < (long)size) {
381     SgChar c = Sg_PeekcUnsafe(port);
382     if (c == EOF || delimited(port, c)) {
383       buf[i] = 0;
384       return i;
385     }
386     Sg_GetcUnsafe(port);
387     buf[i++] = c;
388   }
389   lexical_error(port, ctx,
390 		UC("token buffer overflow while reading identifier, %s ..."),
391 		buf);
392   return -1;			/* dummy */
393 }
394 
read_hex_scalar_value(SgPort * port,SgReadContext * ctx)395 static SgChar read_hex_scalar_value(SgPort *port, SgReadContext *ctx)
396 {
397   int n;
398   SgChar ucs4 = 0, c = Sg_GetcUnsafe(port);
399   if (c == EOF) {
400     lexical_error(port, ctx,
401 		  UC("unexpected end-of-file while reading hex scalar value"));
402   }
403   if (delimited(port, c)) {
404     lexical_error(port, ctx,
405 		  UC("expected hex digit, but got %c, while reading hex scalar value"), c);
406   }
407   Sg_UngetcUnsafe(port, c);
408 
409   while (TRUE) {
410     c = Sg_GetcUnsafe(port);
411     if (c == EOF || delimited(port, c)) {
412       Sg_UngetcUnsafe(port, c);
413       return Sg_EnsureUcs4(ucs4);
414     }
415     n = convert_hex_char_to_int(c);
416     if (n < 0) {
417       lexical_error(port, ctx,
418 		    UC("expected hex digit, but got %c, while reading hex scalar value"), c);
419     }
420     ucs4 = (ucs4 << 4) + n;
421     if (ucs4 > 0x10ffff) {
422       lexical_error(port, ctx, UC("hex scalar value out of range"));
423     }
424   }
425 }
426 
read_escape(SgPort * port,SgReadContext * ctx)427 static SgChar read_escape(SgPort *port, SgReadContext *ctx)
428 {
429   SgChar c = Sg_GetcUnsafe(port);
430   switch (c) {
431   case 'x':
432     c = Sg_GetcUnsafe(port);
433     if (c == EOF) {
434       lexical_error(port, ctx,
435 		    UC("unexpected end-of-file while reading escape sequence"));
436     }
437     Sg_UngetcUnsafe(port, c);
438     c = read_hex_scalar_value(port, ctx);
439     if (Sg_GetcUnsafe(port) != ';') {
440       lexical_error(port, ctx,
441 		    UC("inline hex escape missing terminating semi-colon"));
442     }
443     return c;
444   case 'a':  return 0x0007;
445   case 'b':  return 0x0008;
446   case 't':  return 0x0009;
447   case 'n':  return 0x000A;
448   case 'v':  return 0x000B;
449   case 'f':  return 0x000C;
450   case 'r':  return 0x000D;
451   case '"':  return 0x0022;
452   case '\\': return 0x005C;
453     /* R7RS */
454   case '|':  return 0x007C;
455   case EOF:
456     lexical_error(port, ctx,
457 		  UC("unexpected end-of-file while reading escape sequence"));
458   default:
459     if (ctx->escapedp) return c;
460     lexical_error(port, ctx,
461 		  UC("invalid escape sequence, \\%c"), c);
462   }
463 
464   return -1;			/* dummy */
465 }
466 
467 typedef int (*read_helper)(SgPort *, SgReadContext *, SgChar *,
468 			   int, SgChar, SgObject, readtable_t *table,
469 			   SgObject *errorp);
470 /* convenient macro, used both symbol and string reader*/
471 #define append_char(buf, p, c, i)		\
472   do {						\
473     if (p) {					\
474       Sg_PutcUnsafe(p, (c));			\
475     } else {					\
476       (buf)[(i)++] = (c);			\
477     }						\
478   } while (0)
479 
480 #define READ_SYMBOL_MAX_SIZE 256
481 
read_symbol_generic(SgPort * port,SgChar initial,read_helper helper,SgReadContext * ctx,SgObject * errorp)482 static SgObject read_symbol_generic(SgPort *port, SgChar initial,
483 				    read_helper helper,
484 				    SgReadContext *ctx, SgObject *errorp)
485 {
486   SgChar buf[READ_SYMBOL_MAX_SIZE], c;
487   int i = 0;
488   readtable_t *table = Sg_PortReadTable(port);
489   SgObject out = NULL;
490 
491   if (initial > 0) {
492     if (initial > 127) {
493       Sg_EnsureUcs4(initial);
494       if (i == 0) {
495 	if (Sg_Ucs4ConstituentP(initial)) {
496 	  append_char(buf, out, initial, i);
497 	  goto next;
498 	}
499       } else {
500 	if (Sg_Ucs4SubsequentP(initial)) {
501 	  append_char(buf, out, initial, i);
502 	  goto next;
503 	}
504       }
505       lexical_error(port, ctx,
506 		    UC("invalid character %U during reading identifier"),
507 		    initial);
508     } else {
509       i = helper(port, ctx, buf, i, initial, out, table, errorp);
510     }
511   }
512  next:
513   while (out != NULL || i < array_sizeof(buf)) {
514     c = Sg_PeekcUnsafe(port);
515     if (c == EOF || delimited(port, c)) {
516       if (out) {
517 	return Sg_GetStringFromStringPort(out);
518       } else {
519 	SgObject s;
520 	buf[i] = 0;
521 	s = Sg_MakeString(buf, SG_LITERAL_STRING, i);
522 	if (table->insensitiveP) {
523 	  s = Sg_StringFoldCase(SG_STRING(s));
524 	}
525 	return s;
526       }
527     }
528     Sg_GetcUnsafe(port);
529     if (c == '\\') {
530       /* TODO how should I treat '\\' during reading symbol */
531       c = Sg_GetcUnsafe(port);
532       if (c == 'x') {
533 	Sg_UngetcUnsafe(port, c);
534 	append_char(buf, out, read_escape(port, ctx), i);
535 	continue;
536       }
537       lexical_error(port, ctx,
538 		    UC("invalid character '\\' during reading identifier"));
539     }
540     if (c > 127) {
541       Sg_EnsureUcs4(c);
542       if (i == 0) {
543 	if (Sg_Ucs4ConstituentP(c)) {
544 	  append_char(buf, out, c, i);
545 	  continue;
546 	}
547       } else {
548 	if (Sg_Ucs4SubsequentP(c)) {
549 	  append_char(buf, out, c, i);
550 	  continue;
551 	}
552       }
553       lexical_error(port, ctx,
554 		    UC("invalid character %U during reading identifier"), c);
555     }
556     i = helper(port, ctx, buf, i, c, out, table, errorp);
557   }
558   out = Sg_ConvertToStringOutputPort(buf, i);
559   goto next;
560   /* lexical_error(port, ctx, */
561   /* 		UC("token buffer overflow during reading identifier")); */
562   return SG_UNDEF;
563 #undef check_range
564 }
565 
read_r6rs_symbol_helper(SgPort * port,SgReadContext * ctx,SgChar * buf,int i,SgChar c,SgObject out,readtable_t * table,SgObject * errorp)566 static int read_r6rs_symbol_helper(SgPort *port, SgReadContext *ctx,
567 				   SgChar *buf, int i, SgChar c,
568 				   SgObject out,
569 				   readtable_t *table, SgObject *errorp)
570 {
571   if (c > 127) {
572     if (buf != NULL) append_char(buf, out, c, i);
573     return i;
574   }
575   if (i == 0) {
576     if (INITIAL_CHARP(c)) {
577       if (buf != NULL) append_char(buf, out, c, i);
578       return i;
579     }
580   } else {
581     if (SYMBOL_CHARP(c)) {
582       if (buf != NULL) append_char(buf, out, c, i);
583       return i;
584     }
585   }
586   if (errorp) {
587     if (buf != NULL) append_char(buf, out, c, i);
588     *errorp =
589       lexical_error_msg(port, ctx,
590 			UC("invalid character %U while reading identifier"), c);
591     return i;
592   } else {
593     lexical_error(port, ctx,
594 		  UC("invalid character %U while reading identifier"), c);
595   }
596   return -1;			/* dummy */
597 }
598 
read_r6rs_symbol(SgPort * port,SgChar initial,SgReadContext * ctx,SgObject * errorp)599 SgObject read_r6rs_symbol(SgPort *port, SgChar initial, SgReadContext *ctx,
600 			  SgObject *errorp)
601 {
602   return read_symbol_generic(port, initial, read_r6rs_symbol_helper, ctx,
603 			     errorp);
604 }
605 
read_compat_symbol_helper(SgPort * port,SgReadContext * ctx,SgChar * buf,int i,SgChar c,SgObject out,readtable_t * table,SgObject * errorp)606 static int read_compat_symbol_helper(SgPort *port, SgReadContext *ctx,
607 				     SgChar *buf, int i, SgChar c,
608 				     SgObject out,
609 				     readtable_t *table, SgObject *errorp)
610 {
611   if (!delimited(port, c)) {
612     append_char(buf, out, c, i);
613     return i;
614   }
615   if (errorp) {
616     append_char(buf, out, c, i);
617     *errorp =
618       lexical_error_msg(port, ctx,
619 			UC("invalid character %U while reading identifier"), c);
620     return i;
621   } else {
622     lexical_error(port, ctx,
623 		  UC("invalid character %U while reading identifier"), c);
624   }
625   return -1;			/* dummy */
626 }
627 
read_compatible_symbol(SgPort * port,SgChar initial,SgReadContext * ctx,SgObject * errorp)628 SgObject read_compatible_symbol(SgPort *port, SgChar initial,
629 				SgReadContext *ctx, SgObject *errorp)
630 {
631   return read_symbol_generic(port, initial, read_compat_symbol_helper, ctx,
632 			     errorp);
633 }
634 
read_escaped_symbol(SgPort * port,SgReadContext * ctx)635 static SgObject read_escaped_symbol(SgPort *port, SgReadContext *ctx)
636 {
637   SgObject str = read_compatible_symbol(port, -1, ctx, NULL);
638   return Sg_Intern(str);
639 }
640 
read_symbol_or_number(SgPort * port,SgChar c,readtable_t * table,SgReadContext * ctx)641 SgObject read_symbol_or_number(SgPort *port, SgChar c,
642 			       readtable_t *table, SgReadContext *ctx)
643 {
644   SgObject str, num, tmp;
645   SgObject error = SG_FALSE;
646   str = table->symbol_reader(port, c, ctx, &error);
647   if (table->symbol_reader == read_r6rs_symbol) {
648     tmp = str;
649   } else {
650     /* R7RS requires capital NaN or InF */
651     tmp = Sg_StringDownCase(str);
652   }
653   num  = Sg_StringToNumber(tmp, 10, TRUE);
654   if (!SG_FALSEP(num)) return num;
655 
656   /* check special case first */
657   if (SG_STRING_SIZE(str) == 1 && SG_STRING_VALUE_AT(str, 0) == '.')
658     return SG_SYMBOL_DOT;
659   /* well for now we do not check */
660   if (table->symbol_reader == read_r6rs_symbol &&
661       SG_STRING_VALUE_AT(str, 0) < 128) {
662     int i;
663     if (SG_STRING_SIZE(str) == 1 &&
664 	(SG_STRING_VALUE_AT(str, 0)=='+' || SG_STRING_VALUE_AT(str, 0)=='-')) {
665       return Sg_Intern(str);
666     }
667     if (ustrcmp(SG_STRING_VALUE(str), "...") == 0) return SG_SYMBOL_ELLIPSIS;
668     if (SG_STRING_SIZE(str) >= 2 &&
669 	SG_STRING_VALUE_AT(str, 0) == '-'
670 	&& SG_STRING_VALUE_AT(str, 1) == '>') {
671       for (i = 2; i < SG_STRING_SIZE(str); i++) {
672 	SgChar c = SG_STRING_VALUE_AT(str, i);
673 	if (c > 127) continue;
674 	if (SYMBOL_CHARP(c)) continue;
675 	lexical_error(port, ctx, UC("invalid lexical syntax %A"), str);
676       }
677       return Sg_Intern(str);
678     }
679   }
680   /* if there is an lexical error after all checks, then raise it */
681   if (!SG_FALSEP(error)) Sg_ReadError(UC("%A"), error);
682   return Sg_Intern(str);
683 
684 }
685 
macro_reader(SgPort * port,SgChar c,readtab_t * tab,SgReadContext * ctx)686 SgObject macro_reader(SgPort *port, SgChar c, readtab_t *tab,
687 			     SgReadContext *ctx)
688 {
689   if (tab[c].cfunc) return (*tab[c].cfunc)(port, c, ctx);
690   if (SG_EQ(tab[c].sfunc, SG_UNBOUND))
691     lexical_error(port, ctx, UC("'%c' is not a macro char"), c);
692   /* FIXME: we should eliminate this type of dispatch */
693   if (SG_PROCEDURE_REQUIRED(tab[c].sfunc) > 2) {
694     return Sg_Apply3(tab[c].sfunc, port, SG_MAKE_CHAR(c), ctx);
695   } else {
696     return Sg_Apply2(tab[c].sfunc, port, SG_MAKE_CHAR(c));
697   }
698 }
699 
read_list_int(SgPort * port,SgChar closer,SgReadContext * ctx,long start_line)700 static SgObject read_list_int(SgPort *port, SgChar closer, SgReadContext *ctx,
701 			      long start_line)
702 {
703   SgObject start = SG_NIL, last = SG_NIL, item;
704   item = read_expr4(port, ACCEPT_EOF, closer, ctx);
705   if (SG_EQ(item, SG_EOF)) goto eoferr;
706   /* return '() */
707   if (!ctx->escapedp && SG_EQ(item, SG_SYMBOL_RPAREN)) return start;
708 
709   SG_APPEND1(start, last, item);
710 
711   for (;;) {
712     ctx->escapedp = FALSE;
713     item = read_expr4(port, ACCEPT_EOF | ACCEPT_DOT, closer, ctx);
714     if (SG_EQ(item, SG_EOF)) goto eoferr;
715     if (!ctx->escapedp && SG_EQ(item, SG_SYMBOL_RPAREN)) return start;
716     if (!ctx->escapedp && SG_EQ(item, SG_SYMBOL_DOT)) {
717       SG_SET_CDR(last, read_expr(port, ctx));
718       item = read_expr4(port, ACCEPT_EOF, closer, ctx);
719       if (!SG_EQ(item, SG_SYMBOL_RPAREN)) {
720 	parsing_range(ctx, start_line, Sg_LineNo(port));
721 	lexical_error(port, ctx, UC("bad dot syntax"));
722       }
723       if (SG_EQ(item, SG_EOF)) goto eoferr;
724       return start;
725     }
726     SG_APPEND1(start, last, item);
727   }
728  eoferr:
729   parsing_range(ctx, start_line, Sg_LineNo(port));
730   lexical_error(port, ctx, UC("unexpected end-of-file while reading a list"));
731   return SG_UNDEF;		/* dummy */
732 }
733 
read_list(SgPort * port,SgChar closer,SgReadContext * ctx)734 static SgObject read_list(SgPort *port, SgChar closer, SgReadContext *ctx)
735 {
736   long line = Sg_LineNo(port);
737   SgObject r = read_list_int(port, closer, ctx, line);
738   if (SG_PAIRP(r) && line >= 0) {
739     SgVM *vm = Sg_VM();
740     if (!SG_VM_IS_SET_FLAG(vm, SG_NO_DEBUG_INFO)) {
741       SgObject info = Sg_FileName(port);
742       if (!SG_FALSEP(info) && ctx->flags & SG_READ_SOURCE_INFO) {
743 	r = Sg_SetPairAnnotation(r, SYM_SOURCE_INFO,
744 				 Sg_Cons(info, SG_MAKE_INT(line)));
745       }
746     }
747   }
748   return r;
749 }
750 
read_open_paren(SgPort * port,SgChar c,SgReadContext * ctx)751 SgObject read_open_paren(SgPort *port, SgChar c, SgReadContext *ctx)
752 {
753   return read_list(port, ')', ctx);
754 }
755 
read_close_paren(SgPort * port,SgChar c,SgReadContext * ctx)756 SgObject read_close_paren(SgPort *port, SgChar c, SgReadContext *ctx)
757 {
758   lexical_error(port, ctx, UC("unexpected close paren ')'"));
759   return SG_UNDEF;		/* dummy */
760 }
761 
read_open_bracket(SgPort * port,SgChar c,SgReadContext * ctx)762 SgObject read_open_bracket(SgPort *port, SgChar c, SgReadContext *ctx)
763 {
764   return read_list(port, ']', ctx);
765 }
766 
read_close_bracket(SgPort * port,SgChar c,SgReadContext * ctx)767 SgObject read_close_bracket(SgPort *port, SgChar c, SgReadContext *ctx)
768 {
769   lexical_error(port, ctx, UC("unexpected close bracket ']'"));
770   return SG_UNDEF;		/* dummy */
771 }
772 
read_double_quote(SgPort * port,SgChar c,SgReadContext * ctx)773 SgObject read_double_quote(SgPort *port, SgChar c, SgReadContext *ctx)
774 {
775   SgChar buf[READ_STRING_MAX_SIZE];
776   int i = 0;
777   SgObject out = NULL;
778 
779 #define handle_linefeed(c, hndl)				\
780   switch (c) {							\
781   case CR:							\
782     (c) = Sg_GetcUnsafe(port);					\
783     if ((c) != LF && (c) != NEL) Sg_UngetcUnsafe(port, c);	\
784   case LF: case NEL: case LS:					\
785     hndl;							\
786   }
787 
788  next:
789   while (out != NULL || i < array_sizeof(buf)) {
790     SgChar c = Sg_GetcUnsafe(port);
791     if (c == EOF)
792       lexical_error(port, ctx,
793 		    UC("unexpected end-of-file while reading string"));
794 
795     handle_linefeed(c, { append_char(buf, out, LF, i); continue; });
796 
797     if (c == '"') {
798       if (out) {
799 	return Sg_GetStringFromStringPort(out);
800       } else {
801 	buf[i] = 0;
802 	return Sg_MakeString(buf, SG_LITERAL_STRING, i);
803       }
804     }
805     if (c == '\\') {
806       c = Sg_GetcUnsafe(port);
807       if (Sg_Ucs4IntralineWhiteSpaceP(c)) {
808 	do {
809 	  c = Sg_GetcUnsafe(port);
810 	  if (c == EOF) {
811 	    lexical_error(port, ctx,
812 			  UC("unexpected end-of-file while"
813 			     " reading intraline whitespace"));
814 	  }
815 	} while (Sg_Ucs4IntralineWhiteSpaceP(c));
816 	/* internal line feed is LF*/
817 	handle_linefeed(c, break; default:
818 			lexical_error(port, ctx,
819 				      UC("unexpected character %U while"
820 					 " reading intraline whitespace"), c)
821 			);
822 	do { c = Sg_GetcUnsafe(port); } while (Sg_Ucs4IntralineWhiteSpaceP(c));
823 	Sg_UngetcUnsafe(port, c);
824 	continue;
825       }
826       handle_linefeed(c,
827 		      {
828 			do { c = Sg_GetcUnsafe(port); }
829 			while (Sg_Ucs4IntralineWhiteSpaceP(c));
830 			Sg_UngetcUnsafe(port, c);
831 			continue;
832 		      }
833 		      );
834       Sg_UngetcUnsafe(port, c);
835       c = read_escape(port, ctx);
836       append_char(buf, out, c, i);
837       continue;
838     } else {
839       append_char(buf, out, c, i);
840     }
841   }
842   out = Sg_ConvertToStringOutputPort(buf, i);
843   goto next;
844   /* lexical_error(port, ctx, UC("token buffer overflow while reading string")); */
845   return SG_UNDEF; /* dummy */
846 }
847 
read_quote(SgPort * port,SgChar c,SgReadContext * ctx)848 SgObject read_quote(SgPort *port, SgChar c, SgReadContext *ctx)
849 {
850   SgObject o = read_expr(port, ctx);
851   if (SG_EQ(o, SG_EOF)) {
852     lexical_error(port, ctx,
853 		  UC("unexpected end-of-file following quotation-mark(')"));
854   }
855   if (SG_PAIRP(o)) {
856     o = Sg_AddConstantLiteral(o);
857   }
858   return SG_LIST2(SG_SYMBOL_QUOTE, o);
859 }
860 
read_quasiquote(SgPort * port,SgChar c,SgReadContext * ctx)861 SgObject read_quasiquote(SgPort *port, SgChar c, SgReadContext *ctx)
862 {
863   SgObject o = read_expr(port, ctx);
864   if (SG_EQ(o, SG_EOF)) {
865     lexical_error(port, ctx,
866 		  UC("unexpected end-of-file following grave-accent(`)"));
867   }
868   return SG_LIST2(SG_SYMBOL_QUASIQUOTE, o);
869 }
870 
read_unquote(SgPort * port,SgChar c,SgReadContext * ctx)871 SgObject read_unquote(SgPort *port, SgChar c, SgReadContext *ctx)
872 {
873   c = Sg_GetcUnsafe(port);
874   if (c == EOF) {
875     lexical_error(port, ctx, UC("unexpected end-of-file following comma(,)"));
876   }
877   if (c == '@')
878     return SG_LIST2(SG_SYMBOL_UNQUOTE_SPLICING, read_expr(port, ctx));
879   Sg_UngetcUnsafe(port, c);
880   return SG_LIST2(SG_SYMBOL_UNQUOTE, read_expr(port, ctx));
881 }
882 
read_quoted_symbol(SgPort * port,SgReadContext * ctx,int interned)883 static SgObject read_quoted_symbol(SgPort *port, SgReadContext *ctx,
884 				   int interned)
885 {
886   SgChar buf[SYMBOL_MAX_SIZE];
887   int i = 0;
888   /* TODO flag check */
889   ctx->escapedp = TRUE;
890   while (i < array_sizeof(buf)) {
891     SgChar c = Sg_GetcUnsafe(port);
892     if (c == EOF) {
893       lexical_error(port, ctx,
894 		    UC("unexpected end-of-file while reading quoted symbol"));
895     }
896     if (c == '|') {
897       SgChar *real = SG_NEW_ATOMIC2(SgChar *, (i + 1) * sizeof(SgChar));
898       buf[i] = 0;
899       memcpy(real, buf, (i + 1) * sizeof(SgChar));
900       return Sg_MakeSymbol(Sg_MakeString(real, SG_LITERAL_STRING, i),
901 			   interned);
902     }
903     if (c == '\\') {
904       c = read_escape(port, ctx);
905     }
906     buf[i++] = c;
907   }
908   lexical_error(port, ctx,
909 		UC("token buffer overflow while reading quoted symbol"));
910   return SG_UNDEF; /* dummy */
911 }
912 
read_colon(SgPort * port,SgChar c,SgReadContext * ctx)913 SgObject read_colon(SgPort *port, SgChar c, SgReadContext *ctx)
914 {
915   int c2 = Sg_GetcUnsafe(port);
916   SgObject name;
917   SgChar buf[SYMBOL_MAX_SIZE];	/* too small? */
918 
919   if (c2 == '|') {
920     name = read_quoted_symbol(port, ctx, FALSE);
921     return Sg_MakeKeyword(SG_SYMBOL(name)->name);
922   } else {
923     long size;
924     Sg_UngetcUnsafe(port, c2);
925     size = read_thing(port, ctx, buf, array_sizeof(buf), -1);
926     buf[size] = 0;
927     return Sg_MakeKeyword(Sg_MakeString(buf, SG_LITERAL_STRING, size));
928   }
929 }
930 
read_vertical_bar(SgPort * port,SgChar c,SgReadContext * ctx)931 SgObject read_vertical_bar(SgPort *port, SgChar c, SgReadContext *ctx)
932 {
933   return read_quoted_symbol(port, ctx, TRUE);
934 }
935 
read_semicolon(SgPort * port,SgChar c,SgReadContext * ctx)936 SgObject read_semicolon(SgPort *port, SgChar c, SgReadContext *ctx)
937 {
938   while ((c = Sg_GetcUnsafe(port)) != EOF) {
939     if (c == LF) return NULL;
940   }
941   return SG_EOF;
942 }
943 
read_hash_quote(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)944 SgObject read_hash_quote(SgPort *port, SgChar c,
945 			 dispmacro_param *param, SgReadContext *ctx)
946 {
947   return SG_LIST2(SG_SYMBOL_SYNTAX, read_expr(port, ctx));
948 }
949 
read_hash_quasiquote(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)950 SgObject read_hash_quasiquote(SgPort *port, SgChar c,
951 			      dispmacro_param *param, SgReadContext *ctx)
952 {
953   return SG_LIST2(SG_SYMBOL_QUASISYNTAX, read_expr(port, ctx));
954 }
955 
read_hash_unquote(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)956 SgObject read_hash_unquote(SgPort *port, SgChar c,
957 			   dispmacro_param *param, SgReadContext *ctx)
958 {
959   c = Sg_GetcUnsafe(port);
960   if (c == EOF) {
961     lexical_error(port, ctx,
962 		  UC("unexpected end-of-file following sharp-comma(,)"));
963   }
964   if (c == '@') {
965     return SG_LIST2(SG_SYMBOL_UNSYNTAX_SPLICING, read_expr(port, ctx));
966   }
967   Sg_UngetcUnsafe(port, c);
968   return SG_LIST2(SG_SYMBOL_UNSYNTAX, read_expr(port, ctx));
969 }
970 
construct_lib_name(SgObject s)971 static SgObject construct_lib_name(SgObject s)
972 {
973   SgObject h = SG_NIL, t = SG_NIL;
974   int i, prev;
975   for (i = 0, prev = 0; i < SG_STRING_SIZE(s); i++) {
976     if (SG_STRING_VALUE_AT(s, i) == '/') {
977       SG_APPEND1(h, t, Sg_Intern(Sg_Substring(s, prev, i)));
978       prev = i + 1;
979     }
980   }
981   if (i != prev) {
982     SG_APPEND1(h, t, Sg_Intern(Sg_Substring(s, prev, i)));
983   }
984   return h;
985 }
986 
987 /* FIXME: we may want to add more directive on runtime (like Gauche) */
Sg_ApplyDirective(SgPort * port,SgObject desc,SgReadContext * ctx)988 SgObject Sg_ApplyDirective(SgPort *port, SgObject desc, SgReadContext *ctx)
989 {
990   SgString *tag = SG_SYMBOL(desc)->name;
991 
992   if (!ctx) {
993     ctx = DEFAULT_CONTEXT;
994   }
995   /* a bit of optimisation... */
996   switch (SG_STRING_VALUE_AT(tag, 0)) {
997   case 'c':
998     if (ustrcmp(tag->value, "compatible") == 0) {
999       SgVM *vm = Sg_VM();
1000       if (ctx->flags & SG_CHANGE_VM_MODE) {
1001 	SG_VM_SET_FLAG(vm, SG_COMPATIBLE_MODE);
1002 	SG_VM_UNSET_FLAG(vm, SG_R6RS_MODE);
1003 	SG_VM_UNSET_FLAG(vm, SG_R7RS_MODE);
1004 	SG_VM_SET_FLAG(vm, SG_ALLOW_OVERWRITE);
1005 	SG_VM_UNSET_FLAG(vm, SG_ERROR_UNBOUND);
1006       }
1007       Sg_SetPortReadTable(port, Sg_CopyReadTable(&compat_read_table));
1008       return desc;
1009     }
1010     if (ustrcmp(tag->value, "core") == 0) {
1011       SgVM *vm = Sg_VM();
1012       if (ctx->flags & SG_CHANGE_VM_MODE) {
1013 	SG_VM_UNSET_FLAG(vm, SG_COMPATIBLE_MODE);
1014 	SG_VM_UNSET_FLAG(vm, SG_R6RS_MODE);
1015 	SG_VM_UNSET_FLAG(vm, SG_R7RS_MODE);
1016 	SG_VM_UNSET_FLAG(vm, SG_ALLOW_OVERWRITE);
1017 	SG_VM_UNSET_FLAG(vm, SG_ERROR_UNBOUND);
1018       }
1019       Sg_SetPortReadTable(port, Sg_CopyReadTable(&compat_read_table));
1020       return desc;
1021     }
1022     if (ustrcmp(tag->value, "cache") == 0) {
1023       if (ctx->flags & SG_CHANGE_VM_MODE) {
1024 	SG_VM_UNSET_FLAG(Sg_VM(), SG_DISABLE_CACHE);
1025       }
1026       return desc;
1027     }
1028     break;
1029   case 'd':
1030     if (ustrcmp(tag->value, "deprecated") == 0) {
1031       Sg_Warn(UC("deprecated file is being loaded %S"), Sg_FileName(port));
1032       return desc;
1033     }
1034     break;
1035   case 'f':
1036     if (ustrcmp(tag->value, "fold-case") == 0) {
1037       ENSURE_COPIED_TABLE(port);
1038       SG_PORT_READTABLE(port)->insensitiveP = TRUE;
1039       /* we need to preserve for include-ci with #!fold-case */
1040       ctx->flags |= SG_READ_NO_CASE;
1041       ctx->flags &= ~SG_READ_CASE;
1042       return desc;
1043     }
1044     break;
1045   case 'n':
1046     if (ustrcmp(tag->value, "no-overwrite") == 0) {
1047       if (ctx->flags & SG_CHANGE_VM_MODE) {
1048 	SG_VM_UNSET_FLAG(Sg_VM(), SG_ALLOW_OVERWRITE);
1049       }
1050       return desc;
1051     }
1052     if (ustrcmp(tag->value, "no-fold-case") == 0) {
1053       ENSURE_COPIED_TABLE(port);
1054       SG_PORT_READTABLE(port)->insensitiveP = FALSE;
1055       ctx->flags &= ~SG_READ_NO_CASE;
1056       ctx->flags |= SG_READ_CASE;
1057       return desc;
1058     }
1059     if (ustrcmp(tag->value, "nocache") == 0) {
1060       if (ctx->flags & SG_CHANGE_VM_MODE) {
1061 	SG_VM_SET_FLAG(Sg_VM(), SG_DISABLE_CACHE);
1062       }
1063       return desc;
1064     }
1065     if (ustrcmp(tag->value, "noinlineasm") == 0) {
1066       if (ctx->flags & SG_CHANGE_VM_MODE) {
1067 	SG_VM_SET_FLAG(Sg_VM(), SG_NO_INLINE_ASM);
1068       }
1069       return desc;
1070     }
1071     if (ustrcmp(tag->value, "noinlinelocal") == 0) {
1072       if (ctx->flags & SG_CHANGE_VM_MODE) {
1073 	SG_VM_SET_FLAG(Sg_VM(), SG_NO_INLINE_LOCAL);
1074       }
1075       return desc;
1076     }
1077     if (ustrcmp(tag->value, "nolambdalifting") == 0) {
1078       if (ctx->flags & SG_CHANGE_VM_MODE) {
1079 	SG_VM_SET_FLAG(Sg_VM(), SG_NO_LAMBDA_LIFT);
1080       }
1081       return desc;
1082     }
1083     if (ustrcmp(tag->value, "nooptimization") == 0) {
1084       SgVM *vm = Sg_VM();
1085       if (ctx->flags & SG_CHANGE_VM_MODE) {
1086 	SG_VM_SET_FLAG(vm, SG_NO_INLINE_ASM);
1087 	SG_VM_SET_FLAG(vm, SG_NO_INLINE_LOCAL);
1088 	SG_VM_SET_FLAG(vm, SG_NO_LAMBDA_LIFT);
1089       }
1090       return desc;
1091     }
1092     if (ustrcmp(tag->value, "nobacktrace") == 0) {
1093       SgVM *vm = Sg_VM();
1094       if (ctx->flags & SG_CHANGE_VM_MODE) {
1095 	SG_VM_SET_FLAG(vm, SG_NO_DEBUG_INFO);
1096       }
1097       return desc;
1098     }
1099     if (ustrncmp(tag->value, "nounbound", 7) == 0) {
1100       SgVM *vm = Sg_VM();
1101       if (ctx->flags & SG_CHANGE_VM_MODE) {
1102 	SG_VM_SET_FLAG(vm, SG_ERROR_UNBOUND);
1103       }
1104       return desc;
1105     }
1106     break;
1107   case 'o':
1108     if (ustrcmp(tag->value, "overwrite") == 0) {
1109       if (ctx->flags & SG_CHANGE_VM_MODE) {
1110 	SG_VM_SET_FLAG(Sg_VM(), SG_ALLOW_OVERWRITE);
1111       }
1112       return desc;
1113     }
1114     break;
1115   case 'r':
1116     if (ustrcmp(tag->value, "r6rs") == 0) {
1117       SgVM *vm = Sg_VM();
1118       if (ctx->flags & SG_CHANGE_VM_MODE) {
1119 	SG_VM_SET_FLAG(vm, SG_R6RS_MODE);
1120 	SG_VM_UNSET_FLAG(vm, SG_ALLOW_OVERWRITE);
1121 	SG_VM_UNSET_FLAG(vm, SG_R7RS_MODE);
1122 	SG_VM_UNSET_FLAG(vm, SG_COMPATIBLE_MODE);
1123 	SG_VM_SET_FLAG(vm, SG_ERROR_UNBOUND);
1124       }
1125       Sg_SetPortReadTable(port, Sg_CopyReadTable(&r6rs_read_table));
1126       return desc;
1127     }
1128     if (ustrcmp(tag->value, "r7rs") == 0) {
1129       SgVM *vm = Sg_VM();
1130       if (ctx->flags & SG_CHANGE_VM_MODE) {
1131 	SG_VM_SET_FLAG(vm, SG_R7RS_MODE);
1132 	SG_VM_UNSET_FLAG(vm, SG_ALLOW_OVERWRITE);
1133 	SG_VM_UNSET_FLAG(vm, SG_COMPATIBLE_MODE);
1134 	SG_VM_UNSET_FLAG(vm, SG_R6RS_MODE);
1135 	/* TODO should we? */
1136 	SG_VM_UNSET_FLAG(vm, SG_ERROR_UNBOUND);
1137       }
1138       Sg_SetPortReadTable(port, Sg_CopyReadTable(&r7rs_read_table));
1139       return desc;
1140     }
1141     if (ustrncmp(tag->value, "reader=", 7) == 0) {
1142       SgObject name = construct_lib_name(Sg_Substring(tag, 7, -1));
1143       SgObject lib = Sg_FindLibrary(name, FALSE);
1144       /* should we raise error or not? */
1145       if (SG_FALSEP(lib)) {
1146 	lexical_error(port, ctx, UC("no library named %S"), name);
1147 	return desc;
1148       }
1149       if (!SG_FALSEP(SG_LIBRARY_READER(lib))) {
1150 	SG_PORT_READER(port) = SG_LIBRARY_READER(lib);
1151       }
1152       /* to let replaced reader read next expression, otherwise current
1153 	 reader keep reading the next one.
1154       */
1155       return SG_UNDEF;
1156     }
1157     /* for portability with other implementation */
1158     if (ustrncmp(tag->value, "read-macro=", 11) == 0) {
1159       SgObject name = construct_lib_name(Sg_Substring(tag, 11, -1));
1160       SgObject lib = Sg_FindLibrary(name, FALSE);
1161       /* should we raise error or not? */
1162       if (SG_FALSEP(lib)) {
1163 	lexical_error(port, ctx, UC("no library named %S"), name);
1164 	return desc;
1165       }
1166       if (SG_LIBRARY_READTABLE(lib)) {
1167 	ENSURE_COPIED_TABLE(port);
1168 	add_read_table(SG_LIBRARY_READTABLE(lib), Sg_PortReadTable(port));
1169       }
1170       return desc;
1171     }
1172     break;
1173   case 'u':
1174     if (ustrncmp(tag->value, "unbound", 7) == 0) {
1175       SgVM *vm = Sg_VM();
1176       if (ctx->flags & SG_CHANGE_VM_MODE) {
1177 	SG_VM_UNSET_FLAG(vm, SG_ERROR_UNBOUND);
1178       }
1179       return desc;
1180     }
1181     break;
1182   default: break;
1183   }
1184   return desc;			/* for convenience */
1185 }
1186 
1187 /*
1188   TODO: some SRFIs (as far as I know only 105) requires speciall sh-bang
1189   it's better to have hook or something.
1190  */
read_hash_bang(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1191 SgObject read_hash_bang(SgPort *port, SgChar c, dispmacro_param *param,
1192 			SgReadContext *ctx)
1193 {
1194   SgChar c2 = Sg_GetcUnsafe(port);
1195   /* if it starts with '#!/' or '#! ' we assume it's a script */
1196   if (c2 == '/' || c2 == ' ') {
1197     for (;;) {
1198       c2 = Sg_GetcUnsafe(port);
1199       /* internal eol-style is LF */
1200       if (c2 == LF) return NULL;
1201       if (c2 == EOF) return SG_EOF;
1202     }
1203     /* NOTREACHED */
1204   } else {
1205     readtable_t *table = Sg_PortReadTable(port);
1206     SgObject desc = read_symbol_or_number(port, c2, table, ctx);
1207     if (SG_SYMBOLP(desc)) {
1208       Sg_ApplyDirective(port, desc, ctx);
1209     }
1210     return NULL;
1211   }
1212 }
read_hash_bar(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1213 SgObject read_hash_bar(SgPort *port, SgChar c, dispmacro_param *param,
1214 		       SgReadContext *ctx)
1215 {
1216   SgChar c1, c2;
1217   int nest = 0;
1218 
1219  seek_c1:
1220   c1 = Sg_GetcUnsafe(port);
1221 
1222  seek_c2:
1223   c2 = Sg_GetcUnsafe(port);
1224   if (c2 == EOF) {
1225     lexical_error(port, ctx,
1226 		  UC("unexpected end-of-file while reading comments"));
1227   }
1228   if (c1 == '|' && c2 == '#') {
1229     if (nest == 0) return NULL;
1230     nest -= 1;
1231     goto seek_c1;
1232   }
1233   if (c1 == '#' && c2 == '|') {
1234     nest += 1;
1235     goto seek_c1;
1236   }
1237   c1 = c2;
1238   if (c1 == '|' || c1 == '#') goto seek_c2;
1239   goto seek_c1;
1240 }
1241 
read_bytevector(SgPort * port,SgChar * buf,SgReadContext * ctx)1242 static SgObject read_bytevector(SgPort *port, SgChar *buf, SgReadContext *ctx)
1243 {
1244 #define CAST_FIXNUM_TO_U8(DATUM, REF)					\
1245   ((SG_INT_VALUE(datum) >= 0 && SG_INT_VALUE(datum) <= UINT8_MAX)	\
1246    ? (*ref = (SG_INT_VALUE(datum) & 0xFF), TRUE) : FALSE)
1247 #define READ_BVECTOR(s_type, c_type, s_type_test, c_type_test)		\
1248   do {									\
1249     if (ustrcmp(buf, s_type) == 0) {					\
1250       long m = (long)(n * sizeof(c_type));				\
1251       long i;								\
1252       SgByteVector *bvector = Sg_MakeByteVector(m, 0);			\
1253       for (i = 0; i < m; i += (long)sizeof(c_type)) {			\
1254 	SgObject datum = SG_CAR(lst);					\
1255 	if ( s_type_test (datum)) {					\
1256 	  c_type * ref = (c_type *)&bvector->elements[i];		\
1257 	  if ( c_type_test (datum, ref)) {				\
1258 	    lst = SG_CDR(lst);						\
1259 	    continue;							\
1260 	  }								\
1261 	}								\
1262 	lexical_error(port, ctx, UC("expected " s_type ", but got %S"),	\
1263 		      SG_CAR(lst));					\
1264       }									\
1265       bvector = Sg_AddConstantLiteral(bvector);				\
1266       return bvector;							\
1267     }									\
1268   } while (0)
1269 
1270   long line_begin = Sg_LineNo(port);
1271   long n;
1272   SgObject lst = read_list(port, ')', ctx);
1273   parsing_range(ctx, line_begin, Sg_LineNo(port));
1274   n = Sg_Length(lst);
1275   READ_BVECTOR("u8", uint8_t, SG_INTP, CAST_FIXNUM_TO_U8);
1276   lexical_error(port, ctx, UC("invalid lexical syntax #v%s ..."), buf);
1277 #undef CAST_FIXNUM_TO_U8
1278 #undef READ_BVECTOR
1279   return SG_UNDEF;		/* dummy */
1280 }
1281 
read_hash_v(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1282 SgObject read_hash_v(SgPort *port, SgChar c, dispmacro_param *param,
1283 		     SgReadContext *ctx)
1284 {
1285   SgChar buf[16] = {0};
1286   int i;
1287   for (i = 0; i < 2; i++) {
1288     buf[i] = Sg_GetcUnsafe(port);
1289     if (buf[i] == EOF || delimited(port, buf[i])) break;
1290   }
1291   if (i != 2) {
1292     lexical_error(port, ctx, UC("invalid lexical syntax #v%s%A ..."), buf,
1293 		  SG_MAKE_CHAR(c));
1294   }
1295   c = Sg_GetcUnsafe(port);
1296   if (c == '(') {
1297     return read_bytevector(port, buf, ctx);
1298   }
1299   lexical_error(port, ctx, UC("invalid lexical syntax #v%s%A ..."), buf,
1300 		SG_MAKE_CHAR(c));
1301   return SG_UNDEF;		/* dummy */
1302 }
1303 
read_hash_u(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1304 SgObject read_hash_u(SgPort *port, SgChar c, dispmacro_param *param,
1305 		     SgReadContext *ctx)
1306 {
1307   SgChar buf[16] = {0};
1308   buf[0] = 'u';
1309   buf[1] = Sg_GetcUnsafe(port);
1310   if (buf[1] != '8') {
1311     lexical_error(port, ctx, UC("invalid lexical syntax #%s ..."), buf);
1312   }
1313   c = Sg_GetcUnsafe(port);
1314   if (c == '(') {
1315     return read_bytevector(port, buf, ctx);
1316   }
1317   lexical_error(port, ctx, UC("invalid lexical syntax #%s%A ..."), buf,
1318 		SG_MAKE_CHAR(c));
1319   return SG_UNDEF;		/* dummy */
1320 }
1321 
read_hash_t(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1322 SgObject read_hash_t(SgPort *port, SgChar c, dispmacro_param *param,
1323 		     SgReadContext *ctx)
1324 {
1325   SgChar c2 = Sg_GetcUnsafe(port);
1326   readtable_t *table = Sg_PortReadTable(port);
1327   if (c2 == EOF || delimited(port, c2)) {
1328     Sg_UngetcUnsafe(port, c2);
1329     return SG_TRUE;
1330   }
1331   /* R7RS allow #true so we need to check */
1332   if ((c == 't' && c2 == 'r') ||
1333       (table->insensitiveP && (c2 == 'r' || c2 == 'R'))) {
1334     SgObject rest = read_compatible_symbol(port, c2, ctx, NULL);
1335     if (ustrcmp(SG_STRING_VALUE(rest), "rue") == 0) {
1336       return SG_TRUE;
1337     }
1338   }
1339   lexical_error(port, ctx, UC("invalid lexical syntax #%S%S"),
1340 		SG_MAKE_CHAR(c), SG_MAKE_CHAR(c2));
1341   return SG_UNDEF;		/* dummy */
1342 }
1343 
read_hash_f(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1344 SgObject read_hash_f(SgPort *port, SgChar c, dispmacro_param *param,
1345 		     SgReadContext *ctx)
1346 {
1347   SgChar c2 = Sg_GetcUnsafe(port);
1348   readtable_t *table = Sg_PortReadTable(port);
1349   if (c2 == EOF || delimited(port, c2)) {
1350     Sg_UngetcUnsafe(port, c2);
1351     return SG_FALSE;
1352   }
1353   /* R7RS allow #false so we need to check */
1354   if ((c == 'f' && c2 == 'a') ||
1355       (table->insensitiveP && (c2 == 'a' || c2 == 'A'))) {
1356     SgObject rest = read_compatible_symbol(port, c2, ctx, NULL);
1357     if (ustrcmp(SG_STRING_VALUE(rest), "alse") == 0) {
1358       return SG_FALSE;
1359     }
1360   }
1361   lexical_error(port, ctx, UC("invalid lexical syntax #%S%S"),
1362 		SG_MAKE_CHAR(c), SG_MAKE_CHAR(c2));
1363   return SG_UNDEF;		/* dummy */
1364 }
1365 
read_prefixed_number(SgChar initial,SgPort * port,SgReadContext * ctx)1366 static SgObject read_prefixed_number(SgChar initial, SgPort *port,
1367 				     SgReadContext *ctx)
1368 {
1369   SgChar buf[4096];
1370   SgString *str;
1371   SgObject num;
1372   SgChar c;
1373   int offset = 0;
1374   buf[offset++] = '#';
1375   /* check if it's #%c#%c type or not */
1376   c = Sg_PeekcUnsafe(port);
1377   if (c == '#') {
1378     buf[offset++] = initial;
1379     buf[offset++] = c;
1380     Sg_GetcUnsafe(port);	/* discard */
1381     initial = Sg_GetcUnsafe(port);
1382   }
1383   read_thing(port, ctx, buf + offset, array_sizeof(buf) - offset, initial);
1384   str = Sg_HeapString(buf);
1385   num = Sg_StringToNumber(str, 10, TRUE);
1386   if (SG_FALSEP(num)) {
1387     lexical_error(port, ctx,
1388 		  UC("invalid lexical syntax %s while reading number"), buf);
1389   }
1390   return num;
1391 }
1392 
read_hash_b(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1393 SgObject read_hash_b(SgPort *port, SgChar c, dispmacro_param *param,
1394 			    SgReadContext *ctx)
1395 {
1396   return read_prefixed_number(c, port, ctx);
1397 }
1398 
read_hash_o(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1399 SgObject read_hash_o(SgPort *port, SgChar c, dispmacro_param *param,
1400 		     SgReadContext *ctx)
1401 {
1402   return read_prefixed_number(c, port, ctx);
1403 }
read_hash_d(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1404 SgObject read_hash_d(SgPort *port, SgChar c, dispmacro_param *param,
1405 		     SgReadContext *ctx)
1406 {
1407   return read_prefixed_number(c, port, ctx);
1408 }
read_hash_x(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1409 SgObject read_hash_x(SgPort *port, SgChar c, dispmacro_param *param,
1410 		     SgReadContext *ctx)
1411 {
1412   return read_prefixed_number(c, port, ctx);
1413 }
read_hash_i(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1414 SgObject read_hash_i(SgPort *port, SgChar c, dispmacro_param *param,
1415 		     SgReadContext *ctx)
1416 {
1417   return read_prefixed_number(c, port, ctx);
1418 }
read_hash_e(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1419 SgObject read_hash_e(SgPort *port, SgChar c, dispmacro_param *param,
1420 		     SgReadContext *ctx)
1421 {
1422   return read_prefixed_number(c, port, ctx);
1423 }
read_hash_open_paren(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1424 SgObject read_hash_open_paren(SgPort *port, SgChar c, dispmacro_param *param,
1425 			      SgReadContext *ctx)
1426 {
1427   SgObject v = Sg_ListToVector(read_list(port, ')', ctx), 0, -1);
1428   v = Sg_AddConstantLiteral(v);
1429   return v;
1430 }
1431 
read_hash_semicolon(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1432 SgObject read_hash_semicolon(SgPort *port, SgChar c,
1433 			     dispmacro_param *param, SgReadContext *ctx)
1434 {
1435   read_expr(port, ctx);
1436   return NULL;
1437 }
1438 
1439 static const struct {
1440   const char* name;
1441   int code;
1442 } s_char_name[] = {
1443   { "nul",        0x0000 },
1444   { "null",       0x0000 },
1445   { "alarm",      0x0007 },
1446   { "backspace",  0x0008 },
1447   { "tab",        0x0009 },
1448   { "linefeed",   0x000A },
1449   { "newline",    0x000A },
1450   { "vtab",       0x000B },
1451   { "page",       0x000C },
1452   { "return",     0x000D },
1453   { "esc",        0x001B },
1454   { "escape",     0x001B },
1455   { "space",      0x0020 },
1456   { "delete",     0x007F }
1457 };
1458 
read_hash_escape(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1459 SgObject read_hash_escape(SgPort *port, SgChar c, dispmacro_param *param,
1460 			  SgReadContext *ctx)
1461 {
1462   c = Sg_GetcUnsafe(port);
1463   if (c == 'x') {
1464     c = Sg_PeekcUnsafe(port);
1465     if (c == EOF || delimited(port, c)) return SG_MAKE_CHAR('x');
1466     return SG_MAKE_CHAR(read_hex_scalar_value(port, ctx));
1467   } else {
1468     SgChar buf[16];
1469     int i;
1470     if (c == '(') {
1471       c = Sg_PeekcUnsafe(port);
1472       if (c == EOF || delimited(port, c)) return SG_MAKE_CHAR('(');
1473       read_thing(port, ctx, buf, array_sizeof(buf), -1);
1474       lexical_error(port, ctx, UC("invalid lexical syntax #\\(%s"), buf);
1475     }
1476     Sg_UngetcUnsafe(port, c);
1477     read_thing(port, ctx, buf, array_sizeof(buf), -1);
1478     if (buf[0] == 0) {
1479       c = Sg_GetcUnsafe(port);
1480       if (c == EOF) {
1481 	lexical_error(port, ctx,
1482 		      UC("unexpected end-of-file while reading character"));
1483       }
1484       return SG_MAKE_CHAR(c);
1485     }
1486     if (buf[1] == 0) return SG_MAKE_CHAR(buf[0]);
1487     for (i = 0; i < array_sizeof(s_char_name); i++) {
1488       if (ustrcmp(buf, s_char_name[i].name) == 0)
1489 	return SG_MAKE_CHAR(s_char_name[i].code);
1490     }
1491     /* I assume this is not happen */
1492     lexical_error(port, ctx, UC("invalid lexical syntax #\\%s"), buf);
1493   }
1494   return SG_UNDEF; /* dummy */
1495 }
1496 
read_hash_equal(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1497 SgObject read_hash_equal(SgPort *port, SgChar c, dispmacro_param *param,
1498 			 SgReadContext *ctx)
1499 {
1500   if (!ctx->graph) {
1501     lexical_error(port, ctx, UC("invalid lexical syntax #="));
1502   }
1503   if (param->present) {
1504     SgObject obj = read_expr(port, ctx);
1505     intptr_t mark = param->value;
1506     if (SG_EOFP(obj)) {
1507       lexical_error(port, ctx,
1508 		    UC("unexpected end-of-file while reading tag #%ld="), mark);
1509     }
1510     if (SG_UNDEFP(Sg_HashTableRef(ctx->graph, SG_MAKE_INT(mark), SG_UNDEF))) {
1511       Sg_HashTableSet(ctx->graph, SG_MAKE_INT(mark), obj, 0);
1512       return obj;
1513     }
1514     lexical_error(port, ctx, UC("duplicate tag #%ld="), mark);
1515   } else {
1516     lexical_error(port, ctx,
1517 		  UC("invalid lexical syntax %S"), SG_MAKE_CHAR(c));
1518   }
1519   return SG_UNDEF;		/* dummy */
1520 }
1521 
read_hash_hash(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1522 SgObject read_hash_hash(SgPort *port, SgChar c, dispmacro_param *param,
1523 			SgReadContext *ctx)
1524 {
1525   if (param->present) {
1526     long mark = param->value;
1527     SgSharedRef *ref = make_shared_ref(mark);
1528     ctx->graphRef = TRUE;
1529     return SG_OBJ(ref);
1530   } else {
1531     lexical_error(port, ctx,
1532 		  UC("invalid lexical syntax %S"), SG_MAKE_CHAR(c));
1533   }
1534   return SG_UNDEF;		/* dummy */
1535 }
1536 
read_hash_less(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1537 SgObject read_hash_less(SgPort *port, SgChar c, dispmacro_param *param,
1538 			SgReadContext *ctx)
1539 {
1540   /* #<(library) ...> imports readtables */
1541   SgObject libs = read_list(port, '>', ctx);
1542   SgObject cp;
1543   SG_FOR_EACH(cp, libs) {
1544     SgObject name = SG_CAR(cp);
1545     if (SG_PAIRP(name)) {
1546       SgObject lib = Sg_FindLibrary(name, FALSE);
1547       if (SG_FALSEP(lib)) {
1548 	lexical_error(port, ctx, UC("no library named %S"), name);
1549 	return NULL;
1550       }
1551       if (SG_LIBRARY_READTABLE(lib)) {
1552 	ENSURE_COPIED_TABLE(port);
1553 	add_read_table(SG_LIBRARY_READTABLE(lib), Sg_PortReadTable(port));
1554       }
1555     } else {
1556       lexical_error(port, ctx,
1557 		    UC("library name required but got %S"), name);
1558       return NULL;
1559     }
1560   }
1561   return NULL;
1562 }
1563 
read_hash_colon(SgPort * port,SgChar c,dispmacro_param * param,SgReadContext * ctx)1564 SgObject read_hash_colon(SgPort *port, SgChar c, dispmacro_param *param,
1565 			 SgReadContext *ctx)
1566 {
1567   /* TODO how to handle |? */
1568   SgString *s = read_compatible_symbol(port, -1, ctx, NULL);
1569   return Sg_MakeSymbol(s, FALSE);
1570 }
1571 
dispmacro_reader(SgPort * port,SgChar c,SgReadContext * ctx)1572 SgObject dispmacro_reader(SgPort *port, SgChar c, SgReadContext *ctx)
1573 {
1574   readtable_t *table;
1575   disptab_t *disptab;
1576   if (c >= MAX_READTABLE_CHAR) {
1577     lexical_error(port, ctx, UC("macro char %S is out of range"),
1578 		  SG_MAKE_CHAR(c));
1579   }
1580   table = Sg_PortReadTable(port);
1581   disptab = table->readtable[c].disp;
1582   if (!disptab) {
1583     lexical_error(port, ctx,
1584 		  UC("%S is not a dispatch macro character"), SG_MAKE_CHAR(c));
1585   } else {
1586     dispmacro_param param;
1587     SgChar c2 = Sg_GetcUnsafe(port);
1588     if (c2 >= '0' && c2 <= '9') {
1589       param.present = TRUE;
1590       param.value = c2 - '0';
1591       while (1) {
1592 	c2 = Sg_GetcUnsafe(port);
1593 	if (c2 < '0' || c2 > '9') break;
1594 	param.value = param.value * 10 + c2 - '0';
1595 	if (param.value < 0 || (long)param.value > SG_INT_MAX) {
1596 	  lexical_error(port, ctx,
1597 			UC("invalid object tag, value out of range"));
1598 	}
1599       }
1600     } else {
1601       param.present = FALSE;
1602     }
1603     if (c2 == EOF) {
1604       lexical_error(port, ctx, UC("imcoplete dispatch macro"));
1605     }
1606     if (c2 >= MAX_READTABLE_CHAR) {
1607       lexical_error(port, ctx, UC("macro sub char %S is out of range"),
1608 		    SG_MAKE_CHAR(c2));
1609     }
1610     if (disptab[c2].cfunc) {
1611       return (*disptab[c2].cfunc)(port, c2, &param, ctx);
1612     }
1613     if (SG_EQ(disptab[c2].sfunc, SG_UNBOUND)) {
1614       lexical_error(port, ctx, UC("%S is not a dispatch macro sub character"),
1615 		    SG_MAKE_CHAR(c2));
1616     }
1617     /* FIXME: we should eliminate this type of dispatch. */
1618     if (SG_PROCEDURE_REQUIRED(disptab[c2].sfunc) > 3) {
1619       return Sg_Apply4(disptab[c2].sfunc, port, SG_MAKE_CHAR(c2),
1620 		       param.present ? SG_MAKE_INT(param.value) : SG_FALSE,
1621 		       ctx);
1622     } else {
1623       return Sg_Apply3(disptab[c2].sfunc, port, SG_MAKE_CHAR(c2),
1624 		       param.present ? SG_MAKE_INT(param.value) : SG_FALSE);
1625     }
1626   }
1627   return SG_UNDEF;		/* dummy */
1628 }
1629 
read_expr4(SgPort * port,int flags,SgChar delim,SgReadContext * ctx)1630 SgObject read_expr4(SgPort *port, int flags, SgChar delim, SgReadContext *ctx)
1631 {
1632   SgChar c;
1633   readtable_t *table;
1634   SgObject item;
1635   SgVM *vm = Sg_VM();
1636   while (1) {
1637   top:
1638     /* when previous execution was (values), then valuesCount = 0 and
1639        this skips all read expression. to prevent that we need to
1640        set it 1 here.
1641        this call must return a value anyway :)
1642     */
1643     vm->valuesCount = 1;
1644     c = Sg_GetcUnsafe(port);
1645     /* we ignore unicode space for now. */
1646     if (c == EOF) {
1647       if (flags & ACCEPT_EOF)
1648 	return SG_EOF;
1649       lexical_error(port, ctx, UC("unexpected end-of-file"));
1650     }
1651     if (c > 127 && Sg_Ucs4WhiteSpaceP(c)) goto top;
1652     /* for reading list it does not matter either ')' or ']'
1653        if we got delmiter we can simply return RPAREN*/
1654     if (c == delim) return SG_SYMBOL_RPAREN;
1655 
1656     parsing_line(ctx, Sg_LineNo(port));
1657     /* need to be re-get for #!read-macro */
1658     table = Sg_PortReadTable(port);
1659     if (c < 128 && isdigit(c)) {
1660       return read_symbol_or_number(port, c, table, ctx);
1661     }
1662 
1663     if (c < MAX_READTABLE_CHAR) {
1664       /* lookup readtable */
1665       switch (table->readtable[c].type) {
1666       case CT_WHITE_SPACE: goto top;
1667       case CT_SINGLE_ESCAPE:
1668 	/* TODO how to treat? */
1669 	Sg_UngetcUnsafe(port, c);
1670 	return read_escaped_symbol(port, ctx);
1671       case CT_TERM_MACRO:
1672       case CT_NON_TERM_MACRO: {
1673 	SgObject o = macro_reader(port, c, table->readtable, ctx);
1674 	/* if the (values) is the result of reader then we ignore the result */
1675 	if (o && vm->valuesCount) return o;
1676 	break;
1677       }
1678       case CT_ILLEGAL:
1679 	lexical_error(port, ctx,
1680 		      UC("invalid character %U during reading identifier"), c);
1681 	break;
1682       default:
1683 	goto read_sym_or_num;
1684       }
1685     } else {
1686       goto read_sym_or_num;
1687     }
1688   }
1689  read_sym_or_num:
1690   item = read_symbol_or_number(port, c, table, ctx);
1691   if (!ctx->escapedp && SG_EQ(item, SG_SYMBOL_DOT)) {
1692     if (flags & ACCEPT_DOT) return item;
1693     lexical_error(port, ctx, UC("misplaced dot('.')"));
1694   }
1695   return item;
1696 }
lookup_graph(SgPort * port,SgReadContext * ctx,SgSharedRef * ref)1697 static SgObject lookup_graph(SgPort *port, SgReadContext *ctx,
1698 			     SgSharedRef *ref)
1699 {
1700   SgObject obj = Sg_HashTableRef(ctx->graph, ref->index, SG_UNDEF);
1701   if (SG_SHAREDREF_P(obj)) return lookup_graph(port, ctx, SG_SHAREDREF(obj));
1702   if (obj != SG_UNDEF) return obj;
1703   lexical_error(port, ctx, UC("attempt to reference undefined tag #%A#"),
1704 		ref->index);
1705   return SG_UNDEF; /* dummy */
1706 }
1707 
link_graph(SgPort * port,SgReadContext * ctx,SgObject obj)1708 static void link_graph(SgPort *port, SgReadContext *ctx, SgObject obj)
1709 {
1710   if (SG_PAIRP(obj)) {
1711     if (SG_SHAREDREF_P(SG_CAR(obj))) {
1712       SG_SET_CAR(obj, lookup_graph(port, ctx, SG_SHAREDREF(SG_CAR(obj))));
1713     } else {
1714       link_graph(port, ctx, SG_CAR(obj));
1715     }
1716     if (SG_SHAREDREF_P(SG_CDR(obj))) {
1717       SG_SET_CDR(obj, lookup_graph(port, ctx, SG_SHAREDREF(SG_CDR(obj))));
1718     } else {
1719       link_graph(port, ctx, SG_CDR(obj));
1720     }
1721     return;
1722   }
1723   if (SG_VECTORP(obj)) {
1724     long n = SG_VECTOR_SIZE(obj), i;
1725     for (i = 0; i < n; i++) {
1726       if (SG_SHAREDREF_P(SG_VECTOR_ELEMENT(obj, i))) {
1727 	SG_VECTOR_ELEMENT(obj, i)
1728 	  = lookup_graph(port, ctx, SG_SHAREDREF(SG_VECTOR_ELEMENT(obj, i)));
1729       } else {
1730 	link_graph(port, ctx, SG_VECTOR_ELEMENT(obj, i));
1731       }
1732     }
1733     return;
1734   }
1735 }
1736 
Sg_ReadWithContext(SgObject port,SgReadContext * ctx)1737 SgObject Sg_ReadWithContext(SgObject port, SgReadContext *ctx)
1738 {
1739   SgObject obj;
1740   /* extends_loading_table(port); */
1741   if (ctx->graph) {
1742     /* clear it */
1743     Sg_HashCoreClear(SG_HASHTABLE_CORE(ctx->graph), 0);
1744   }
1745   /* we only set if the flag is explicitly set.
1746      this makes #!fold-case one time only. */
1747   if (ctx->flags & SG_READ_NO_CASE ||
1748       ctx->flags & SG_READ_CASE) {
1749     ENSURE_COPIED_TABLE(port);
1750     /* one or the other */
1751     SG_PORT_READTABLE(port)->insensitiveP = (ctx->flags & SG_READ_NO_CASE);
1752   }
1753 
1754   ctx->firstLine = Sg_LineNo(port);
1755   obj = read_expr4(port, ACCEPT_EOF, EOF, ctx);
1756   if (!ctx->escapedp && SG_EQ(obj, SG_SYMBOL_DOT)) {
1757     lexical_error(port, ctx, UC("misplaced dot('.')"));
1758   }
1759   if (ctx->graph && ctx->graphRef) link_graph(port, ctx, obj);
1760   parsing_range(ctx, ctx->firstLine, Sg_LineNo(port));
1761   return obj;
1762 }
1763 
Sg_Read(SgObject port,int readSharedObject)1764 SgObject Sg_Read(SgObject port, int readSharedObject)
1765 {
1766   SgReadContext ctx = SG_STATIC_READ_CONTEXT;
1767   ASSERT(SG_PORTP(port));
1768   /* ASSERT(SG_TEXTUAL_PORTP(port)); */
1769   /* make read context for shared object */
1770   if (readSharedObject) {
1771     SgHashTable graph;
1772     Sg_InitHashTableSimple(&graph, SG_HASH_EQ, 1);
1773     ctx.graph = &graph;
1774   }
1775   ctx.graphRef = FALSE;
1776   ctx.flags = SG_READ_SOURCE_INFO;
1777   return Sg_ReadWithContext(SG_PORT(port), &ctx);
1778 }
1779 
Sg_ReadDelimitedList(SgObject port,SgChar delim,int sharedP)1780 SgObject Sg_ReadDelimitedList(SgObject port, SgChar delim, int sharedP)
1781 {
1782   SgObject obj;
1783   SgReadContext ctx = SG_STATIC_READ_CONTEXT;
1784   ASSERT(SG_PORTP(port));
1785 
1786   /* extends_loading_table(port); */
1787   /* make read context for shared object */
1788   if (sharedP) {
1789     SgHashTable graph;
1790     Sg_InitHashTableSimple(&graph, SG_HASH_EQ, 1);
1791     ctx.graph = &graph;
1792   }
1793   ctx.graphRef = FALSE;
1794   ctx.firstLine = Sg_LineNo(port);
1795   obj = read_list(port, delim, &ctx);
1796   if (ctx.graph && ctx.graphRef) link_graph(port, &ctx, obj);
1797 
1798   return obj;
1799 }
1800 
1801 static disptab_t* alloc_disptab();
1802 
make_readtable(int init)1803 static readtable_t* make_readtable(int init)
1804 {
1805   readtable_t *tab = SG_NEW(readtable_t);
1806   tab->insensitiveP = FALSE;
1807   if (init) {
1808     readtab_t *r = tab->readtable;
1809     int i;
1810     for (i = 0; i <= ' '; i++) {
1811       r[i].type = CT_ILLEGAL;
1812       r[i].cfunc = NULL;
1813       r[i].sfunc = SG_UNBOUND;
1814       r[i].disp = NULL;
1815     }
1816     for (; i < MAX_READTABLE_CHAR; i++) {
1817       r[i].type = CT_CONSTITUENT;
1818       r[i].cfunc = NULL;
1819       r[i].sfunc = SG_UNBOUND;
1820       r[i].disp = NULL;
1821     }
1822   }
1823   return tab;
1824 }
1825 
add_read_table(readtable_t * src,readtable_t * dst)1826 void add_read_table(readtable_t *src, readtable_t *dst)
1827 {
1828   int i;
1829   readtab_t *sr = src->readtable;
1830   readtab_t *dr = dst->readtable;
1831   for (i = 0; i < MAX_READTABLE_CHAR; i++) {
1832     if (sr[i].disp) {
1833       int j;
1834       disptab_t *d = dr[i].disp;
1835       /* TODO should not be overwritten? */
1836       dr[i] = sr[i];
1837       if (!d) dr[i].disp = alloc_disptab();
1838       else dr[i].disp = d;
1839 
1840       for (j = 0; j < MAX_READTABLE_CHAR; j++) {
1841 	if (!SG_UNBOUNDP(sr[i].disp[j].sfunc) || sr[i].disp[j].cfunc) {
1842 	  dr[i].disp[j] = sr[i].disp[j];
1843 	}
1844       }
1845     } else if (sr[i].cfunc) {
1846       dr[i] = sr[i];
1847     } else if (!SG_UNBOUNDP(sr[i].sfunc)) {
1848       dr[i] = sr[i];
1849     }
1850   }
1851 }
1852 
default_readtable(int copyP)1853 static readtable_t* default_readtable(int copyP)
1854 {
1855   readtable_t* table;
1856   if (SG_VM_IS_SET_FLAG(Sg_VM(), SG_R6RS_MODE)) {
1857     table = &r6rs_read_table;
1858   } else if (SG_VM_IS_SET_FLAG(Sg_VM(), SG_R7RS_MODE)) {
1859     table = &r7rs_read_table;
1860   } else {
1861     table = &compat_read_table;
1862   }
1863   if (copyP) {
1864     return Sg_CopyReadTable(table);
1865   } else {
1866     return table;
1867   }
1868 }
1869 
Sg_DefaultReadTable()1870 readtable_t* Sg_DefaultReadTable()
1871 {
1872   return default_readtable(TRUE);
1873 }
1874 
Sg_SetPortReadTable(SgPort * port,readtable_t * table)1875 void Sg_SetPortReadTable(SgPort *port, readtable_t *table)
1876 {
1877   SG_PORT_READTABLE(port) = table;
1878 }
1879 
Sg_PortReadTable(SgPort * port)1880 readtable_t* Sg_PortReadTable(SgPort *port)
1881 {
1882   readtable_t* table = SG_PORT_READTABLE(port);
1883   if (table) return table;
1884   return default_readtable(FALSE);
1885 }
1886 
Sg_EnsureCopiedReadTable(SgPort * port)1887 readtable_t* Sg_EnsureCopiedReadTable(SgPort *port)
1888 {
1889   ENSURE_COPIED_TABLE(port);
1890   return SG_PORT_READTABLE(port);
1891 }
1892 
Sg_CopyReadTable(readtable_t * src)1893 readtable_t* Sg_CopyReadTable(readtable_t *src)
1894 {
1895   readtable_t *newr = make_readtable(FALSE);
1896   readtab_t *newt = newr->readtable, *oldt = src->readtable;
1897   int i;
1898   *newr = *src;
1899   for (i = 0; i < MAX_READTABLE_CHAR; i++) {
1900     if (oldt[i].disp) {
1901       newt[i].disp = alloc_disptab();
1902       memcpy(newt[i].disp, oldt[i].disp,
1903 	     sizeof(*(newt[i].disp)) * MAX_READTABLE_CHAR);
1904     }
1905   }
1906   return newr;
1907 }
Sg_PortCaseInsensitiveP(SgPort * port)1908 int Sg_PortCaseInsensitiveP(SgPort *port)
1909 {
1910   if (SG_PORT_READTABLE(port)) {
1911     return SG_PORT_READTABLE(port)->insensitiveP;
1912   }
1913   return FALSE;
1914 }
1915 
1916 static SgInternalMutex obtable_mutax;
1917 static SgHashTable *obtable = NULL;
1918 
Sg_ConstantLiteralP(SgObject o)1919 int Sg_ConstantLiteralP(SgObject o)
1920 {
1921   SgObject e;
1922   if (SG_PAIRP(o)) {
1923     /* simple check */
1924     return !SG_FALSEP(Sg_GetPairAnnotation(o, SYM_CONST));
1925   } else if (SG_VECTORP(o)) {
1926     /* again simple check */
1927     return SG_LITERAL_VECTORP(o);
1928   }
1929   e = Sg_HashTableRef(obtable, o, SG_UNBOUND);
1930   if (SG_UNBOUNDP(e)) return FALSE;
1931   /* constant literal must satisfy eq? */
1932   return e == o;
1933 }
1934 
Sg_AddConstantLiteral(SgObject o)1935 SgObject Sg_AddConstantLiteral(SgObject o)
1936 {
1937   SgObject e;
1938   Sg_LockMutex(&obtable_mutax);
1939   e = Sg_HashTableRef(obtable, o, SG_UNBOUND);
1940   if (SG_UNBOUNDP(e)) {
1941     Sg_HashTableSet(obtable, o, o, SG_HASH_NO_OVERWRITE);
1942     /* TODO after CLOS, we should not use header bits */
1943     if (SG_VECTORP(o)) {
1944       SG_VECTOR_SET_LITERAL(o);
1945     }
1946     if (SG_BVECTORP(o)) {
1947       SG_BVECTOR_SET_LITERAL(o);
1948     }
1949     if (SG_PAIRP(o)) {
1950       /* do the cdr parts. */
1951       Sg_SetPairAnnotation(o, SYM_CONST, SG_TRUE);
1952       if (SG_PAIRP(SG_CAR(o))) {
1953 	SG_SET_CAR(o, Sg_AddConstantLiteral(SG_CAR(o)));
1954       }
1955       if (SG_PAIRP(SG_CDR(o))) {
1956 	SG_SET_CDR(o, Sg_AddConstantLiteral(SG_CDR(o)));
1957       }
1958     }
1959   } else {
1960     o = e;
1961   }
1962   Sg_UnlockMutex(&obtable_mutax);
1963   return o;
1964 }
1965 
Sg_DelimitedCharP(SgChar c,SgPort * p)1966 int Sg_DelimitedCharP(SgChar c, SgPort *p)
1967 {
1968   return delimited(p, c);
1969 }
1970 
1971 #define SCHEME_OBJ(NAME) SG_CPP_CAT(NAME, _stub)
1972 #define STUB_NAME(NAME) SG_CPP_CAT(NAME, stub)
1973 
1974 /* initialize */
1975 #define DEFINE_MACRO_STUB(FN, NAME)					\
1976   static SgObject STUB_NAME(FN) (SgObject *args, int argc, void *data_)	\
1977   {									\
1978     SgReadContext ctx = SG_STATIC_READ_CONTEXT;				\
1979     SgPort *p;								\
1980     SgChar c;								\
1981     SgObject r;								\
1982     if (argc != 2) {							\
1983       Sg_WrongNumberOfArgumentsAtLeastViolation(SG_INTERN(NAME),	\
1984 						2, argc, SG_NIL);	\
1985     }									\
1986     if (!SG_PORTP(args[0])) {						\
1987       Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME),			\
1988 				      SG_MAKE_STRING("port"),		\
1989 				      args[0], SG_NIL);			\
1990     }									\
1991     if (!SG_CHARP(args[1])) {						\
1992       Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME),			\
1993 				      SG_MAKE_STRING("char"),		\
1994 				      args[1], SG_NIL);			\
1995     }									\
1996     p = SG_PORT(args[0]);						\
1997     c = SG_CHAR_VALUE(args[1]);						\
1998     r = (FN)(p, c, &ctx);						\
1999     if (r) return r;							\
2000     else return SG_UNDEF;						\
2001   }									\
2002   SG_DEFINE_SUBR(SCHEME_OBJ(FN), 2, 0, STUB_NAME(FN), SG_FALSE, NULL)
2003 
2004 #define DEFINE_DISPMACRO_STUB(FN, NAME)				\
2005   static SgObject STUB_NAME(FN)					\
2006   (SgObject *args, int argc, void *data_)			\
2007   {								\
2008     SgReadContext ctx = SG_STATIC_READ_CONTEXT;			\
2009     SgObject param_scm, r;					\
2010     SgPort *p;							\
2011     SgChar c;							\
2012     dispmacro_param param;					\
2013     if (argc != 3) {						\
2014       Sg_WrongNumberOfArgumentsAtLeastViolation(SG_INTERN(NAME),\
2015 						3, argc, SG_NIL);\
2016     }								\
2017     if (!SG_PORTP(args[0])) {					\
2018       Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME),		\
2019 				      SG_MAKE_STRING("port"),	\
2020 				      args[0], SG_NIL);		\
2021     }								\
2022     if (!SG_CHARP(args[1])) {					\
2023       Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME),		\
2024 				      SG_MAKE_STRING("char"),	\
2025 				      args[1], SG_NIL);		\
2026     }								\
2027     p = SG_PORT(args[0]);					\
2028     c = SG_CHAR_VALUE(args[1]);					\
2029     param_scm = args[2];					\
2030     if (SG_FALSEP(param_scm)) {					\
2031       param.present = FALSE;					\
2032       param.value = 0;						\
2033     } else if (SG_INTP(param_scm)) {				\
2034       param.present = TRUE;					\
2035       param.value = SG_INT_VALUE(param_scm);			\
2036     } else {							\
2037       Sg_WrongTypeOfArgumentViolation(SG_INTERN(NAME),		\
2038 				      SG_MAKE_STRING("fixnum"),	\
2039 				      param_scm, SG_NIL);	\
2040       return SG_UNDEF;						\
2041     }								\
2042     r = (FN)(p, c, &param, &ctx);				\
2043     if (r) return r;						\
2044     else return SG_UNDEF;					\
2045   }								\
2046   SG_DEFINE_SUBR(SCHEME_OBJ(FN), 3, 0, STUB_NAME(FN), SG_FALSE, NULL)
2047 
2048 DEFINE_MACRO_STUB(read_vertical_bar,   "|-reader");
2049 DEFINE_MACRO_STUB(read_double_quote,   "\"-reader");
2050 DEFINE_MACRO_STUB(read_quote,          "'-reader");
2051 DEFINE_MACRO_STUB(read_open_paren,     "(-reader");
2052 DEFINE_MACRO_STUB(read_close_paren,    ")-reader");
2053 DEFINE_MACRO_STUB(read_open_bracket,   "[-reader");
2054 DEFINE_MACRO_STUB(read_close_bracket,  "]-reader");
2055 DEFINE_MACRO_STUB(read_semicolon,      ";-reader");
2056 DEFINE_MACRO_STUB(read_quasiquote,     "`-reader");
2057 DEFINE_MACRO_STUB(read_unquote,        ",-reader");
2058 DEFINE_MACRO_STUB(read_colon,          ":-reader");
2059 DEFINE_MACRO_STUB(dispmacro_reader,    "dispatch-macro-reader");
2060 
2061 DEFINE_DISPMACRO_STUB(read_hash_quote,      "#'-reader");
2062 DEFINE_DISPMACRO_STUB(read_hash_quasiquote, "#`-reader");
2063 DEFINE_DISPMACRO_STUB(read_hash_unquote,    "#,-reader");
2064 DEFINE_DISPMACRO_STUB(read_hash_bang,       "#!-reader");
2065 DEFINE_DISPMACRO_STUB(read_hash_v, 	    "#v-reader");
2066 DEFINE_DISPMACRO_STUB(read_hash_u, 	    "#u-reader");
2067 DEFINE_DISPMACRO_STUB(read_hash_t, 	    "#t-reader");
2068 DEFINE_DISPMACRO_STUB(read_hash_f, 	    "#f-reader");
2069 DEFINE_DISPMACRO_STUB(read_hash_b, 	    "#b-reader");
2070 DEFINE_DISPMACRO_STUB(read_hash_o, 	    "#o-reader");
2071 DEFINE_DISPMACRO_STUB(read_hash_d, 	    "#d-reader");
2072 DEFINE_DISPMACRO_STUB(read_hash_x, 	    "#x-reader");
2073 DEFINE_DISPMACRO_STUB(read_hash_i, 	    "#i-reader");
2074 DEFINE_DISPMACRO_STUB(read_hash_e, 	    "#e-reader");
2075 DEFINE_DISPMACRO_STUB(read_hash_open_paren, "#(-reader");
2076 DEFINE_DISPMACRO_STUB(read_hash_semicolon,  "#;-reader");
2077 DEFINE_DISPMACRO_STUB(read_hash_bar,        "#|-reader");
2078 DEFINE_DISPMACRO_STUB(read_hash_escape,     "#\\-reader");
2079 DEFINE_DISPMACRO_STUB(read_hash_equal,      "#=-reader");
2080 DEFINE_DISPMACRO_STUB(read_hash_hash,       "##-reader");
2081 DEFINE_DISPMACRO_STUB(read_hash_less,       "#<-reader");
2082 DEFINE_DISPMACRO_STUB(read_hash_colon,      "#:-reader");
2083 
2084 
Sg_GetMacroCharacter(SgChar c,readtable_t * table)2085 SgObject Sg_GetMacroCharacter(SgChar c, readtable_t *table)
2086 {
2087   ASSERT(table);
2088   if (c < MAX_READTABLE_CHAR) {
2089     readtab_t *r = &table->readtable[c];
2090     SgObject term;
2091     if (r->type == CT_NON_TERM_MACRO) term = SG_TRUE;
2092     else if (r->type == CT_TERM_MACRO) term = SG_FALSE;
2093     else return Sg_Values2(SG_FALSE, SG_FALSE);
2094     return Sg_Values2(SG_UNBOUNDP(r->sfunc) ? SG_UNBOUND : r->sfunc, term);
2095   }
2096   return Sg_Values2(SG_FALSE, SG_FALSE);
2097 }
2098 
2099 #define macro_function_item(name) { (name), SG_OBJ(&(SCHEME_OBJ(name))) }
get_macro_function(SgObject fn)2100 static macro_function get_macro_function(SgObject fn)
2101 {
2102   static const struct {
2103     macro_function f;
2104     SgObject s;
2105   } x[] = {
2106     macro_function_item(read_vertical_bar),
2107     macro_function_item(read_double_quote),
2108     macro_function_item(read_quote),
2109     macro_function_item(read_open_paren),
2110     macro_function_item(read_close_paren),
2111     macro_function_item(read_open_bracket),
2112     macro_function_item(read_close_bracket),
2113     macro_function_item(read_semicolon),
2114     macro_function_item(read_quasiquote),
2115     macro_function_item(read_unquote),
2116     macro_function_item(read_colon),
2117     macro_function_item(dispmacro_reader)
2118   };
2119   int i;
2120   for (i = 0; i < array_sizeof(x); i++) {
2121     if (SG_EQ(fn, x[i].s)) return x[i].f;
2122   }
2123   return NULL;
2124 }
2125 
Sg_SetMacroCharacter(SgChar c,SgObject proc,int nontermP,readtable_t * table)2126 void Sg_SetMacroCharacter(SgChar c, SgObject proc, int nontermP,
2127 			  readtable_t *table)
2128 {
2129   ASSERT(table);
2130   if (!isdigit(c) && c < MAX_READTABLE_CHAR) {
2131     readtab_t *r = &table->readtable[c];
2132     r->type = nontermP ? CT_NON_TERM_MACRO : CT_TERM_MACRO;
2133     r->sfunc = proc;
2134     r->cfunc = get_macro_function(proc);
2135     if (r->disp) r->disp = 0;
2136   } else {
2137     Sg_ImplementationRestrictionViolation
2138       (SG_INTERN("set-macro-character"),
2139        SG_MAKE_STRING("non ascii char is not supported"),
2140        SG_MAKE_CHAR(c));
2141   }
2142 }
2143 
Sg_GetDispatchMacroCharacter(SgChar c,SgChar subc,readtable_t * table)2144 SgObject Sg_GetDispatchMacroCharacter(SgChar c, SgChar subc, readtable_t *table)
2145 {
2146   ASSERT(table);
2147   if (c < MAX_READTABLE_CHAR) {
2148     readtab_t *r = &table->readtable[c];
2149     if (!r->disp) {
2150       Sg_AssertionViolation
2151 	(SG_INTERN("get-dispatch-macro-character"),
2152 	 SG_MAKE_STRING("given character is not dispatch macro character"),
2153 	 SG_MAKE_CHAR(c));
2154     }
2155     return SG_UNBOUNDP(r->disp[subc].sfunc) ? SG_UNBOUND : r->disp[subc].sfunc;
2156   }
2157   return SG_FALSE;
2158 }
2159 
get_dispatch_macro_function(SgObject fn)2160 static dispmacro_function get_dispatch_macro_function(SgObject fn)
2161 {
2162   static const struct {
2163     dispmacro_function f;
2164     SgObject s;
2165   } x[] = {
2166     macro_function_item(read_hash_quote),
2167     macro_function_item(read_hash_quasiquote),
2168     macro_function_item(read_hash_unquote),
2169     macro_function_item(read_hash_bang),
2170     macro_function_item(read_hash_v),
2171     macro_function_item(read_hash_u),
2172     macro_function_item(read_hash_t),
2173     macro_function_item(read_hash_f),
2174     macro_function_item(read_hash_b),
2175     macro_function_item(read_hash_o),
2176     macro_function_item(read_hash_d),
2177     macro_function_item(read_hash_x),
2178     macro_function_item(read_hash_i),
2179     macro_function_item(read_hash_e),
2180     macro_function_item(read_hash_open_paren),
2181     macro_function_item(read_hash_semicolon),
2182     macro_function_item(read_hash_bar),
2183     macro_function_item(read_hash_escape),
2184     macro_function_item(read_hash_equal),
2185     macro_function_item(read_hash_hash),
2186     macro_function_item(read_hash_less),
2187     macro_function_item(read_hash_colon)
2188   };
2189   int i;
2190   for (i = 0; i < array_sizeof(x); i++) {
2191     if (SG_EQ(fn, x[i].s)) return x[i].f;
2192   }
2193   return NULL;
2194 }
2195 
Sg_MakeDispatchMacroCharacter(SgChar c,int nontermP,readtable_t * table)2196 int Sg_MakeDispatchMacroCharacter(SgChar c, int nontermP, readtable_t *table)
2197 {
2198   ASSERT(table);
2199   if (!isdigit(c) && c < MAX_READTABLE_CHAR) {
2200     readtab_t *r = &table->readtable[c];
2201     if (!r->disp) r->disp = alloc_disptab();
2202     r->type = nontermP ? CT_NON_TERM_MACRO : CT_TERM_MACRO;
2203     r->sfunc = SG_OBJ(&SCHEME_OBJ(dispmacro_reader));
2204     r->cfunc = dispmacro_reader;
2205   } else {
2206     Sg_ImplementationRestrictionViolation
2207       (SG_INTERN("make-dispatch-macro-character"),
2208        SG_MAKE_STRING("non ascii char is not supported"),
2209        SG_MAKE_CHAR(c));
2210   }
2211   return TRUE;
2212 }
2213 
Sg_SetDispatchMacroCharacter(SgChar c,SgChar subc,SgObject proc,readtable_t * table)2214 void Sg_SetDispatchMacroCharacter(SgChar c, SgChar subc, SgObject proc,
2215 				  readtable_t *table)
2216 {
2217   ASSERT(table);
2218   if (!isdigit(c) && !isdigit(subc) &&
2219       c < MAX_READTABLE_CHAR && subc < MAX_READTABLE_CHAR) {
2220     readtab_t *r = &table->readtable[c];
2221     if (!r->disp) {
2222       Sg_AssertionViolation
2223 	(SG_INTERN("set-dispatch-macro-character"),
2224 	 SG_MAKE_STRING("given character is not dispatch macro character"),
2225 	 SG_MAKE_CHAR(c));
2226     }
2227     r->disp[subc].sfunc = proc;
2228     r->disp[subc].cfunc = get_dispatch_macro_function(proc);
2229   } else {
2230     Sg_ImplementationRestrictionViolation
2231       (SG_INTERN("set-dispatch-macro-character"),
2232        SG_MAKE_STRING("non ascii char is not supported"),
2233        SG_LIST2(SG_MAKE_CHAR(c), SG_MAKE_CHAR(subc)));
2234   }
2235 }
2236 
Sg_EnsureLibraryReadTable(SgLibrary * library)2237 void Sg_EnsureLibraryReadTable(SgLibrary *library)
2238 {
2239   if (!SG_LIBRARY_READTABLE(library)) {
2240     SG_LIBRARY_READTABLE(library) = make_readtable(TRUE);
2241   }
2242 }
2243 
2244 #define SET_READ_MACRO(R, CH, FN, TYPE)			\
2245   ((R)[(CH)].type = (TYPE),				\
2246    (R)[(CH)].cfunc = (FN),				\
2247    (R)[(CH)].sfunc = SG_OBJ(&SG_CPP_CAT(FN, _stub)))
2248 #define SET_TERM_MACRO(R, CH, FN) SET_READ_MACRO(R, CH, FN, CT_TERM_MACRO)
2249 #define SET_NONTERM_MACRO(R, CH, FN)		\
2250   SET_READ_MACRO(R, CH, FN, CT_NON_TERM_MACRO)
2251 #define SET_DISP_MACRO(R, CH, FN)			\
2252   ((R)[(CH)].cfunc = (FN),				\
2253    (R)[(CH)].sfunc = SG_OBJ(&SG_CPP_CAT(FN, _stub)))
2254 
alloc_disptab()2255 disptab_t* alloc_disptab()
2256 {
2257   disptab_t * d = SG_NEW_ARRAY(disptab_t, MAX_READTABLE_CHAR);
2258   int i;
2259   for (i = 0; i < MAX_READTABLE_CHAR; i++) {
2260     d[i].cfunc = 0;
2261     d[i].sfunc = SG_UNBOUND;
2262   }
2263   return d;
2264 }
2265 
2266 #define INIT_COMPAT 0
2267 #define INIT_R6RS   1
2268 #define INIT_R7RS   2
2269 
init_readtable(readtable_t * table,int type)2270 static void init_readtable(readtable_t *table, int type)
2271 {
2272   int i;
2273   readtab_t *r = table->readtable;
2274   disptab_t *d = alloc_disptab();
2275   for (i = 0; i <= ' '; i++) {
2276     r[i].type = CT_ILLEGAL;
2277     r[i].cfunc = NULL;
2278     r[i].sfunc = SG_UNBOUND;
2279     r[i].disp = NULL;
2280   }
2281   for (; i < MAX_READTABLE_CHAR; i++) {
2282     r[i].type = CT_CONSTITUENT;
2283     r[i].cfunc = NULL;
2284     r[i].sfunc = SG_UNBOUND;
2285     r[i].disp = NULL;
2286   }
2287   r['\t'].type = CT_WHITE_SPACE;
2288   r['\n'].type = CT_WHITE_SPACE;
2289   r['\v'].type = CT_WHITE_SPACE;
2290   r['\f'].type = CT_WHITE_SPACE;
2291   r['\r'].type = CT_WHITE_SPACE;
2292   r[' '].type = CT_WHITE_SPACE;
2293   r['\\'].type = CT_SINGLE_ESCAPE;
2294 
2295   SET_TERM_MACRO(r, '"', read_double_quote);
2296   SET_TERM_MACRO(r, '(', read_open_paren);
2297   SET_TERM_MACRO(r, ')', read_close_paren);
2298   SET_TERM_MACRO(r, '[', read_open_bracket);
2299   SET_TERM_MACRO(r, ']', read_close_bracket);
2300   SET_TERM_MACRO(r, ';', read_semicolon);
2301 
2302 
2303   r['#'].disp = d;
2304 
2305   SET_DISP_MACRO(d, '\'', read_hash_quote);
2306   SET_DISP_MACRO(d, '`', read_hash_quasiquote);
2307   SET_DISP_MACRO(d, ',', read_hash_unquote);
2308   SET_DISP_MACRO(d, '!', read_hash_bang);
2309   /* strict mode things */
2310   if (type == INIT_R6RS) {
2311     SET_DISP_MACRO(d, 'v', read_hash_v);
2312     SET_TERM_MACRO(r, '#', dispmacro_reader);
2313     /* tread these as nonterm and let validator raise an error
2314        for "foo'bar" case.
2315      */
2316     SET_NONTERM_MACRO(r, '\'', read_quote);
2317     SET_NONTERM_MACRO(r, '`', read_quasiquote);
2318     SET_NONTERM_MACRO(r, ',', read_unquote);
2319 
2320     table->symbol_reader = read_r6rs_symbol;
2321   } else {
2322     SET_NONTERM_MACRO(r, '#', dispmacro_reader);
2323     SET_DISP_MACRO(d, 'u', read_hash_u);
2324     SET_NONTERM_MACRO(r, '|', read_vertical_bar);
2325     SET_TERM_MACRO(r, '\'', read_quote);
2326     SET_TERM_MACRO(r, '`', read_quasiquote);
2327     SET_TERM_MACRO(r, ',', read_unquote);
2328 
2329     table->symbol_reader = read_compatible_symbol;
2330   }
2331 
2332   /* only compat mode */
2333   if (type == INIT_COMPAT) {
2334     /* #:a is only for compat mode */
2335     SET_DISP_MACRO(d, ':', read_hash_colon);
2336     SET_NONTERM_MACRO(r, ':', read_colon);
2337     /* well it's set only R6RS above but compat mode should read this as well */
2338     SET_DISP_MACRO(d, 'v', read_hash_v);
2339   }
2340   SET_DISP_MACRO(d, 't', read_hash_t);
2341   SET_DISP_MACRO(d, 'T', read_hash_t);
2342   SET_DISP_MACRO(d, 'f', read_hash_f);
2343   SET_DISP_MACRO(d, 'F', read_hash_f);
2344   SET_DISP_MACRO(d, 'b', read_hash_b);
2345   SET_DISP_MACRO(d, 'B', read_hash_b);
2346   SET_DISP_MACRO(d, 'o', read_hash_o);
2347   SET_DISP_MACRO(d, 'O', read_hash_o);
2348   SET_DISP_MACRO(d, 'd', read_hash_d);
2349   SET_DISP_MACRO(d, 'D', read_hash_d);
2350   SET_DISP_MACRO(d, 'x', read_hash_x);
2351   SET_DISP_MACRO(d, 'X', read_hash_x);
2352   SET_DISP_MACRO(d, 'i', read_hash_i);
2353   SET_DISP_MACRO(d, 'I', read_hash_i);
2354   SET_DISP_MACRO(d, 'e', read_hash_e);
2355   SET_DISP_MACRO(d, 'E', read_hash_e);
2356   SET_DISP_MACRO(d, '(', read_hash_open_paren);
2357   SET_DISP_MACRO(d, ';', read_hash_semicolon);
2358   SET_DISP_MACRO(d, '|', read_hash_bar);
2359   SET_DISP_MACRO(d, '\\', read_hash_escape);
2360   SET_DISP_MACRO(d, '=', read_hash_equal);
2361   SET_DISP_MACRO(d, '#', read_hash_hash);
2362   SET_DISP_MACRO(d, '<', read_hash_less);
2363 }
2364 
Sg__InitReader()2365 void Sg__InitReader()
2366 {
2367   init_readtable(&r6rs_read_table, INIT_R6RS);
2368   init_readtable(&r7rs_read_table, INIT_R7RS);
2369   init_readtable(&compat_read_table, INIT_COMPAT);
2370 
2371   Sg_InitMutex(&obtable_mutax, TRUE);
2372   obtable = Sg_MakeHashTableSimple(SG_HASH_EQUAL, 4096);
2373 
2374 #define SET_READER_NAME(fn, name)			\
2375   (SG_PROCEDURE_NAME(&(SCHEME_OBJ(fn))) = SG_MAKE_STRING(name))
2376   SET_READER_NAME(read_vertical_bar,   "|-reader");
2377   SET_READER_NAME(read_double_quote,   "\"-reader");
2378   SET_READER_NAME(read_quote,          "'-reader");
2379   SET_READER_NAME(read_open_paren,     "(-reader");
2380   SET_READER_NAME(read_close_paren,    ")-reader");
2381   SET_READER_NAME(read_open_bracket,   "[-reader");
2382   SET_READER_NAME(read_close_bracket,  "]-reader");
2383   SET_READER_NAME(read_semicolon,      ";-reader");
2384   SET_READER_NAME(read_quasiquote,     "`-reader");
2385   SET_READER_NAME(read_unquote,        ",-reader");
2386   SET_READER_NAME(read_colon,          ":-reader");
2387   SET_READER_NAME(dispmacro_reader,    "dispatch-macro-reader");
2388 
2389   SET_READER_NAME(read_hash_quote,      "#'-reader");
2390   SET_READER_NAME(read_hash_quasiquote, "#`-reader");
2391   SET_READER_NAME(read_hash_unquote,    "#,-reader");
2392   SET_READER_NAME(read_hash_bang,       "#!-reader");
2393   SET_READER_NAME(read_hash_v, 	    "#v-reader");
2394   SET_READER_NAME(read_hash_u, 	    "#u-reader");
2395   SET_READER_NAME(read_hash_t, 	    "#t-reader");
2396   SET_READER_NAME(read_hash_f, 	    "#f-reader");
2397   SET_READER_NAME(read_hash_b, 	    "#b-reader");
2398   SET_READER_NAME(read_hash_o, 	    "#o-reader");
2399   SET_READER_NAME(read_hash_d, 	    "#d-reader");
2400   SET_READER_NAME(read_hash_x, 	    "#x-reader");
2401   SET_READER_NAME(read_hash_i, 	    "#i-reader");
2402   SET_READER_NAME(read_hash_e, 	    "#e-reader");
2403   SET_READER_NAME(read_hash_open_paren, "#(-reader");
2404   SET_READER_NAME(read_hash_semicolon,  "#;-reader");
2405   SET_READER_NAME(read_hash_bar,        "#|-reader");
2406   SET_READER_NAME(read_hash_escape,     "#\\-reader");
2407   SET_READER_NAME(read_hash_equal,      "#=-reader");
2408   SET_READER_NAME(read_hash_hash,       "##-reader");
2409   SET_READER_NAME(read_hash_less,       "#<-reader");
2410   SET_READER_NAME(read_hash_colon,      "#:-reader");
2411 
2412 }
2413 
Sg__InitReaderClass()2414 void Sg__InitReaderClass()
2415 {
2416   SgLibrary *lib = Sg_FindLibrary(SG_INTERN("(sagittarius clos)"), TRUE);
2417 #define CINIT(cl, nam)					\
2418   Sg_InitStaticClassWithMeta(cl, UC(nam), lib, NULL, SG_FALSE, NULL, 0)
2419 
2420   /* shared-ref */
2421   CINIT(SG_CLASS_SHARED_REF,   "<shared-ref>");
2422   /* for now no slot def */
2423   CINIT(SG_CLASS_READ_CONTEXT, "<read-context>");
2424 
2425   DEFAULT_CONTEXT = Sg_MakeDefaultReadContext();
2426   SYM_CONST = SG_INTERN("const");
2427   SYM_SOURCE_INFO = SG_INTERN("source-info");
2428 }
2429 
2430 /*
2431   end of file
2432   Local Variables:
2433   coding: utf-8-unix
2434   End:
2435 */
2436