1 /* Some copilers don't like re-def of GC_malloc in schemef.h: */
2 #ifndef MZ_PRECISE_GC
3 # define SCHEME_NO_GC_PROTO
4 #endif
5 
6 #include "schpriv.h"
7 #include <string.h>
8 #include <ctype.h>
9 #include "schgc.h"
10 
11 # define HASH_TABLE_INIT_SIZE 256
12 #ifdef SMALL_HASH_TABLES
13 # define FILL_FACTOR 1.30
14 #else
15 # define FILL_FACTOR 2
16 #endif
17 
18 #ifndef MZ_PRECISE_GC
19 extern MZGC_DLLIMPORT void (*GC_custom_finalize)(void);
20 #endif
21 #ifndef USE_SENORA_GC
22 extern int GC_is_marked(void *);
23 #endif
24 
25 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
26 THREAD_LOCAL_DECL(static Scheme_Hash_Table *place_local_symbol_table = NULL;)
27 THREAD_LOCAL_DECL(static Scheme_Hash_Table *place_local_keyword_table = NULL;)
28 THREAD_LOCAL_DECL(static Scheme_Hash_Table *place_local_parallel_symbol_table = NULL;)
29 #endif
30 
31 SHARED_OK static Scheme_Hash_Table *symbol_table = NULL;
32 SHARED_OK static Scheme_Hash_Table *keyword_table = NULL;
33 SHARED_OK static Scheme_Hash_Table *parallel_symbol_table = NULL;
34 
35 SHARED_OK static uintptr_t scheme_max_symbol_length;
36 
37 /* globals */
38 SHARED_OK int scheme_case_sensitive = 1;
39 THREAD_LOCAL_DECL(static int gensym_counter);
40 
scheme_set_case_sensitive(int v)41 void scheme_set_case_sensitive(int v) { scheme_case_sensitive =  v; }
42 
43 READ_ONLY Scheme_Object *scheme_symbol_p_proc;
44 READ_ONLY Scheme_Object *scheme_keyword_p_proc;
45 
46 /* locals */
47 static Scheme_Object *symbol_lt (int argc, Scheme_Object *argv[]);
48 static Scheme_Object *symbol_p_prim (int argc, Scheme_Object *argv[]);
49 static Scheme_Object *symbol_unreadable_p_prim (int argc, Scheme_Object *argv[]);
50 static Scheme_Object *string_to_symbol_prim (int argc, Scheme_Object *argv[]);
51 static Scheme_Object *string_to_uninterned_symbol_prim (int argc, Scheme_Object *argv[]);
52 static Scheme_Object *string_to_unreadable_symbol_prim (int argc, Scheme_Object *argv[]);
53 static Scheme_Object *symbol_to_string_prim (int argc, Scheme_Object *argv[]);
54 static Scheme_Object *symbol_to_immutable_string_prim (int argc, Scheme_Object *argv[]);
55 static Scheme_Object *keyword_p_prim (int argc, Scheme_Object *argv[]);
56 static Scheme_Object *keyword_lt (int argc, Scheme_Object *argv[]);
57 static Scheme_Object *string_to_keyword_prim (int argc, Scheme_Object *argv[]);
58 static Scheme_Object *keyword_to_string_prim (int argc, Scheme_Object *argv[]);
59 static Scheme_Object *keyword_to_immutable_string_prim (int argc, Scheme_Object *argv[]);
60 static Scheme_Object *gensym(int argc, Scheme_Object *argv[]);
61 
62 
63 /**************************************************************************/
64 
65 typedef uintptr_t hash_v_t;
66 #define HASH_SEED  0xF0E1D2C3
67 
68 #define SYMTAB_LOST_CELL scheme_false
69 
70 #ifdef MZ_PRECISE_GC
71 # define WEAK_ARRAY_HEADSIZE 4
72 #else
73 # define WEAK_ARRAY_HEADSIZE 0
74 #endif
75 
76 static Scheme_Object *rehash_symbol_bucket(Scheme_Hash_Table *table,
77                                            GC_CAN_IGNORE const char *key, uintptr_t length,
78                                            Scheme_Object *naya, int sym_type);
79 
80 /* Special hashing for symbols: */
symbol_bucket(Scheme_Hash_Table * table,GC_CAN_IGNORE const char * key,uintptr_t length,Scheme_Object * naya,int sym_type)81 static Scheme_Object *symbol_bucket(Scheme_Hash_Table *table,
82 				    GC_CAN_IGNORE const char *key, uintptr_t length,
83 				    Scheme_Object *naya, int sym_type)
84   XFORM_ASSERT_NO_CONVERSION
85 {
86   hash_v_t h, h2;
87   uintptr_t mask;
88   Scheme_Object *bucket;
89 
90   /* WARNING: key may be GC-misaligned... */
91   /* This function is designed to need no MZ_PRECISE_GC instrumentation.
92      To handle re-hashing, it tail-calls rehash_symbol_bucket. */
93 
94   mask = table->size - 1;
95 
96   {
97     uintptr_t i;
98     i = 0;
99     h = HASH_SEED + sym_type;
100     h2 = 0;
101 
102     while (i < length) {
103       int c = key[i++];
104        h ^= (h << 5) + (h >> 2) + c;
105        h2 += c;
106     }
107     /* post hash mixing helps for short symbols */
108     h ^= (h << 5) + (h >> 2) + 0xA0A0;
109     h ^= (h << 5) + (h >> 2) + 0x0505;
110 
111     if (naya) {
112       /* record hash code (or some fragment of it) for `equal?` hashing: */
113       scheme_install_symbol_hash_code(naya, h);
114     }
115 
116     h = h & mask;
117     h2 = h2 & mask;
118   }
119 
120   h2 |= 0x1;
121 
122   while ((bucket = table->keys[WEAK_ARRAY_HEADSIZE + h])) {
123     if (SAME_OBJ(bucket, SYMTAB_LOST_CELL)) {
124       if (naya) {
125 	/* We're re-using, so decrement count and it will be
126 	   re-incremented. */
127 	--table->count;
128 	break;
129       }
130     } else if (((int)length == SCHEME_SYM_LEN(bucket))
131 	       && !memcmp(key, SCHEME_SYM_VAL(bucket), length))
132       return bucket;
133     h = (h + h2) & mask;
134   }
135 
136   /* In case it's GC-misaligned: */
137   key = NULL;
138 
139   if (!naya)
140     return NULL;
141 
142   if (table->count * FILL_FACTOR >= table->size) {
143     return rehash_symbol_bucket(table, key, length, naya, sym_type);
144   }
145 
146   table->keys[WEAK_ARRAY_HEADSIZE + h] = naya;
147 
148   table->count++;
149 
150   return naya;
151 }
152 
rehash_symbol_bucket(Scheme_Hash_Table * table,GC_CAN_IGNORE const char * key,uintptr_t length,Scheme_Object * naya,int sym_type)153 static Scheme_Object *rehash_symbol_bucket(Scheme_Hash_Table *table,
154                                            GC_CAN_IGNORE const char *key, uintptr_t length,
155                                            Scheme_Object *naya, int sym_type)
156 {
157   int i, oldsize = table->size, newsize, lostc;
158   size_t asize;
159   Scheme_Object *cb;
160   Scheme_Object **old = table->keys;
161 
162   /* WARNING: key may be GC-misaligned... */
163 
164   /* Don't grow table if it's mostly lost cells (due to lots of
165      temporary symbols). */
166   lostc = 0;
167   for (i = 0; i < oldsize; i++) {
168     cb = old[WEAK_ARRAY_HEADSIZE + i];
169     if (cb == SYMTAB_LOST_CELL)
170       lostc++;
171   }
172   if ((lostc * 2) < table->count)
173     newsize = oldsize << 1;
174   else
175     newsize = oldsize;
176 
177   asize = (size_t)newsize * sizeof(Scheme_Object *);
178   {
179     Scheme_Object **ba;
180 #ifdef MZ_PRECISE_GC
181     ba = (Scheme_Object **)GC_malloc_weak_array(asize, SYMTAB_LOST_CELL);
182 #else
183     ba = MALLOC_N_ATOMIC(Scheme_Object *, newsize);
184     memset((char *)ba, 0, asize);
185 #endif
186     table->keys = ba;
187   }
188   table->size = newsize;
189 
190   table->count = 0;
191 
192   for (i = 0; i < oldsize; i++) {
193     cb = old[WEAK_ARRAY_HEADSIZE + i] ;
194     if (cb && (cb != SYMTAB_LOST_CELL))
195       symbol_bucket(table, SCHEME_SYM_VAL(cb), SCHEME_SYM_LEN(cb), cb, sym_type);
196   }
197 
198   /* Restore GC-misaligned key: */
199   key = SCHEME_SYM_VAL(naya);
200 
201   return symbol_bucket(table, key, length, naya, sym_type);
202 }
203 
204 #ifndef MZ_PRECISE_GC
clean_one_symbol_table(Scheme_Hash_Table * table)205 static void clean_one_symbol_table(Scheme_Hash_Table *table)
206 {
207   /* Clean the symbol table by removing pointers to collected
208      symbols. The correct way to do this is to install a GC
209      finalizer on symbol pointers, but that would be expensive. */
210 
211   if (table) {
212     Scheme_Object **buckets = (Scheme_Object **)table->keys;
213     int i = table->size;
214     void *b;
215 
216     while (i--) {
217       if (buckets[WEAK_ARRAY_HEADSIZE + i] && !SAME_OBJ(buckets[WEAK_ARRAY_HEADSIZE + i], SYMTAB_LOST_CELL)
218           && (!(b = GC_base(buckets[WEAK_ARRAY_HEADSIZE + i]))
219 #ifndef USE_SENORA_GC
220 	      || !GC_is_marked(b)
221 #endif
222 	      )) {
223 	buckets[WEAK_ARRAY_HEADSIZE + i] = SYMTAB_LOST_CELL;
224       }
225     }
226   }
227 }
228 
clean_symbol_table(void)229 static void clean_symbol_table(void)
230 {
231   clean_one_symbol_table(symbol_table);
232   clean_one_symbol_table(keyword_table);
233   clean_one_symbol_table(parallel_symbol_table);
234 
235   scheme_clear_ephemerons();
236 # ifdef MZ_USE_JIT
237   scheme_clean_native_symtab();
238 # endif
239 # ifndef MZ_PRECISE_GC
240   scheme_clean_cust_box_list();
241 # endif
242 # ifndef MZ_PRECISE_GC
243   scheme_notify_code_gc();
244 # endif
245 }
246 #endif
247 
248 /**************************************************************************/
249 
init_one_symbol_table()250 static Scheme_Hash_Table *init_one_symbol_table()
251 {
252   Scheme_Hash_Table *table;
253   int size;
254   Scheme_Object **ba;
255 
256   table = scheme_make_hash_table(SCHEME_hash_ptr);
257 
258   table->size = HASH_TABLE_INIT_SIZE;
259 
260   size = table->size * sizeof(Scheme_Object *);
261 #ifdef MZ_PRECISE_GC
262   ba = (Scheme_Object **)GC_malloc_weak_array(size, SYMTAB_LOST_CELL);
263 #else
264   ba = MALLOC_N_ATOMIC(Scheme_Object *, size);
265   memset((char *)ba, 0, size);
266 #endif
267   table->keys = ba;
268 
269   return table;
270 }
271 
272 void
scheme_init_symbol_table()273 scheme_init_symbol_table ()
274 {
275   REGISTER_SO(symbol_table);
276   REGISTER_SO(keyword_table);
277   REGISTER_SO(parallel_symbol_table);
278 
279   symbol_table = init_one_symbol_table();
280   keyword_table = init_one_symbol_table();
281   parallel_symbol_table = init_one_symbol_table();
282 
283 #ifndef MZ_PRECISE_GC
284   GC_custom_finalize = clean_symbol_table;
285 #endif
286 }
287 
288 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
289 void
scheme_init_place_local_symbol_table()290 scheme_init_place_local_symbol_table ()
291 {
292   REGISTER_SO(place_local_symbol_table);
293   REGISTER_SO(place_local_keyword_table);
294   REGISTER_SO(place_local_parallel_symbol_table);
295 
296   place_local_symbol_table = init_one_symbol_table();
297   place_local_keyword_table = init_one_symbol_table();
298   place_local_parallel_symbol_table = init_one_symbol_table();
299 }
300 #endif
301 
302 void
scheme_init_symbol_type(Scheme_Startup_Env * env)303 scheme_init_symbol_type (Scheme_Startup_Env *env)
304 {
305 }
306 
307 void
scheme_init_symbol(Scheme_Startup_Env * env)308 scheme_init_symbol (Scheme_Startup_Env *env)
309 {
310   Scheme_Object *p;
311 
312   REGISTER_SO(scheme_symbol_p_proc);
313   p = scheme_make_folding_prim(symbol_p_prim, "symbol?", 1, 1, 1);
314   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
315                                                             | SCHEME_PRIM_IS_OMITABLE
316                                                             | SCHEME_PRIM_PRODUCES_BOOL);
317   scheme_symbol_p_proc = p;
318   scheme_addto_prim_instance("symbol?", p, env);
319 
320   p = scheme_make_folding_prim(symbol_unreadable_p_prim, "symbol-unreadable?", 1, 1, 1);
321   scheme_addto_prim_instance("symbol-unreadable?", p, env);
322 
323   p = scheme_make_folding_prim(scheme_checked_symbol_interned_p, "symbol-interned?", 1, 1, 1);
324   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
325                                                             | SCHEME_PRIM_PRODUCES_BOOL);
326   scheme_addto_prim_instance("symbol-interned?", p, env);
327 
328   ADD_FOLDING_PRIM("symbol<?",                 symbol_lt,                       1, -1, 1, env);
329 
330   p = scheme_make_folding_prim(string_to_symbol_prim, "string->symbol", 1, 1, 1);
331   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
332   scheme_addto_prim_instance("string->symbol", p, env);
333 
334   ADD_IMMED_PRIM("string->uninterned-symbol",  string_to_uninterned_symbol_prim, 1, 1, env);
335   ADD_IMMED_PRIM("string->unreadable-symbol",  string_to_unreadable_symbol_prim, 1, 1, env);
336 
337   p = scheme_make_folding_prim(symbol_to_string_prim, "symbol->string", 1, 1, 1);
338   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
339   scheme_addto_prim_instance("symbol->string", p, env);
340 
341   p = scheme_make_folding_prim(symbol_to_immutable_string_prim, "symbol->immutable-string", 1, 1, 1);
342   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
343   scheme_addto_prim_instance("symbol->immutable-string", p, env);
344 
345   REGISTER_SO(scheme_keyword_p_proc);
346   p = scheme_make_folding_prim(keyword_p_prim, "keyword?", 1, 1, 1);
347   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
348                                                             | SCHEME_PRIM_IS_OMITABLE
349                                                             | SCHEME_PRIM_PRODUCES_BOOL);
350   scheme_keyword_p_proc = p;
351   scheme_addto_prim_instance("keyword?", p, env);
352 
353   ADD_FOLDING_PRIM("keyword<?",                keyword_lt,                       1, -1, 1, env);
354 
355   p = scheme_make_folding_prim(string_to_keyword_prim, "string->keyword", 1, 1, 1);
356   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
357   scheme_addto_prim_instance("string->keyword", p, env);
358 
359   p = scheme_make_folding_prim(keyword_to_string_prim, "keyword->string", 1, 1, 1);
360   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
361   scheme_addto_prim_instance("keyword->string", p, env);
362 
363   p = scheme_make_folding_prim(keyword_to_immutable_string_prim, "keyword->immutable-string", 1, 1, 1);
364   SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
365   scheme_addto_prim_instance("keyword->immutable-string", p, env);
366 
367   ADD_IMMED_PRIM("gensym",                     gensym,                           0, 1, env);
368 }
369 
scheme_get_max_symbol_length()370 uintptr_t scheme_get_max_symbol_length() {
371   /* x86, x86_64, and powerpc support aligned_atomic_loads_and_stores */
372   return scheme_max_symbol_length;
373 }
374 
scheme_ensure_max_symbol_length(uintptr_t len)375 void scheme_ensure_max_symbol_length(uintptr_t len)
376 {
377 #ifdef MZ_USE_PLACES
378   mzrt_ensure_max_cas(&scheme_max_symbol_length, len);
379 #else
380   if (len > scheme_max_symbol_length) {
381     scheme_max_symbol_length = len;
382   }
383 #endif
384 }
385 
386 
387 static Scheme_Object *
make_a_symbol(const char * name,uintptr_t len,int kind)388 make_a_symbol(const char *name, uintptr_t len, int kind)
389 {
390   Scheme_Symbol *sym;
391 
392   sym = (Scheme_Symbol *)scheme_malloc_atomic_tagged(sizeof(Scheme_Symbol) + len + 1 - mzFLEX4_DELTA);
393 
394   sym->iso.so.type = scheme_symbol_type;
395   MZ_OPT_HASH_KEY(&sym->iso) = kind;
396   sym->len = len;
397   memcpy(sym->s, name, len);
398   sym->s[len] = 0;
399 
400   scheme_ensure_max_symbol_length(len);
401 
402   return (Scheme_Object *)sym;
403 }
404 
405 Scheme_Object *
scheme_make_symbol(const char * name)406 scheme_make_symbol(const char *name)
407 {
408   return make_a_symbol(name, strlen(name), 0x1);
409 }
410 
411 Scheme_Object *
scheme_make_exact_symbol(const char * name,uintptr_t len)412 scheme_make_exact_symbol(const char *name, uintptr_t len)
413 {
414   return make_a_symbol(name, len, 0x1);
415 }
416 
417 Scheme_Object *
scheme_make_exact_char_symbol(const mzchar * name,uintptr_t len)418 scheme_make_exact_char_symbol(const mzchar *name, uintptr_t len)
419 {
420   char buf[64], *bs;
421   intptr_t blen;
422   bs = scheme_utf8_encode_to_buffer_len(name, len, buf, 64, &blen);
423   return make_a_symbol(bs, blen, 0x1);
424 }
425 
426 typedef enum {
427   enum_symbol,
428   enum_keyword,
429   enum_parallel_symbol,
430 } enum_symbol_table_type;
431 
432 static Scheme_Object *
intern_exact_symbol_in_table_worker(enum_symbol_table_type type,int kind,const char * name,uintptr_t len)433 intern_exact_symbol_in_table_worker(enum_symbol_table_type type, int kind, const char *name, uintptr_t len)
434 {
435   Scheme_Object *sym;
436   Scheme_Hash_Table *table;
437 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
438   Scheme_Hash_Table *place_local_table;
439 #endif
440 
441   sym = NULL;
442 
443   switch(type) {
444     case enum_symbol:
445       table = symbol_table;
446 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
447       place_local_table = place_local_symbol_table;
448 #endif
449       break;
450     case enum_keyword:
451       table = keyword_table;
452 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
453       place_local_table = place_local_keyword_table;
454 #endif
455       break;
456     case enum_parallel_symbol:
457       table = parallel_symbol_table;
458 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
459       place_local_table = place_local_parallel_symbol_table;
460 #endif
461       break;
462     default:
463       printf("Invalid enum_symbol_table_type %i\n", type);
464       abort();
465   }
466 
467 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
468   if (place_local_table) {
469     sym = symbol_bucket(place_local_table, name, len, NULL, type);
470   }
471 #endif
472   if (!sym && table) {
473     sym = symbol_bucket(table, name, len, NULL, type);
474   }
475   if (!sym) {
476     /* create symbol in symbol table unless a place local symbol table has been created */
477     /* once the first place has been create the symbol_table becomes read-only and
478        shouldn't be modified */
479 
480     Scheme_Object *newsymbol;
481     Scheme_Hash_Table *create_table;
482 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
483     create_table = place_local_table ? place_local_table : table;
484 #else
485     create_table = table;
486 #endif
487     newsymbol = make_a_symbol(name, len, kind);
488 
489     /* we must return the result of this symbol bucket call because another
490      * thread could have inserted the same symbol between the first
491      * symbol_bucket call above and this one */
492     sym = symbol_bucket(create_table, name, len, newsymbol, type);
493   }
494 
495   return sym;
496 }
497 
498 static Scheme_Object *
intern_exact_symbol_in_table(enum_symbol_table_type type,int kind,const char * name,uintptr_t len)499 intern_exact_symbol_in_table(enum_symbol_table_type type, int kind, const char *name, uintptr_t len)
500 {
501   return intern_exact_symbol_in_table_worker(type, kind, name, len);
502 }
503 
504 Scheme_Object *
scheme_intern_exact_symbol(const char * name,uintptr_t len)505 scheme_intern_exact_symbol(const char *name, uintptr_t len)
506 {
507   return intern_exact_symbol_in_table(enum_symbol, 0, name, len);
508 }
509 
510 Scheme_Object *
scheme_intern_exact_parallel_symbol(const char * name,uintptr_t len)511 scheme_intern_exact_parallel_symbol(const char *name, uintptr_t len)
512 {
513   return intern_exact_symbol_in_table(enum_parallel_symbol, 0x2, name, len);
514 }
515 
516 Scheme_Object *
scheme_intern_exact_char_symbol(const mzchar * name,uintptr_t len)517 scheme_intern_exact_char_symbol(const mzchar *name, uintptr_t len)
518 {
519   char buf[64], *bs;
520   intptr_t blen;
521   bs = scheme_utf8_encode_to_buffer_len(name, len, buf, 64, &blen);
522   return intern_exact_symbol_in_table(enum_symbol, 0, bs, blen);
523 }
524 
525 Scheme_Object *
scheme_intern_exact_keyword(const char * name,uintptr_t len)526 scheme_intern_exact_keyword(const char *name, uintptr_t len)
527 {
528   Scheme_Object *s;
529   s = intern_exact_symbol_in_table(enum_keyword, 0, name, len);
530   if (s->type == scheme_symbol_type)
531     s->type = scheme_keyword_type;
532   return s;
533 }
534 
scheme_intern_exact_char_keyword(const mzchar * name,uintptr_t len)535 Scheme_Object *scheme_intern_exact_char_keyword(const mzchar *name, uintptr_t len)
536 {
537   char buf[64], *bs;
538   intptr_t blen;
539   Scheme_Object *s;
540   bs = scheme_utf8_encode_to_buffer_len(name, len, buf, 64, &blen);
541   s = intern_exact_symbol_in_table(enum_keyword, 0, bs, blen);
542   if (s->type == scheme_symbol_type)
543     s->type = scheme_keyword_type;
544   return s;
545 }
546 
547 #define MAX_SYMBOL_SIZE 256
548 
549 Scheme_Object *
scheme_intern_symbol(const char * name)550 scheme_intern_symbol(const char *name)
551   /* `name' must be ASCII; this function is not suitable for non-ASCII
552      conversion, necause it assumes that downcasing each C char
553      is good enough to normalize the case. */
554 {
555   if (!scheme_case_sensitive) {
556     uintptr_t i, len;
557     char *naya;
558     char on_stack[MAX_SYMBOL_SIZE];
559 
560     len = strlen(name);
561     if (len >= MAX_SYMBOL_SIZE)
562       naya = (char *)scheme_malloc_atomic(len + 1);
563     else
564       naya = on_stack;
565 
566     for (i = 0; i < len; i++) {
567       int c = ((unsigned char *)name)[i];
568 
569       c = scheme_tolower(c);
570 
571       naya[i] = c;
572     }
573 
574     naya[len] = 0;
575 
576     return scheme_intern_exact_symbol(naya, len);
577   }
578 
579   return scheme_intern_exact_symbol(name, strlen(name));
580 }
581 
scheme_symbol_name_and_size(Scheme_Object * sym,uintptr_t * length,int flags)582 const char *scheme_symbol_name_and_size(Scheme_Object *sym, uintptr_t *length, int flags)
583 {
584   int has_space = 0, has_special = 0, has_pipe = 0, has_upper = 0, digit_start;
585   int dz;
586   uintptr_t i, len = SCHEME_SYM_LEN(sym), total_length;
587   int pipe_quote;
588   char buf[100];
589   char *s, *result;
590 
591   if ((flags & SCHEME_SNF_PIPE_QUOTE) || (flags & SCHEME_SNF_FOR_TS))
592     pipe_quote = 1;
593   else if (flags & SCHEME_SNF_NO_PIPE_QUOTE)
594     pipe_quote = 0;
595   else {
596     pipe_quote = SCHEME_TRUEP(scheme_get_param(scheme_current_config(), MZCONFIG_CAN_READ_PIPE_QUOTE));
597   }
598 
599   if (len < 100) {
600     s = buf;
601     memcpy(buf, SCHEME_SYM_VAL(sym), len + 1);
602   } else
603     s = scheme_symbol_val(sym);
604 
605 
606 #define isSpecial(ch) ((ch == '(') || (ch == '[') || (ch == '{')       \
607 		       || (ch == ')') || (ch == ']') || (ch == '}')    \
608 		       || (ch == ')') || (ch == '\\')   \
609 		       || (ch == '"') || (ch == '\'')   \
610 		       || (ch == '`') || (ch == ',')    \
611                        || (ch == ';')                   \
612                        || (((ch == '>') || (ch == '<')) \
613 			   && (flags & SCHEME_SNF_FOR_TS)))
614 
615   if (len) {
616     int ch = ((unsigned char *)s)[0];
617     digit_start = ((flags & SCHEME_SNF_KEYWORD)
618                    ? 0
619                    : (((ch < 128) && isdigit(ch)) || (ch == '.')
620                       || (ch == '+') || (ch == '-')));
621     if (ch == '#' && (len == 1 || s[1] != '%'))
622       has_special = 1;
623     if (ch == '.' && len == 1)
624       has_special = 1;
625   } else {
626     digit_start = 0;
627     if (!(flags & SCHEME_SNF_KEYWORD))
628       has_space = 1;
629   }
630 
631   for (i = 0; i < len; i++) {
632     int ch = ((unsigned char *)s)[i];
633 
634     if (ch > 127) {
635       /* Decode UTF-8. */
636       mzchar buf[2];
637       int ul = 2;
638       while (1) {
639         if (scheme_utf8_decode((unsigned char *)s, i, i + ul,
640                                buf, 0, 1,
641                                NULL, 0, 0) > 0)
642           break;
643         ul++;
644       }
645       ch = buf[0];
646       if ((flags & SCHEME_SNF_NEED_CASE) && scheme_isspecialcasing(ch)) {
647         mzchar *rc;
648         buf[1] = 0;
649         rc = scheme_string_recase(buf, 0, 1, 3, 1, NULL);
650         if ((rc != buf) || (rc[0] != ch))
651           has_upper = 1;
652         ch = 'a';
653       }
654       i += (ul - 1);
655     }
656 
657     if (scheme_isspace(ch)) { /* used to have || !isprint(ch) */
658       if ((flags & SCHEME_SNF_FOR_TS) && (ch == ' ')) {
659 	/* space is OK in type symbols */
660       } else
661 	has_space = 1;
662     } else if (isSpecial(ch)
663                || (ch == 0xFEFF)) /* BOM, treated as whitespace by the reader */
664       has_special = 1;
665     else if (ch == '|')
666       has_pipe = 1;
667     else if (flags & SCHEME_SNF_NEED_CASE) {
668       if (scheme_tofold(ch) != ch)
669 	has_upper = 1;
670     }
671   }
672 
673   result = NULL;
674   total_length = 0;
675 
676   if (!has_space && !has_special && (!pipe_quote || !has_pipe) && !has_upper) {
677     mzchar cbuf[100], *cs;
678     intptr_t clen;
679     dz = 0;
680     cs = scheme_utf8_decode_to_buffer_len((unsigned char *)s, len, cbuf, 100, &clen);
681     if (cs
682 	&& digit_start
683 	&& !(flags & SCHEME_SNF_FOR_TS)
684 	&& (SCHEME_TRUEP(scheme_read_number(cs, clen, 0, 0, 1, 10, 0, NULL, &dz, 1))
685 	    || dz)) {
686       /* Need quoting: */
687       if (!pipe_quote) {
688 	/* Just need a leading backslash: */
689 	result = (char *)scheme_malloc_atomic(len + 2);
690 	total_length = len + 1;
691 	memcpy(result + 1, s, len);
692 	result[0] = '\\';
693 	result[len + 1] = 0;
694       }
695     } else {
696       total_length = len;
697       result = s;
698     }
699   }
700 
701   if (!result) {
702     if (!has_pipe && pipe_quote) {
703       result = (char *)scheme_malloc_atomic(len + 3);
704       total_length = len + 2;
705       memcpy(result + 1, s, len);
706       result[0] = '|';
707       result[len + 1] = '|';
708       result[len + 2] = 0;
709     } else {
710       mzchar cbuf[100], *cs, *cresult;
711       intptr_t clen;
712       int p = 0;
713       intptr_t i = 0;
714       intptr_t rlen;
715 
716       dz = 0;
717       cs = scheme_utf8_decode_to_buffer_len((unsigned char *)s, len, cbuf, 100, &clen);
718 
719       cresult = (mzchar *)scheme_malloc_atomic(((2 * len) + 1) * sizeof(mzchar));
720 
721       for (i = 0; i < clen; i++) {
722         mzchar ch = cs[i];
723         if (scheme_isspace(ch)
724 	    || isSpecial(ch)
725 	    || ((ch == '|') && pipe_quote)
726 	    || (!i && s[0] == '#')
727 	    || (has_upper && (ch >= 'A') && (ch <= 'Z')))
728 	  cresult[p++] = '\\';
729 	cresult[p++] = ch;
730       }
731 
732       result = scheme_utf8_encode_to_buffer_len(cresult, p, NULL, 0, &rlen);
733       total_length = rlen;
734     }
735   }
736 
737   if (length)
738     *length = total_length;
739 
740   return (result == buf) ? scheme_symbol_val (sym) : result;
741 }
742 
scheme_symbol_name(Scheme_Object * sym)743 const char *scheme_symbol_name(Scheme_Object *sym)
744 {
745   return scheme_symbol_name_and_size(sym, NULL, 0);
746 }
747 
scheme_symbol_val(Scheme_Object * sym)748 char *scheme_symbol_val(Scheme_Object *sym)
749 {
750   char *s;
751   s = scheme_malloc_atomic(SCHEME_SYM_LEN(sym) + 1);
752   memcpy(s, SCHEME_SYM_VAL(sym), SCHEME_SYM_LEN(sym) + 1);
753   return s;
754 }
755 
756 /* locals */
757 
758 static Scheme_Object *
symbol_p_prim(int argc,Scheme_Object * argv[])759 symbol_p_prim (int argc, Scheme_Object *argv[])
760 {
761   return SCHEME_SYMBOLP(argv[0]) ? scheme_true : scheme_false;
762 }
763 
764 Scheme_Object *
scheme_checked_symbol_interned_p(int argc,Scheme_Object * argv[])765 scheme_checked_symbol_interned_p (int argc, Scheme_Object *argv[])
766 {
767   if (SCHEME_SYMBOLP(argv[0]))
768     return (SCHEME_SYM_WEIRDP(argv[0]) ? scheme_false : scheme_true);
769 
770   scheme_wrong_contract("symbol-interned?", "symbol?", 0, argc, argv);
771   return NULL;
772 }
773 
774 static Scheme_Object *
symbol_unreadable_p_prim(int argc,Scheme_Object * argv[])775 symbol_unreadable_p_prim (int argc, Scheme_Object *argv[])
776 {
777   if (SCHEME_SYMBOLP(argv[0]))
778     return (SCHEME_SYM_PARALLELP(argv[0]) ? scheme_true : scheme_false);
779 
780   scheme_wrong_contract("symbol-unreadable?", "symbol?", 0, argc, argv);
781   return NULL;
782 }
783 
784 static Scheme_Object *
string_to_symbol_prim(int argc,Scheme_Object * argv[])785 string_to_symbol_prim (int argc, Scheme_Object *argv[])
786 {
787   if (!SCHEME_CHAR_STRINGP(argv[0]))
788     scheme_wrong_contract("string->symbol", "string?", 0, argc, argv);
789   return scheme_intern_exact_char_symbol(SCHEME_CHAR_STR_VAL(argv[0]),
790 					 SCHEME_CHAR_STRTAG_VAL(argv[0]));
791 }
792 
793 static Scheme_Object *
string_to_uninterned_symbol_prim(int argc,Scheme_Object * argv[])794 string_to_uninterned_symbol_prim (int argc, Scheme_Object *argv[])
795 {
796   if (!SCHEME_CHAR_STRINGP(argv[0]))
797     scheme_wrong_contract("string->uninterned-symbol", "string?", 0, argc, argv);
798   return scheme_make_exact_char_symbol(SCHEME_CHAR_STR_VAL(argv[0]),
799 				       SCHEME_CHAR_STRTAG_VAL(argv[0]));
800 }
801 
802 static Scheme_Object *
string_to_unreadable_symbol_prim(int argc,Scheme_Object * argv[])803 string_to_unreadable_symbol_prim (int argc, Scheme_Object *argv[])
804 {
805   char buf[64], *bs;
806   intptr_t blen;
807 
808   if (!SCHEME_CHAR_STRINGP(argv[0]))
809     scheme_wrong_contract("string->unreadable-symbol", "string?", 0, argc, argv);
810 
811   bs = scheme_utf8_encode_to_buffer_len(SCHEME_CHAR_STR_VAL(argv[0]),
812                                         SCHEME_CHAR_STRTAG_VAL(argv[0]),
813                                         buf, 64, &blen);
814 
815   return scheme_intern_exact_parallel_symbol(bs, blen);
816 }
817 
scheme_symbol_to_string(Scheme_Object * sym)818 Scheme_Object *scheme_symbol_to_string(Scheme_Object *sym)
819 {
820   Scheme_Object *str;
821   GC_CAN_IGNORE unsigned char *s;
822   GC_CAN_IGNORE mzchar *s2;
823   intptr_t len, i;
824 
825   s = (unsigned char *)SCHEME_SYM_VAL(sym);
826   len = SCHEME_SYM_LEN(sym);
827   for (i = 0; i < len; i++) {
828     if (s[i] >= 128)
829       break;
830   }
831   s = NULL;
832 
833   if (i == len) {
834     /* ASCII */
835     str = scheme_alloc_char_string(len, 0);
836     s = (unsigned char *)SCHEME_SYM_VAL(sym);
837     s2 = SCHEME_CHAR_STR_VAL(str);
838     for (i = 0; i < len; i++) {
839       s2[i] = s[i];
840     }
841     return str;
842   } else {
843     return scheme_make_sized_offset_utf8_string((char *)sym,
844                                                 SCHEME_SYMSTR_OFFSET(sym),
845                                                 SCHEME_SYM_LEN(sym));
846   }
847 }
848 
849 static Scheme_Object *
symbol_to_string_prim(int argc,Scheme_Object * argv[])850 symbol_to_string_prim (int argc, Scheme_Object *argv[])
851 {
852   Scheme_Object *sym;
853 
854   sym = argv[0];
855 
856   if (!SCHEME_SYMBOLP(sym))
857     scheme_wrong_contract("symbol->string", "symbol?", 0, argc, argv);
858 
859   return scheme_symbol_to_string(sym);
860 }
861 
862 static Scheme_Object *
symbol_to_immutable_string_prim(int argc,Scheme_Object * argv[])863 symbol_to_immutable_string_prim (int argc, Scheme_Object *argv[])
864 {
865   Scheme_Object *sym, *str;
866 
867   sym = argv[0];
868 
869   if (!SCHEME_SYMBOLP(sym))
870     scheme_wrong_contract("symbol->immutable-string", "symbol?", 0, argc, argv);
871 
872   /* Could cache, but currently we don't */
873 
874   str = scheme_symbol_to_string(sym);
875   SCHEME_SET_CHAR_STRING_IMMUTABLE(str);
876 
877   return str;
878 }
879 
880 
881 static Scheme_Object *
keyword_p_prim(int argc,Scheme_Object * argv[])882 keyword_p_prim (int argc, Scheme_Object *argv[])
883 {
884   return SCHEME_KEYWORDP(argv[0]) ? scheme_true : scheme_false;
885 }
886 
symkey_lt(const char * who,Scheme_Type ty,const char * contract,int argc,Scheme_Object * argv[])887 static Scheme_Object *symkey_lt (const char *who, Scheme_Type ty, const char *contract,
888                                   int argc, Scheme_Object *argv[])
889 {
890   Scheme_Object *prev = argv[0], *kw;
891   GC_CAN_IGNORE unsigned char *a, *b;
892   int i, al, bl, t;
893 
894   if (!SAME_TYPE(SCHEME_TYPE(prev), ty))
895     scheme_wrong_contract(who, contract, 0, argc, argv);
896 
897   for (i = 1; i < argc; i++) {
898     kw = argv[i];
899     if (!SAME_TYPE(SCHEME_TYPE(kw), ty))
900       scheme_wrong_contract(who, contract, i, argc, argv);
901 
902     a = (unsigned char *)SCHEME_SYM_VAL(prev);
903     al = SCHEME_SYM_LEN(prev);
904     b = (unsigned char *)SCHEME_SYM_VAL(kw);
905     bl = SCHEME_SYM_LEN(kw);
906     t = ((al < bl) ? al : bl);
907     while (t--) {
908       if (*a < *b) {
909         al = 0;
910         bl = 1;
911         break;
912       } else if (*a > *b) {
913         al = bl = 0;
914         break;
915       } else {
916         a++;
917         b++;
918       }
919     }
920     a = b = NULL;
921 
922     if (al >= bl) {
923       /* Check remaining types */
924       for (i++; i < argc; i++) {
925         if (!SAME_TYPE(SCHEME_TYPE(argv[i]), ty))
926           scheme_wrong_contract(who, contract, i, argc, argv);
927       }
928       return scheme_false;
929     }
930 
931     prev = kw;
932   }
933 
934   return scheme_true;
935 }
936 
keyword_lt(int argc,Scheme_Object * argv[])937 static Scheme_Object *keyword_lt (int argc, Scheme_Object *argv[])
938 {
939   return symkey_lt("keyword<?", scheme_keyword_type, "keyword?", argc, argv);
940 }
941 
symbol_lt(int argc,Scheme_Object * argv[])942 static Scheme_Object *symbol_lt (int argc, Scheme_Object *argv[])
943 {
944   return symkey_lt("symbol<?", scheme_symbol_type, "symbol?", argc, argv);
945 }
946 
947 static Scheme_Object *
string_to_keyword_prim(int argc,Scheme_Object * argv[])948 string_to_keyword_prim (int argc, Scheme_Object *argv[])
949 {
950   if (!SCHEME_CHAR_STRINGP(argv[0]))
951     scheme_wrong_contract("string->keyword", "string?", 0, argc, argv);
952   return scheme_intern_exact_char_keyword(SCHEME_CHAR_STR_VAL(argv[0]),
953 					  SCHEME_CHAR_STRTAG_VAL(argv[0]));
954 }
955 
956 static Scheme_Object *
keyword_to_string_prim(int argc,Scheme_Object * argv[])957 keyword_to_string_prim (int argc, Scheme_Object *argv[])
958 {
959   if (!SCHEME_KEYWORDP(argv[0]))
960     scheme_wrong_contract("keyword->string", "keyword?", 0, argc, argv);
961 
962   return scheme_make_sized_offset_utf8_string((char *)(argv[0]),
963 					      SCHEME_SYMSTR_OFFSET(argv[0]),
964 					      SCHEME_SYM_LEN(argv[0]));
965 }
966 
967 static Scheme_Object *
keyword_to_immutable_string_prim(int argc,Scheme_Object * argv[])968 keyword_to_immutable_string_prim (int argc, Scheme_Object *argv[])
969 {
970   Scheme_Object *str;
971 
972   if (!SCHEME_KEYWORDP(argv[0]))
973     scheme_wrong_contract("keyword->immutable-string", "keyword?", 0, argc, argv);
974 
975   /* Could cache, but currently we don't */
976 
977   str = scheme_make_sized_offset_utf8_string((char *)(argv[0]),
978                                              SCHEME_SYMSTR_OFFSET(argv[0]),
979                                              SCHEME_SYM_LEN(argv[0]));
980 
981   SCHEME_SET_CHAR_STRING_IMMUTABLE(str);
982 
983   return str;
984 }
985 
gensym(int argc,Scheme_Object * argv[])986 static Scheme_Object *gensym(int argc, Scheme_Object *argv[])
987 {
988   char buffer[100], *str;
989   Scheme_Object *r;
990 
991   if (argc)
992     r = argv[0];
993   else
994     r = NULL;
995 
996   if (r && !SCHEME_SYMBOLP(r) && !SCHEME_CHAR_STRINGP(r))
997     scheme_wrong_contract("gensym", "(or/c symbol? string?)", 0, argc, argv);
998 
999   if (r) {
1000     char buf[64];
1001     if (SCHEME_CHAR_STRINGP(r)) {
1002       str = scheme_utf8_encode_to_buffer(SCHEME_CHAR_STR_VAL(r),
1003 					 SCHEME_CHAR_STRTAG_VAL(r),
1004 					 buf, 64);
1005     } else
1006       str = SCHEME_SYM_VAL(r);
1007     sprintf(buffer, "%.80s%d", str, gensym_counter++);
1008     str = NULL; /* because it might be GC-misaligned */
1009   } else
1010     sprintf(buffer, "g%d", gensym_counter++);
1011 
1012   r = scheme_make_symbol(buffer);
1013 
1014   return r;
1015 }
1016 
scheme_gensym(Scheme_Object * base)1017 Scheme_Object *scheme_gensym(Scheme_Object *base)
1018 {
1019   Scheme_Object *a[1];
1020   a[0] = base;
1021   return gensym(1, a);
1022 }
1023 
scheme_symbol_append(Scheme_Object * s1,Scheme_Object * s2)1024 Scheme_Object *scheme_symbol_append(Scheme_Object *s1, Scheme_Object *s2)
1025 {
1026   char *s;
1027   s = MALLOC_N_ATOMIC(char, SCHEME_SYM_LEN(s1) + SCHEME_SYM_LEN(s2) + 1);
1028   memcpy(s, SCHEME_SYM_VAL(s1), SCHEME_SYM_LEN(s1));
1029   memcpy(s + SCHEME_SYM_LEN(s1), SCHEME_SYM_VAL(s2), SCHEME_SYM_LEN(s2) + 1);
1030   if (SCHEME_SYM_UNINTERNEDP(s1) || SCHEME_SYM_UNINTERNEDP(s2))
1031     return scheme_make_exact_symbol(s, SCHEME_SYM_LEN(s1) + SCHEME_SYM_LEN(s2));
1032   else if (SCHEME_SYM_PARALLELP(s1) || SCHEME_SYM_PARALLELP(s2))
1033     return scheme_intern_exact_parallel_symbol(s, SCHEME_SYM_LEN(s1) + SCHEME_SYM_LEN(s2));
1034   else
1035     return scheme_intern_exact_symbol(s, SCHEME_SYM_LEN(s1) + SCHEME_SYM_LEN(s2));
1036 }
1037