1 /* Some copilers don't like re-def of GC_malloc in schemef.h: */
2 #ifndef MZ_PRECISE_GC
3 # define SCHEME_NO_GC_PROTO
4 #endif
5
6 #include "schpriv.h"
7 #include <string.h>
8 #include <ctype.h>
9 #include "schgc.h"
10
11 # define HASH_TABLE_INIT_SIZE 256
12 #ifdef SMALL_HASH_TABLES
13 # define FILL_FACTOR 1.30
14 #else
15 # define FILL_FACTOR 2
16 #endif
17
18 #ifndef MZ_PRECISE_GC
19 extern MZGC_DLLIMPORT void (*GC_custom_finalize)(void);
20 #endif
21 #ifndef USE_SENORA_GC
22 extern int GC_is_marked(void *);
23 #endif
24
25 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
26 THREAD_LOCAL_DECL(static Scheme_Hash_Table *place_local_symbol_table = NULL;)
27 THREAD_LOCAL_DECL(static Scheme_Hash_Table *place_local_keyword_table = NULL;)
28 THREAD_LOCAL_DECL(static Scheme_Hash_Table *place_local_parallel_symbol_table = NULL;)
29 #endif
30
31 SHARED_OK static Scheme_Hash_Table *symbol_table = NULL;
32 SHARED_OK static Scheme_Hash_Table *keyword_table = NULL;
33 SHARED_OK static Scheme_Hash_Table *parallel_symbol_table = NULL;
34
35 SHARED_OK static uintptr_t scheme_max_symbol_length;
36
37 /* globals */
38 SHARED_OK int scheme_case_sensitive = 1;
39 THREAD_LOCAL_DECL(static int gensym_counter);
40
scheme_set_case_sensitive(int v)41 void scheme_set_case_sensitive(int v) { scheme_case_sensitive = v; }
42
43 READ_ONLY Scheme_Object *scheme_symbol_p_proc;
44 READ_ONLY Scheme_Object *scheme_keyword_p_proc;
45
46 /* locals */
47 static Scheme_Object *symbol_lt (int argc, Scheme_Object *argv[]);
48 static Scheme_Object *symbol_p_prim (int argc, Scheme_Object *argv[]);
49 static Scheme_Object *symbol_unreadable_p_prim (int argc, Scheme_Object *argv[]);
50 static Scheme_Object *string_to_symbol_prim (int argc, Scheme_Object *argv[]);
51 static Scheme_Object *string_to_uninterned_symbol_prim (int argc, Scheme_Object *argv[]);
52 static Scheme_Object *string_to_unreadable_symbol_prim (int argc, Scheme_Object *argv[]);
53 static Scheme_Object *symbol_to_string_prim (int argc, Scheme_Object *argv[]);
54 static Scheme_Object *symbol_to_immutable_string_prim (int argc, Scheme_Object *argv[]);
55 static Scheme_Object *keyword_p_prim (int argc, Scheme_Object *argv[]);
56 static Scheme_Object *keyword_lt (int argc, Scheme_Object *argv[]);
57 static Scheme_Object *string_to_keyword_prim (int argc, Scheme_Object *argv[]);
58 static Scheme_Object *keyword_to_string_prim (int argc, Scheme_Object *argv[]);
59 static Scheme_Object *keyword_to_immutable_string_prim (int argc, Scheme_Object *argv[]);
60 static Scheme_Object *gensym(int argc, Scheme_Object *argv[]);
61
62
63 /**************************************************************************/
64
65 typedef uintptr_t hash_v_t;
66 #define HASH_SEED 0xF0E1D2C3
67
68 #define SYMTAB_LOST_CELL scheme_false
69
70 #ifdef MZ_PRECISE_GC
71 # define WEAK_ARRAY_HEADSIZE 4
72 #else
73 # define WEAK_ARRAY_HEADSIZE 0
74 #endif
75
76 static Scheme_Object *rehash_symbol_bucket(Scheme_Hash_Table *table,
77 GC_CAN_IGNORE const char *key, uintptr_t length,
78 Scheme_Object *naya, int sym_type);
79
80 /* Special hashing for symbols: */
symbol_bucket(Scheme_Hash_Table * table,GC_CAN_IGNORE const char * key,uintptr_t length,Scheme_Object * naya,int sym_type)81 static Scheme_Object *symbol_bucket(Scheme_Hash_Table *table,
82 GC_CAN_IGNORE const char *key, uintptr_t length,
83 Scheme_Object *naya, int sym_type)
84 XFORM_ASSERT_NO_CONVERSION
85 {
86 hash_v_t h, h2;
87 uintptr_t mask;
88 Scheme_Object *bucket;
89
90 /* WARNING: key may be GC-misaligned... */
91 /* This function is designed to need no MZ_PRECISE_GC instrumentation.
92 To handle re-hashing, it tail-calls rehash_symbol_bucket. */
93
94 mask = table->size - 1;
95
96 {
97 uintptr_t i;
98 i = 0;
99 h = HASH_SEED + sym_type;
100 h2 = 0;
101
102 while (i < length) {
103 int c = key[i++];
104 h ^= (h << 5) + (h >> 2) + c;
105 h2 += c;
106 }
107 /* post hash mixing helps for short symbols */
108 h ^= (h << 5) + (h >> 2) + 0xA0A0;
109 h ^= (h << 5) + (h >> 2) + 0x0505;
110
111 if (naya) {
112 /* record hash code (or some fragment of it) for `equal?` hashing: */
113 scheme_install_symbol_hash_code(naya, h);
114 }
115
116 h = h & mask;
117 h2 = h2 & mask;
118 }
119
120 h2 |= 0x1;
121
122 while ((bucket = table->keys[WEAK_ARRAY_HEADSIZE + h])) {
123 if (SAME_OBJ(bucket, SYMTAB_LOST_CELL)) {
124 if (naya) {
125 /* We're re-using, so decrement count and it will be
126 re-incremented. */
127 --table->count;
128 break;
129 }
130 } else if (((int)length == SCHEME_SYM_LEN(bucket))
131 && !memcmp(key, SCHEME_SYM_VAL(bucket), length))
132 return bucket;
133 h = (h + h2) & mask;
134 }
135
136 /* In case it's GC-misaligned: */
137 key = NULL;
138
139 if (!naya)
140 return NULL;
141
142 if (table->count * FILL_FACTOR >= table->size) {
143 return rehash_symbol_bucket(table, key, length, naya, sym_type);
144 }
145
146 table->keys[WEAK_ARRAY_HEADSIZE + h] = naya;
147
148 table->count++;
149
150 return naya;
151 }
152
rehash_symbol_bucket(Scheme_Hash_Table * table,GC_CAN_IGNORE const char * key,uintptr_t length,Scheme_Object * naya,int sym_type)153 static Scheme_Object *rehash_symbol_bucket(Scheme_Hash_Table *table,
154 GC_CAN_IGNORE const char *key, uintptr_t length,
155 Scheme_Object *naya, int sym_type)
156 {
157 int i, oldsize = table->size, newsize, lostc;
158 size_t asize;
159 Scheme_Object *cb;
160 Scheme_Object **old = table->keys;
161
162 /* WARNING: key may be GC-misaligned... */
163
164 /* Don't grow table if it's mostly lost cells (due to lots of
165 temporary symbols). */
166 lostc = 0;
167 for (i = 0; i < oldsize; i++) {
168 cb = old[WEAK_ARRAY_HEADSIZE + i];
169 if (cb == SYMTAB_LOST_CELL)
170 lostc++;
171 }
172 if ((lostc * 2) < table->count)
173 newsize = oldsize << 1;
174 else
175 newsize = oldsize;
176
177 asize = (size_t)newsize * sizeof(Scheme_Object *);
178 {
179 Scheme_Object **ba;
180 #ifdef MZ_PRECISE_GC
181 ba = (Scheme_Object **)GC_malloc_weak_array(asize, SYMTAB_LOST_CELL);
182 #else
183 ba = MALLOC_N_ATOMIC(Scheme_Object *, newsize);
184 memset((char *)ba, 0, asize);
185 #endif
186 table->keys = ba;
187 }
188 table->size = newsize;
189
190 table->count = 0;
191
192 for (i = 0; i < oldsize; i++) {
193 cb = old[WEAK_ARRAY_HEADSIZE + i] ;
194 if (cb && (cb != SYMTAB_LOST_CELL))
195 symbol_bucket(table, SCHEME_SYM_VAL(cb), SCHEME_SYM_LEN(cb), cb, sym_type);
196 }
197
198 /* Restore GC-misaligned key: */
199 key = SCHEME_SYM_VAL(naya);
200
201 return symbol_bucket(table, key, length, naya, sym_type);
202 }
203
204 #ifndef MZ_PRECISE_GC
clean_one_symbol_table(Scheme_Hash_Table * table)205 static void clean_one_symbol_table(Scheme_Hash_Table *table)
206 {
207 /* Clean the symbol table by removing pointers to collected
208 symbols. The correct way to do this is to install a GC
209 finalizer on symbol pointers, but that would be expensive. */
210
211 if (table) {
212 Scheme_Object **buckets = (Scheme_Object **)table->keys;
213 int i = table->size;
214 void *b;
215
216 while (i--) {
217 if (buckets[WEAK_ARRAY_HEADSIZE + i] && !SAME_OBJ(buckets[WEAK_ARRAY_HEADSIZE + i], SYMTAB_LOST_CELL)
218 && (!(b = GC_base(buckets[WEAK_ARRAY_HEADSIZE + i]))
219 #ifndef USE_SENORA_GC
220 || !GC_is_marked(b)
221 #endif
222 )) {
223 buckets[WEAK_ARRAY_HEADSIZE + i] = SYMTAB_LOST_CELL;
224 }
225 }
226 }
227 }
228
clean_symbol_table(void)229 static void clean_symbol_table(void)
230 {
231 clean_one_symbol_table(symbol_table);
232 clean_one_symbol_table(keyword_table);
233 clean_one_symbol_table(parallel_symbol_table);
234
235 scheme_clear_ephemerons();
236 # ifdef MZ_USE_JIT
237 scheme_clean_native_symtab();
238 # endif
239 # ifndef MZ_PRECISE_GC
240 scheme_clean_cust_box_list();
241 # endif
242 # ifndef MZ_PRECISE_GC
243 scheme_notify_code_gc();
244 # endif
245 }
246 #endif
247
248 /**************************************************************************/
249
init_one_symbol_table()250 static Scheme_Hash_Table *init_one_symbol_table()
251 {
252 Scheme_Hash_Table *table;
253 int size;
254 Scheme_Object **ba;
255
256 table = scheme_make_hash_table(SCHEME_hash_ptr);
257
258 table->size = HASH_TABLE_INIT_SIZE;
259
260 size = table->size * sizeof(Scheme_Object *);
261 #ifdef MZ_PRECISE_GC
262 ba = (Scheme_Object **)GC_malloc_weak_array(size, SYMTAB_LOST_CELL);
263 #else
264 ba = MALLOC_N_ATOMIC(Scheme_Object *, size);
265 memset((char *)ba, 0, size);
266 #endif
267 table->keys = ba;
268
269 return table;
270 }
271
272 void
scheme_init_symbol_table()273 scheme_init_symbol_table ()
274 {
275 REGISTER_SO(symbol_table);
276 REGISTER_SO(keyword_table);
277 REGISTER_SO(parallel_symbol_table);
278
279 symbol_table = init_one_symbol_table();
280 keyword_table = init_one_symbol_table();
281 parallel_symbol_table = init_one_symbol_table();
282
283 #ifndef MZ_PRECISE_GC
284 GC_custom_finalize = clean_symbol_table;
285 #endif
286 }
287
288 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
289 void
scheme_init_place_local_symbol_table()290 scheme_init_place_local_symbol_table ()
291 {
292 REGISTER_SO(place_local_symbol_table);
293 REGISTER_SO(place_local_keyword_table);
294 REGISTER_SO(place_local_parallel_symbol_table);
295
296 place_local_symbol_table = init_one_symbol_table();
297 place_local_keyword_table = init_one_symbol_table();
298 place_local_parallel_symbol_table = init_one_symbol_table();
299 }
300 #endif
301
302 void
scheme_init_symbol_type(Scheme_Startup_Env * env)303 scheme_init_symbol_type (Scheme_Startup_Env *env)
304 {
305 }
306
307 void
scheme_init_symbol(Scheme_Startup_Env * env)308 scheme_init_symbol (Scheme_Startup_Env *env)
309 {
310 Scheme_Object *p;
311
312 REGISTER_SO(scheme_symbol_p_proc);
313 p = scheme_make_folding_prim(symbol_p_prim, "symbol?", 1, 1, 1);
314 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
315 | SCHEME_PRIM_IS_OMITABLE
316 | SCHEME_PRIM_PRODUCES_BOOL);
317 scheme_symbol_p_proc = p;
318 scheme_addto_prim_instance("symbol?", p, env);
319
320 p = scheme_make_folding_prim(symbol_unreadable_p_prim, "symbol-unreadable?", 1, 1, 1);
321 scheme_addto_prim_instance("symbol-unreadable?", p, env);
322
323 p = scheme_make_folding_prim(scheme_checked_symbol_interned_p, "symbol-interned?", 1, 1, 1);
324 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
325 | SCHEME_PRIM_PRODUCES_BOOL);
326 scheme_addto_prim_instance("symbol-interned?", p, env);
327
328 ADD_FOLDING_PRIM("symbol<?", symbol_lt, 1, -1, 1, env);
329
330 p = scheme_make_folding_prim(string_to_symbol_prim, "string->symbol", 1, 1, 1);
331 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
332 scheme_addto_prim_instance("string->symbol", p, env);
333
334 ADD_IMMED_PRIM("string->uninterned-symbol", string_to_uninterned_symbol_prim, 1, 1, env);
335 ADD_IMMED_PRIM("string->unreadable-symbol", string_to_unreadable_symbol_prim, 1, 1, env);
336
337 p = scheme_make_folding_prim(symbol_to_string_prim, "symbol->string", 1, 1, 1);
338 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
339 scheme_addto_prim_instance("symbol->string", p, env);
340
341 p = scheme_make_folding_prim(symbol_to_immutable_string_prim, "symbol->immutable-string", 1, 1, 1);
342 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
343 scheme_addto_prim_instance("symbol->immutable-string", p, env);
344
345 REGISTER_SO(scheme_keyword_p_proc);
346 p = scheme_make_folding_prim(keyword_p_prim, "keyword?", 1, 1, 1);
347 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
348 | SCHEME_PRIM_IS_OMITABLE
349 | SCHEME_PRIM_PRODUCES_BOOL);
350 scheme_keyword_p_proc = p;
351 scheme_addto_prim_instance("keyword?", p, env);
352
353 ADD_FOLDING_PRIM("keyword<?", keyword_lt, 1, -1, 1, env);
354
355 p = scheme_make_folding_prim(string_to_keyword_prim, "string->keyword", 1, 1, 1);
356 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
357 scheme_addto_prim_instance("string->keyword", p, env);
358
359 p = scheme_make_folding_prim(keyword_to_string_prim, "keyword->string", 1, 1, 1);
360 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
361 scheme_addto_prim_instance("keyword->string", p, env);
362
363 p = scheme_make_folding_prim(keyword_to_immutable_string_prim, "keyword->immutable-string", 1, 1, 1);
364 SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
365 scheme_addto_prim_instance("keyword->immutable-string", p, env);
366
367 ADD_IMMED_PRIM("gensym", gensym, 0, 1, env);
368 }
369
scheme_get_max_symbol_length()370 uintptr_t scheme_get_max_symbol_length() {
371 /* x86, x86_64, and powerpc support aligned_atomic_loads_and_stores */
372 return scheme_max_symbol_length;
373 }
374
scheme_ensure_max_symbol_length(uintptr_t len)375 void scheme_ensure_max_symbol_length(uintptr_t len)
376 {
377 #ifdef MZ_USE_PLACES
378 mzrt_ensure_max_cas(&scheme_max_symbol_length, len);
379 #else
380 if (len > scheme_max_symbol_length) {
381 scheme_max_symbol_length = len;
382 }
383 #endif
384 }
385
386
387 static Scheme_Object *
make_a_symbol(const char * name,uintptr_t len,int kind)388 make_a_symbol(const char *name, uintptr_t len, int kind)
389 {
390 Scheme_Symbol *sym;
391
392 sym = (Scheme_Symbol *)scheme_malloc_atomic_tagged(sizeof(Scheme_Symbol) + len + 1 - mzFLEX4_DELTA);
393
394 sym->iso.so.type = scheme_symbol_type;
395 MZ_OPT_HASH_KEY(&sym->iso) = kind;
396 sym->len = len;
397 memcpy(sym->s, name, len);
398 sym->s[len] = 0;
399
400 scheme_ensure_max_symbol_length(len);
401
402 return (Scheme_Object *)sym;
403 }
404
405 Scheme_Object *
scheme_make_symbol(const char * name)406 scheme_make_symbol(const char *name)
407 {
408 return make_a_symbol(name, strlen(name), 0x1);
409 }
410
411 Scheme_Object *
scheme_make_exact_symbol(const char * name,uintptr_t len)412 scheme_make_exact_symbol(const char *name, uintptr_t len)
413 {
414 return make_a_symbol(name, len, 0x1);
415 }
416
417 Scheme_Object *
scheme_make_exact_char_symbol(const mzchar * name,uintptr_t len)418 scheme_make_exact_char_symbol(const mzchar *name, uintptr_t len)
419 {
420 char buf[64], *bs;
421 intptr_t blen;
422 bs = scheme_utf8_encode_to_buffer_len(name, len, buf, 64, &blen);
423 return make_a_symbol(bs, blen, 0x1);
424 }
425
426 typedef enum {
427 enum_symbol,
428 enum_keyword,
429 enum_parallel_symbol,
430 } enum_symbol_table_type;
431
432 static Scheme_Object *
intern_exact_symbol_in_table_worker(enum_symbol_table_type type,int kind,const char * name,uintptr_t len)433 intern_exact_symbol_in_table_worker(enum_symbol_table_type type, int kind, const char *name, uintptr_t len)
434 {
435 Scheme_Object *sym;
436 Scheme_Hash_Table *table;
437 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
438 Scheme_Hash_Table *place_local_table;
439 #endif
440
441 sym = NULL;
442
443 switch(type) {
444 case enum_symbol:
445 table = symbol_table;
446 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
447 place_local_table = place_local_symbol_table;
448 #endif
449 break;
450 case enum_keyword:
451 table = keyword_table;
452 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
453 place_local_table = place_local_keyword_table;
454 #endif
455 break;
456 case enum_parallel_symbol:
457 table = parallel_symbol_table;
458 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
459 place_local_table = place_local_parallel_symbol_table;
460 #endif
461 break;
462 default:
463 printf("Invalid enum_symbol_table_type %i\n", type);
464 abort();
465 }
466
467 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
468 if (place_local_table) {
469 sym = symbol_bucket(place_local_table, name, len, NULL, type);
470 }
471 #endif
472 if (!sym && table) {
473 sym = symbol_bucket(table, name, len, NULL, type);
474 }
475 if (!sym) {
476 /* create symbol in symbol table unless a place local symbol table has been created */
477 /* once the first place has been create the symbol_table becomes read-only and
478 shouldn't be modified */
479
480 Scheme_Object *newsymbol;
481 Scheme_Hash_Table *create_table;
482 #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
483 create_table = place_local_table ? place_local_table : table;
484 #else
485 create_table = table;
486 #endif
487 newsymbol = make_a_symbol(name, len, kind);
488
489 /* we must return the result of this symbol bucket call because another
490 * thread could have inserted the same symbol between the first
491 * symbol_bucket call above and this one */
492 sym = symbol_bucket(create_table, name, len, newsymbol, type);
493 }
494
495 return sym;
496 }
497
498 static Scheme_Object *
intern_exact_symbol_in_table(enum_symbol_table_type type,int kind,const char * name,uintptr_t len)499 intern_exact_symbol_in_table(enum_symbol_table_type type, int kind, const char *name, uintptr_t len)
500 {
501 return intern_exact_symbol_in_table_worker(type, kind, name, len);
502 }
503
504 Scheme_Object *
scheme_intern_exact_symbol(const char * name,uintptr_t len)505 scheme_intern_exact_symbol(const char *name, uintptr_t len)
506 {
507 return intern_exact_symbol_in_table(enum_symbol, 0, name, len);
508 }
509
510 Scheme_Object *
scheme_intern_exact_parallel_symbol(const char * name,uintptr_t len)511 scheme_intern_exact_parallel_symbol(const char *name, uintptr_t len)
512 {
513 return intern_exact_symbol_in_table(enum_parallel_symbol, 0x2, name, len);
514 }
515
516 Scheme_Object *
scheme_intern_exact_char_symbol(const mzchar * name,uintptr_t len)517 scheme_intern_exact_char_symbol(const mzchar *name, uintptr_t len)
518 {
519 char buf[64], *bs;
520 intptr_t blen;
521 bs = scheme_utf8_encode_to_buffer_len(name, len, buf, 64, &blen);
522 return intern_exact_symbol_in_table(enum_symbol, 0, bs, blen);
523 }
524
525 Scheme_Object *
scheme_intern_exact_keyword(const char * name,uintptr_t len)526 scheme_intern_exact_keyword(const char *name, uintptr_t len)
527 {
528 Scheme_Object *s;
529 s = intern_exact_symbol_in_table(enum_keyword, 0, name, len);
530 if (s->type == scheme_symbol_type)
531 s->type = scheme_keyword_type;
532 return s;
533 }
534
scheme_intern_exact_char_keyword(const mzchar * name,uintptr_t len)535 Scheme_Object *scheme_intern_exact_char_keyword(const mzchar *name, uintptr_t len)
536 {
537 char buf[64], *bs;
538 intptr_t blen;
539 Scheme_Object *s;
540 bs = scheme_utf8_encode_to_buffer_len(name, len, buf, 64, &blen);
541 s = intern_exact_symbol_in_table(enum_keyword, 0, bs, blen);
542 if (s->type == scheme_symbol_type)
543 s->type = scheme_keyword_type;
544 return s;
545 }
546
547 #define MAX_SYMBOL_SIZE 256
548
549 Scheme_Object *
scheme_intern_symbol(const char * name)550 scheme_intern_symbol(const char *name)
551 /* `name' must be ASCII; this function is not suitable for non-ASCII
552 conversion, necause it assumes that downcasing each C char
553 is good enough to normalize the case. */
554 {
555 if (!scheme_case_sensitive) {
556 uintptr_t i, len;
557 char *naya;
558 char on_stack[MAX_SYMBOL_SIZE];
559
560 len = strlen(name);
561 if (len >= MAX_SYMBOL_SIZE)
562 naya = (char *)scheme_malloc_atomic(len + 1);
563 else
564 naya = on_stack;
565
566 for (i = 0; i < len; i++) {
567 int c = ((unsigned char *)name)[i];
568
569 c = scheme_tolower(c);
570
571 naya[i] = c;
572 }
573
574 naya[len] = 0;
575
576 return scheme_intern_exact_symbol(naya, len);
577 }
578
579 return scheme_intern_exact_symbol(name, strlen(name));
580 }
581
scheme_symbol_name_and_size(Scheme_Object * sym,uintptr_t * length,int flags)582 const char *scheme_symbol_name_and_size(Scheme_Object *sym, uintptr_t *length, int flags)
583 {
584 int has_space = 0, has_special = 0, has_pipe = 0, has_upper = 0, digit_start;
585 int dz;
586 uintptr_t i, len = SCHEME_SYM_LEN(sym), total_length;
587 int pipe_quote;
588 char buf[100];
589 char *s, *result;
590
591 if ((flags & SCHEME_SNF_PIPE_QUOTE) || (flags & SCHEME_SNF_FOR_TS))
592 pipe_quote = 1;
593 else if (flags & SCHEME_SNF_NO_PIPE_QUOTE)
594 pipe_quote = 0;
595 else {
596 pipe_quote = SCHEME_TRUEP(scheme_get_param(scheme_current_config(), MZCONFIG_CAN_READ_PIPE_QUOTE));
597 }
598
599 if (len < 100) {
600 s = buf;
601 memcpy(buf, SCHEME_SYM_VAL(sym), len + 1);
602 } else
603 s = scheme_symbol_val(sym);
604
605
606 #define isSpecial(ch) ((ch == '(') || (ch == '[') || (ch == '{') \
607 || (ch == ')') || (ch == ']') || (ch == '}') \
608 || (ch == ')') || (ch == '\\') \
609 || (ch == '"') || (ch == '\'') \
610 || (ch == '`') || (ch == ',') \
611 || (ch == ';') \
612 || (((ch == '>') || (ch == '<')) \
613 && (flags & SCHEME_SNF_FOR_TS)))
614
615 if (len) {
616 int ch = ((unsigned char *)s)[0];
617 digit_start = ((flags & SCHEME_SNF_KEYWORD)
618 ? 0
619 : (((ch < 128) && isdigit(ch)) || (ch == '.')
620 || (ch == '+') || (ch == '-')));
621 if (ch == '#' && (len == 1 || s[1] != '%'))
622 has_special = 1;
623 if (ch == '.' && len == 1)
624 has_special = 1;
625 } else {
626 digit_start = 0;
627 if (!(flags & SCHEME_SNF_KEYWORD))
628 has_space = 1;
629 }
630
631 for (i = 0; i < len; i++) {
632 int ch = ((unsigned char *)s)[i];
633
634 if (ch > 127) {
635 /* Decode UTF-8. */
636 mzchar buf[2];
637 int ul = 2;
638 while (1) {
639 if (scheme_utf8_decode((unsigned char *)s, i, i + ul,
640 buf, 0, 1,
641 NULL, 0, 0) > 0)
642 break;
643 ul++;
644 }
645 ch = buf[0];
646 if ((flags & SCHEME_SNF_NEED_CASE) && scheme_isspecialcasing(ch)) {
647 mzchar *rc;
648 buf[1] = 0;
649 rc = scheme_string_recase(buf, 0, 1, 3, 1, NULL);
650 if ((rc != buf) || (rc[0] != ch))
651 has_upper = 1;
652 ch = 'a';
653 }
654 i += (ul - 1);
655 }
656
657 if (scheme_isspace(ch)) { /* used to have || !isprint(ch) */
658 if ((flags & SCHEME_SNF_FOR_TS) && (ch == ' ')) {
659 /* space is OK in type symbols */
660 } else
661 has_space = 1;
662 } else if (isSpecial(ch)
663 || (ch == 0xFEFF)) /* BOM, treated as whitespace by the reader */
664 has_special = 1;
665 else if (ch == '|')
666 has_pipe = 1;
667 else if (flags & SCHEME_SNF_NEED_CASE) {
668 if (scheme_tofold(ch) != ch)
669 has_upper = 1;
670 }
671 }
672
673 result = NULL;
674 total_length = 0;
675
676 if (!has_space && !has_special && (!pipe_quote || !has_pipe) && !has_upper) {
677 mzchar cbuf[100], *cs;
678 intptr_t clen;
679 dz = 0;
680 cs = scheme_utf8_decode_to_buffer_len((unsigned char *)s, len, cbuf, 100, &clen);
681 if (cs
682 && digit_start
683 && !(flags & SCHEME_SNF_FOR_TS)
684 && (SCHEME_TRUEP(scheme_read_number(cs, clen, 0, 0, 1, 10, 0, NULL, &dz, 1))
685 || dz)) {
686 /* Need quoting: */
687 if (!pipe_quote) {
688 /* Just need a leading backslash: */
689 result = (char *)scheme_malloc_atomic(len + 2);
690 total_length = len + 1;
691 memcpy(result + 1, s, len);
692 result[0] = '\\';
693 result[len + 1] = 0;
694 }
695 } else {
696 total_length = len;
697 result = s;
698 }
699 }
700
701 if (!result) {
702 if (!has_pipe && pipe_quote) {
703 result = (char *)scheme_malloc_atomic(len + 3);
704 total_length = len + 2;
705 memcpy(result + 1, s, len);
706 result[0] = '|';
707 result[len + 1] = '|';
708 result[len + 2] = 0;
709 } else {
710 mzchar cbuf[100], *cs, *cresult;
711 intptr_t clen;
712 int p = 0;
713 intptr_t i = 0;
714 intptr_t rlen;
715
716 dz = 0;
717 cs = scheme_utf8_decode_to_buffer_len((unsigned char *)s, len, cbuf, 100, &clen);
718
719 cresult = (mzchar *)scheme_malloc_atomic(((2 * len) + 1) * sizeof(mzchar));
720
721 for (i = 0; i < clen; i++) {
722 mzchar ch = cs[i];
723 if (scheme_isspace(ch)
724 || isSpecial(ch)
725 || ((ch == '|') && pipe_quote)
726 || (!i && s[0] == '#')
727 || (has_upper && (ch >= 'A') && (ch <= 'Z')))
728 cresult[p++] = '\\';
729 cresult[p++] = ch;
730 }
731
732 result = scheme_utf8_encode_to_buffer_len(cresult, p, NULL, 0, &rlen);
733 total_length = rlen;
734 }
735 }
736
737 if (length)
738 *length = total_length;
739
740 return (result == buf) ? scheme_symbol_val (sym) : result;
741 }
742
scheme_symbol_name(Scheme_Object * sym)743 const char *scheme_symbol_name(Scheme_Object *sym)
744 {
745 return scheme_symbol_name_and_size(sym, NULL, 0);
746 }
747
scheme_symbol_val(Scheme_Object * sym)748 char *scheme_symbol_val(Scheme_Object *sym)
749 {
750 char *s;
751 s = scheme_malloc_atomic(SCHEME_SYM_LEN(sym) + 1);
752 memcpy(s, SCHEME_SYM_VAL(sym), SCHEME_SYM_LEN(sym) + 1);
753 return s;
754 }
755
756 /* locals */
757
758 static Scheme_Object *
symbol_p_prim(int argc,Scheme_Object * argv[])759 symbol_p_prim (int argc, Scheme_Object *argv[])
760 {
761 return SCHEME_SYMBOLP(argv[0]) ? scheme_true : scheme_false;
762 }
763
764 Scheme_Object *
scheme_checked_symbol_interned_p(int argc,Scheme_Object * argv[])765 scheme_checked_symbol_interned_p (int argc, Scheme_Object *argv[])
766 {
767 if (SCHEME_SYMBOLP(argv[0]))
768 return (SCHEME_SYM_WEIRDP(argv[0]) ? scheme_false : scheme_true);
769
770 scheme_wrong_contract("symbol-interned?", "symbol?", 0, argc, argv);
771 return NULL;
772 }
773
774 static Scheme_Object *
symbol_unreadable_p_prim(int argc,Scheme_Object * argv[])775 symbol_unreadable_p_prim (int argc, Scheme_Object *argv[])
776 {
777 if (SCHEME_SYMBOLP(argv[0]))
778 return (SCHEME_SYM_PARALLELP(argv[0]) ? scheme_true : scheme_false);
779
780 scheme_wrong_contract("symbol-unreadable?", "symbol?", 0, argc, argv);
781 return NULL;
782 }
783
784 static Scheme_Object *
string_to_symbol_prim(int argc,Scheme_Object * argv[])785 string_to_symbol_prim (int argc, Scheme_Object *argv[])
786 {
787 if (!SCHEME_CHAR_STRINGP(argv[0]))
788 scheme_wrong_contract("string->symbol", "string?", 0, argc, argv);
789 return scheme_intern_exact_char_symbol(SCHEME_CHAR_STR_VAL(argv[0]),
790 SCHEME_CHAR_STRTAG_VAL(argv[0]));
791 }
792
793 static Scheme_Object *
string_to_uninterned_symbol_prim(int argc,Scheme_Object * argv[])794 string_to_uninterned_symbol_prim (int argc, Scheme_Object *argv[])
795 {
796 if (!SCHEME_CHAR_STRINGP(argv[0]))
797 scheme_wrong_contract("string->uninterned-symbol", "string?", 0, argc, argv);
798 return scheme_make_exact_char_symbol(SCHEME_CHAR_STR_VAL(argv[0]),
799 SCHEME_CHAR_STRTAG_VAL(argv[0]));
800 }
801
802 static Scheme_Object *
string_to_unreadable_symbol_prim(int argc,Scheme_Object * argv[])803 string_to_unreadable_symbol_prim (int argc, Scheme_Object *argv[])
804 {
805 char buf[64], *bs;
806 intptr_t blen;
807
808 if (!SCHEME_CHAR_STRINGP(argv[0]))
809 scheme_wrong_contract("string->unreadable-symbol", "string?", 0, argc, argv);
810
811 bs = scheme_utf8_encode_to_buffer_len(SCHEME_CHAR_STR_VAL(argv[0]),
812 SCHEME_CHAR_STRTAG_VAL(argv[0]),
813 buf, 64, &blen);
814
815 return scheme_intern_exact_parallel_symbol(bs, blen);
816 }
817
scheme_symbol_to_string(Scheme_Object * sym)818 Scheme_Object *scheme_symbol_to_string(Scheme_Object *sym)
819 {
820 Scheme_Object *str;
821 GC_CAN_IGNORE unsigned char *s;
822 GC_CAN_IGNORE mzchar *s2;
823 intptr_t len, i;
824
825 s = (unsigned char *)SCHEME_SYM_VAL(sym);
826 len = SCHEME_SYM_LEN(sym);
827 for (i = 0; i < len; i++) {
828 if (s[i] >= 128)
829 break;
830 }
831 s = NULL;
832
833 if (i == len) {
834 /* ASCII */
835 str = scheme_alloc_char_string(len, 0);
836 s = (unsigned char *)SCHEME_SYM_VAL(sym);
837 s2 = SCHEME_CHAR_STR_VAL(str);
838 for (i = 0; i < len; i++) {
839 s2[i] = s[i];
840 }
841 return str;
842 } else {
843 return scheme_make_sized_offset_utf8_string((char *)sym,
844 SCHEME_SYMSTR_OFFSET(sym),
845 SCHEME_SYM_LEN(sym));
846 }
847 }
848
849 static Scheme_Object *
symbol_to_string_prim(int argc,Scheme_Object * argv[])850 symbol_to_string_prim (int argc, Scheme_Object *argv[])
851 {
852 Scheme_Object *sym;
853
854 sym = argv[0];
855
856 if (!SCHEME_SYMBOLP(sym))
857 scheme_wrong_contract("symbol->string", "symbol?", 0, argc, argv);
858
859 return scheme_symbol_to_string(sym);
860 }
861
862 static Scheme_Object *
symbol_to_immutable_string_prim(int argc,Scheme_Object * argv[])863 symbol_to_immutable_string_prim (int argc, Scheme_Object *argv[])
864 {
865 Scheme_Object *sym, *str;
866
867 sym = argv[0];
868
869 if (!SCHEME_SYMBOLP(sym))
870 scheme_wrong_contract("symbol->immutable-string", "symbol?", 0, argc, argv);
871
872 /* Could cache, but currently we don't */
873
874 str = scheme_symbol_to_string(sym);
875 SCHEME_SET_CHAR_STRING_IMMUTABLE(str);
876
877 return str;
878 }
879
880
881 static Scheme_Object *
keyword_p_prim(int argc,Scheme_Object * argv[])882 keyword_p_prim (int argc, Scheme_Object *argv[])
883 {
884 return SCHEME_KEYWORDP(argv[0]) ? scheme_true : scheme_false;
885 }
886
symkey_lt(const char * who,Scheme_Type ty,const char * contract,int argc,Scheme_Object * argv[])887 static Scheme_Object *symkey_lt (const char *who, Scheme_Type ty, const char *contract,
888 int argc, Scheme_Object *argv[])
889 {
890 Scheme_Object *prev = argv[0], *kw;
891 GC_CAN_IGNORE unsigned char *a, *b;
892 int i, al, bl, t;
893
894 if (!SAME_TYPE(SCHEME_TYPE(prev), ty))
895 scheme_wrong_contract(who, contract, 0, argc, argv);
896
897 for (i = 1; i < argc; i++) {
898 kw = argv[i];
899 if (!SAME_TYPE(SCHEME_TYPE(kw), ty))
900 scheme_wrong_contract(who, contract, i, argc, argv);
901
902 a = (unsigned char *)SCHEME_SYM_VAL(prev);
903 al = SCHEME_SYM_LEN(prev);
904 b = (unsigned char *)SCHEME_SYM_VAL(kw);
905 bl = SCHEME_SYM_LEN(kw);
906 t = ((al < bl) ? al : bl);
907 while (t--) {
908 if (*a < *b) {
909 al = 0;
910 bl = 1;
911 break;
912 } else if (*a > *b) {
913 al = bl = 0;
914 break;
915 } else {
916 a++;
917 b++;
918 }
919 }
920 a = b = NULL;
921
922 if (al >= bl) {
923 /* Check remaining types */
924 for (i++; i < argc; i++) {
925 if (!SAME_TYPE(SCHEME_TYPE(argv[i]), ty))
926 scheme_wrong_contract(who, contract, i, argc, argv);
927 }
928 return scheme_false;
929 }
930
931 prev = kw;
932 }
933
934 return scheme_true;
935 }
936
keyword_lt(int argc,Scheme_Object * argv[])937 static Scheme_Object *keyword_lt (int argc, Scheme_Object *argv[])
938 {
939 return symkey_lt("keyword<?", scheme_keyword_type, "keyword?", argc, argv);
940 }
941
symbol_lt(int argc,Scheme_Object * argv[])942 static Scheme_Object *symbol_lt (int argc, Scheme_Object *argv[])
943 {
944 return symkey_lt("symbol<?", scheme_symbol_type, "symbol?", argc, argv);
945 }
946
947 static Scheme_Object *
string_to_keyword_prim(int argc,Scheme_Object * argv[])948 string_to_keyword_prim (int argc, Scheme_Object *argv[])
949 {
950 if (!SCHEME_CHAR_STRINGP(argv[0]))
951 scheme_wrong_contract("string->keyword", "string?", 0, argc, argv);
952 return scheme_intern_exact_char_keyword(SCHEME_CHAR_STR_VAL(argv[0]),
953 SCHEME_CHAR_STRTAG_VAL(argv[0]));
954 }
955
956 static Scheme_Object *
keyword_to_string_prim(int argc,Scheme_Object * argv[])957 keyword_to_string_prim (int argc, Scheme_Object *argv[])
958 {
959 if (!SCHEME_KEYWORDP(argv[0]))
960 scheme_wrong_contract("keyword->string", "keyword?", 0, argc, argv);
961
962 return scheme_make_sized_offset_utf8_string((char *)(argv[0]),
963 SCHEME_SYMSTR_OFFSET(argv[0]),
964 SCHEME_SYM_LEN(argv[0]));
965 }
966
967 static Scheme_Object *
keyword_to_immutable_string_prim(int argc,Scheme_Object * argv[])968 keyword_to_immutable_string_prim (int argc, Scheme_Object *argv[])
969 {
970 Scheme_Object *str;
971
972 if (!SCHEME_KEYWORDP(argv[0]))
973 scheme_wrong_contract("keyword->immutable-string", "keyword?", 0, argc, argv);
974
975 /* Could cache, but currently we don't */
976
977 str = scheme_make_sized_offset_utf8_string((char *)(argv[0]),
978 SCHEME_SYMSTR_OFFSET(argv[0]),
979 SCHEME_SYM_LEN(argv[0]));
980
981 SCHEME_SET_CHAR_STRING_IMMUTABLE(str);
982
983 return str;
984 }
985
gensym(int argc,Scheme_Object * argv[])986 static Scheme_Object *gensym(int argc, Scheme_Object *argv[])
987 {
988 char buffer[100], *str;
989 Scheme_Object *r;
990
991 if (argc)
992 r = argv[0];
993 else
994 r = NULL;
995
996 if (r && !SCHEME_SYMBOLP(r) && !SCHEME_CHAR_STRINGP(r))
997 scheme_wrong_contract("gensym", "(or/c symbol? string?)", 0, argc, argv);
998
999 if (r) {
1000 char buf[64];
1001 if (SCHEME_CHAR_STRINGP(r)) {
1002 str = scheme_utf8_encode_to_buffer(SCHEME_CHAR_STR_VAL(r),
1003 SCHEME_CHAR_STRTAG_VAL(r),
1004 buf, 64);
1005 } else
1006 str = SCHEME_SYM_VAL(r);
1007 sprintf(buffer, "%.80s%d", str, gensym_counter++);
1008 str = NULL; /* because it might be GC-misaligned */
1009 } else
1010 sprintf(buffer, "g%d", gensym_counter++);
1011
1012 r = scheme_make_symbol(buffer);
1013
1014 return r;
1015 }
1016
scheme_gensym(Scheme_Object * base)1017 Scheme_Object *scheme_gensym(Scheme_Object *base)
1018 {
1019 Scheme_Object *a[1];
1020 a[0] = base;
1021 return gensym(1, a);
1022 }
1023
scheme_symbol_append(Scheme_Object * s1,Scheme_Object * s2)1024 Scheme_Object *scheme_symbol_append(Scheme_Object *s1, Scheme_Object *s2)
1025 {
1026 char *s;
1027 s = MALLOC_N_ATOMIC(char, SCHEME_SYM_LEN(s1) + SCHEME_SYM_LEN(s2) + 1);
1028 memcpy(s, SCHEME_SYM_VAL(s1), SCHEME_SYM_LEN(s1));
1029 memcpy(s + SCHEME_SYM_LEN(s1), SCHEME_SYM_VAL(s2), SCHEME_SYM_LEN(s2) + 1);
1030 if (SCHEME_SYM_UNINTERNEDP(s1) || SCHEME_SYM_UNINTERNEDP(s2))
1031 return scheme_make_exact_symbol(s, SCHEME_SYM_LEN(s1) + SCHEME_SYM_LEN(s2));
1032 else if (SCHEME_SYM_PARALLELP(s1) || SCHEME_SYM_PARALLELP(s2))
1033 return scheme_intern_exact_parallel_symbol(s, SCHEME_SYM_LEN(s1) + SCHEME_SYM_LEN(s2));
1034 else
1035 return scheme_intern_exact_symbol(s, SCHEME_SYM_LEN(s1) + SCHEME_SYM_LEN(s2));
1036 }
1037