1 /*
2 * symbol.c - symbol implementation
3 *
4 * Copyright (c) 2000-2020 Shiro Kawai <shiro@acm.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the authors nor the names of its contributors
18 * may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #define LIBGAUCHE_BODY
35 #include "gauche.h"
36 #include "gauche/priv/builtin-syms.h"
37 #include "gauche/priv/moduleP.h"
38
39 /*-----------------------------------------------------------
40 * Symbols
41 */
42
43 static void symbol_print(ScmObj obj, ScmPort *port, ScmWriteContext *);
44 static int symbol_compare(ScmObj x, ScmObj y, int equalp);
45
46 SCM_DEFINE_BUILTIN_CLASS(Scm_SymbolClass, symbol_print, symbol_compare,
47 NULL, NULL, NULL);
48
49 static ScmClass *keyword_cpl[] = {
50 SCM_CLASS_STATIC_PTR(Scm_SymbolClass),
51 SCM_CLASS_STATIC_PTR(Scm_TopClass),
52 NULL
53 };
54
55 SCM_DEFINE_BUILTIN_CLASS(Scm_KeywordClass, symbol_print, symbol_compare,
56 NULL, NULL, keyword_cpl);
57
58 /* name -> symbol mapper */
59 static ScmInternalMutex obtable_mutex = SCM_INTERNAL_MUTEX_INITIALIZER;
60 static ScmHashTable *obtable = NULL;
61
62 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
63 /* Global keyword table. */
64 static struct {
65 ScmHashTable *table;
66 ScmInternalMutex mutex;
67 } keywords;
68
69 static int keyword_disjoint_p = FALSE;
70 #endif /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
71
72 /* internal constructor. NAME must be an immutable string. */
make_sym(ScmClass * klass,ScmString * name,int interned)73 static ScmSymbol *make_sym(ScmClass *klass, ScmString *name, int interned)
74 {
75 if (interned) {
76 /* fast path */
77 SCM_INTERNAL_MUTEX_LOCK(obtable_mutex);
78 ScmObj e = Scm_HashTableRef(obtable, SCM_OBJ(name), SCM_FALSE);
79 SCM_INTERNAL_MUTEX_UNLOCK(obtable_mutex);
80 if (!SCM_FALSEP(e)) return SCM_SYMBOL(e);
81 }
82
83 ScmSymbol *sym = SCM_NEW(ScmSymbol);
84 SCM_SET_CLASS(sym, klass);
85 sym->name = name;
86 sym->flags = interned? SCM_SYMBOL_FLAG_INTERNED : 0;
87
88 if (!interned) {
89 return sym;
90 } else {
91 /* Using SCM_DICT_NO_OVERWRITE ensures that if another thread interns
92 the same name symbol between above HashTableRef and here, we'll
93 get the already interned symbol. */
94 SCM_INTERNAL_MUTEX_LOCK(obtable_mutex);
95 ScmObj r = Scm_HashTableSet(obtable, SCM_OBJ(name), SCM_OBJ(sym),
96 SCM_DICT_NO_OVERWRITE);
97 SCM_INTERNAL_MUTEX_UNLOCK(obtable_mutex);
98 return SCM_UNBOUNDP(r)? sym : SCM_SYMBOL(r);
99 }
100 }
101
102 /* Intern */
Scm_MakeSymbol(ScmString * name,int interned)103 ScmObj Scm_MakeSymbol(ScmString *name, int interned)
104 {
105 ScmObj sname = Scm_CopyStringWithFlags(name, SCM_STRING_IMMUTABLE,
106 SCM_STRING_IMMUTABLE);
107 return SCM_OBJ(make_sym(SCM_CLASS_SYMBOL, SCM_STRING(sname), interned));
108 }
109
110 /* Keyword prefix. */
111 static SCM_DEFINE_STRING_CONST(keyword_prefix, ":", 1, 1);
112
113 /* In unified keyword, we include preceding ':' to the name. */
Scm_MakeKeyword(ScmString * name)114 ScmObj Scm_MakeKeyword(ScmString *name)
115 {
116 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
117 if (keyword_disjoint_p) {
118 (void)SCM_INTERNAL_MUTEX_LOCK(keywords.mutex);
119 ScmObj r = Scm_HashTableRef(keywords.table, SCM_OBJ(name), SCM_FALSE);
120 (void)SCM_INTERNAL_MUTEX_UNLOCK(keywords.mutex);
121
122 if (SCM_KEYWORDP(r)) return r;
123
124 ScmKeyword *k = SCM_NEW(ScmKeyword);
125 SCM_SET_CLASS(k, SCM_CLASS_KEYWORD);
126 k->name = SCM_STRING(Scm_CopyString(name));
127 (void)SCM_INTERNAL_MUTEX_LOCK(keywords.mutex);
128 r = Scm_HashTableSet(keywords.table, SCM_OBJ(name), SCM_OBJ(k),
129 SCM_DICT_NO_OVERWRITE);
130 (void)SCM_INTERNAL_MUTEX_UNLOCK(keywords.mutex);
131 return SCM_UNBOUNDP(r)? SCM_OBJ(k) : r ;
132 }
133 #endif /*GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
134 ScmObj sname = Scm_StringAppend2(&keyword_prefix, name);
135 ScmSymbol *s = make_sym(SCM_CLASS_KEYWORD, SCM_STRING(sname), TRUE);
136 Scm_DefineConst(Scm__GaucheKeywordModule(), s, SCM_OBJ(s));
137 return SCM_OBJ(s);
138 }
139
Scm_KeywordToString(ScmKeyword * k)140 ScmObj Scm_KeywordToString(ScmKeyword *k)
141 {
142 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
143 if (keyword_disjoint_p) {
144 return SCM_OBJ(k->name);
145 } else {
146 return Scm_Substring(k->name, 1, -1, FALSE);
147 }
148 #else /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
149 return Scm_Substring(k->name, 1, -1, FALSE);
150 #endif /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
151 }
152
153 /* Default prefix string. */
154 static SCM_DEFINE_STRING_CONST(default_prefix, "G", 1, 1);
155
156 /* Returns uninterned symbol. PREFIX can be NULL */
Scm_Gensym(ScmString * prefix)157 ScmObj Scm_Gensym(ScmString *prefix)
158 {
159 char numbuf[50];
160 /* We don't need mutex for this variable, since the race on it is
161 tolerated---multiple threads may get the same name symbols,
162 but they are uninterned and never be eq? to each other. */
163 static intptr_t gensym_count = 0;
164
165 if (prefix == NULL) prefix = &default_prefix;
166 int nc = snprintf(numbuf, 49, "%"PRIdPTR, gensym_count++);
167 numbuf[49] = '\0';
168 ScmObj name = Scm_StringAppendC(prefix, numbuf, nc, nc);
169 ScmSymbol *sym = make_sym(SCM_CLASS_SYMBOL, SCM_STRING(name), FALSE);
170 return SCM_OBJ(sym);
171 }
172
173 /* If symbol S has a prefix P, returns a symbol without the prefix.
174 Otherwise, returns #f. */
Scm_SymbolSansPrefix(ScmSymbol * s,ScmSymbol * p)175 ScmObj Scm_SymbolSansPrefix(ScmSymbol *s, ScmSymbol *p)
176 {
177 const ScmStringBody *bp = SCM_STRING_BODY(SCM_SYMBOL_NAME(p));
178 const ScmStringBody *bs = SCM_STRING_BODY(SCM_SYMBOL_NAME(s));
179 int zp = SCM_STRING_BODY_SIZE(bp);
180 int zs = SCM_STRING_BODY_SIZE(bs);
181 const char *cp = SCM_STRING_BODY_START(bp);
182 const char *cs = SCM_STRING_BODY_START(bs);
183
184 if (zp > zs || memcmp(cp, cs, zp) != 0) return SCM_FALSE;
185 return Scm_Intern(SCM_STRING(Scm_MakeString(cs + zp, zs - zp, -1,
186 SCM_STRING_IMMUTABLE)));
187 }
188
189 /*
190 * Print
191 */
192
escape_required_p(const ScmStringBody * b,u_int flags,int casefold)193 static int escape_required_p(const ScmStringBody *b, u_int flags, int casefold)
194 {
195 int siz = SCM_STRING_BODY_SIZE(b);
196 if (siz == 0) return !(flags & SCM_SYMBOL_WRITER_NOESCAPE_EMPTY);
197
198 const char *p = SCM_STRING_BODY_START(b);
199 const char *e = p + siz;
200 ScmChar ch;
201
202 if (flags & SCM_SYMBOL_WRITER_NOESCAPE_INITIAL) goto subsequent;
203
204 if (keyword_disjoint_p && *p == ':') return TRUE;
205
206 if (*p == '+' || *p == '-') {
207 p++;
208 if (p == e) return FALSE;
209 if (*p == '.') goto dot_subsequent;
210 /* detect special numeric constants */
211 if (siz == 2 && (*p == 'i' || *p == 'I')) return TRUE;
212 if (siz >= 6 && p[3] == '.' && p[4] == '0') {
213 if ((p[0] == 'n' || p[0] == 'N')
214 && (p[1] == 'a' || p[1] == 'A')
215 && (p[2] == 'n' || p[2] == 'N')) return TRUE;
216 if ((p[0] == 'i' || p[0] == 'I')
217 && (p[1] == 'n' || p[1] == 'N')
218 && (p[2] == 'f' || p[2] == 'F')) return TRUE;
219 }
220 SCM_CHAR_GET(p, ch);
221 if (!Scm_CharLexerCategoryP(ch, SCM_CHAR_SIGN_SUBSEQUENT)) return TRUE;
222 p += SCM_CHAR_NBYTES(ch);
223 goto subsequent;
224 }
225 if (*p == '.') {
226 dot_subsequent:
227 p++;
228 if (p == e) return TRUE;
229 SCM_CHAR_GET(p, ch);
230 if (ch != '.' && !Scm_CharLexerCategoryP(ch, SCM_CHAR_SIGN_SUBSEQUENT))
231 return TRUE;
232 p += SCM_CHAR_NBYTES(ch);
233 goto subsequent;
234 }
235 SCM_CHAR_GET(p, ch);
236 if (!Scm_CharLexerCategoryP(ch, SCM_CHAR_INITIAL)) return TRUE;
237 if (casefold && ch >= 'A' && ch <= 'Z') return TRUE;
238 p += SCM_CHAR_NBYTES(ch);
239
240 subsequent:
241 while (p < e) {
242 SCM_CHAR_GET(p, ch);
243 if (!Scm_CharLexerCategoryP(ch, SCM_CHAR_SUBSEQUENT)) return TRUE;
244 if (casefold && ch >= 'A' && ch <= 'Z') return TRUE;
245 p += SCM_CHAR_NBYTES(ch);
246 }
247 return FALSE;
248 }
249
250 /* internal function to write symbol name, with proper escaping */
Scm_WriteSymbolName(ScmString * snam,ScmPort * port,ScmWriteContext * ctx,u_int flags)251 void Scm_WriteSymbolName(ScmString *snam, ScmPort *port, ScmWriteContext *ctx,
252 u_int flags)
253 {
254 /* See if we have special characters, and use |-escape if necessary. */
255 /* TODO: For now, we regard chars over 0x80 is all "printable".
256 Need a more consistent mechanism. */
257 const ScmStringBody *b = SCM_STRING_BODY(snam);
258 const char *p = SCM_STRING_BODY_START(b);
259 int siz = SCM_STRING_BODY_SIZE(b);
260 int casefold = (Scm_WriteContextCase(ctx) == SCM_WRITE_CASE_FOLD);
261
262 if (siz == 0) { /* special case */
263 if (!(flags & SCM_SYMBOL_WRITER_NOESCAPE_EMPTY)) {
264 SCM_PUTZ("||", -1, port);
265 }
266 return;
267 }
268 if (escape_required_p(b, flags, casefold)) {
269 SCM_PUTC('|', port);
270 for (const char *q=p; q<p+siz; ) {
271 unsigned int ch;
272 SCM_CHAR_GET(q, ch);
273 q += SCM_CHAR_NBYTES(ch);
274 if (ch < 128) {
275 if (ch == '|' || ch == '\\') {
276 SCM_PUTC('\\', port);
277 SCM_PUTC(ch, port);
278 } else if (ch < 0x20 || ch == 0x7f) {
279 Scm_Printf(port, "\\x%02x;", ch);
280 } else {
281 SCM_PUTC(ch, port);
282 }
283 } else {
284 SCM_PUTC(ch, port);
285 }
286 }
287 SCM_PUTC('|', port);
288 } else {
289 SCM_PUTS(snam, port);
290 }
291 }
292
293 /* Symbol printer.
294 NB: Uninterned symbols are treated as sharable objects (can be written
295 with #n= syntax). It is handled by upper layer (write.c) so we don't
296 worry about it in this routine.
297 */
symbol_print(ScmObj obj,ScmPort * port,ScmWriteContext * ctx)298 static void symbol_print(ScmObj obj, ScmPort *port, ScmWriteContext *ctx)
299 {
300 if (Scm_WriteContextMode(ctx) == SCM_WRITE_DISPLAY) {
301 SCM_PUTS(SCM_SYMBOL_NAME(obj), port);
302 } else {
303 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
304 if (SCM_KEYWORDP(obj) && keyword_disjoint_p) {
305 Scm_Putc(':', port);
306 /* We basically print keyword names in the same way as symbols
307 (i.e. using |-escape if necessary). However, as a convention,
308 two things are different from the default symbol writer.
309 (1) We don't check the noninitials; :1 is unambiguously a
310 keyword, so we don't need to print :|1|.
311 (2) A keyword with an empty name can be printed just as :,
312 instead of :||.
313 These conventions are useful if we pass the S-expression with
314 these keywords to other Scheme implementations that don't support
315 CL-style keywords; they would just read those ones as symbols.
316 */
317 Scm_WriteSymbolName(SCM_KEYWORD(obj)->name, port, ctx,
318 (SCM_SYMBOL_WRITER_NOESCAPE_INITIAL
319 |SCM_SYMBOL_WRITER_NOESCAPE_EMPTY));
320 return;
321 }
322 #endif /*GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
323 if (!SCM_SYMBOL_INTERNED(obj)) SCM_PUTZ("#:", -1, port);
324 Scm_WriteSymbolName(SCM_SYMBOL_NAME(obj), port, ctx, 0);
325 }
326 }
327
328 /* Symbol comparison procedure.
329 Will be used via 'compare' procedure. Following srfi-114, we compare
330 by name, but takes extra care of intern/unintern distinction; if the
331 names are the same, interned symbol is less, and if both are
332 uninterned, we compare addresses.
333 */
symbol_compare(ScmObj x,ScmObj y,int equalp)334 static int symbol_compare(ScmObj x, ScmObj y, int equalp)
335 {
336 if (equalp) {
337 /* Symbol equality test is handled in Scm_Eq* and will never come
338 here, but just in case. */
339 return SCM_EQ(x, y)? 0:1;
340 } else if (SCM_EQ(x, y)) {
341 return 0;
342 } else {
343 int r = Scm_StringCmp(SCM_SYMBOL_NAME(x), SCM_SYMBOL_NAME(y));
344 if (r != 0) return r;
345 if (SCM_SYMBOL_INTERNED(x)) return -1; /* y must be uninterned */
346 if (SCM_SYMBOL_INTERNED(y)) return 1; /* x must be uninterned */
347 return (x < y)? -1 : 1; /* both are uninterned */
348 }
349 }
350
351 /*
352 * Keyword Utilities
353 * The names are historical; KEY doesn't need to be a keyword at all;
354 * anything that can be compared by eq? do.
355 */
356
Scm_GetKeyword(ScmObj key,ScmObj list,ScmObj fallback)357 ScmObj Scm_GetKeyword(ScmObj key, ScmObj list, ScmObj fallback)
358 {
359 ScmObj cp;
360 SCM_FOR_EACH(cp, list) {
361 if (!SCM_PAIRP(SCM_CDR(cp))) {
362 Scm_Error("incomplete key list: %S", list);
363 }
364 if (key == SCM_CAR(cp)) return SCM_CADR(cp);
365 cp = SCM_CDR(cp);
366 }
367 if (SCM_UNBOUNDP(fallback)) {
368 Scm_Error("value for key %S is not provided: %S", key, list);
369 }
370 return fallback;
371 }
372
Scm_DeleteKeyword(ScmObj key,ScmObj list)373 ScmObj Scm_DeleteKeyword(ScmObj key, ScmObj list)
374 {
375 ScmObj cp;
376 SCM_FOR_EACH(cp, list) {
377 if (!SCM_PAIRP(SCM_CDR(cp))) {
378 Scm_Error("incomplete key list: %S", list);
379 }
380 if (key == SCM_CAR(cp)) {
381 /* found */
382 ScmObj h = SCM_NIL, t = SCM_NIL;
383 ScmObj tail = Scm_DeleteKeyword(key, SCM_CDR(SCM_CDR(cp)));
384 ScmObj cp2;
385 SCM_FOR_EACH(cp2, list) {
386 if (cp2 == cp) {
387 SCM_APPEND(h, t, tail);
388 return h;
389 } else {
390 SCM_APPEND1(h, t, SCM_CAR(cp2));
391 }
392 }
393 }
394 cp = SCM_CDR(cp);
395 }
396 return list;
397 }
398
Scm_DeleteKeywordX(ScmObj key,ScmObj list)399 ScmObj Scm_DeleteKeywordX(ScmObj key, ScmObj list)
400 {
401 ScmObj cp, prev = SCM_FALSE;
402 SCM_FOR_EACH(cp, list) {
403 if (!SCM_PAIRP(SCM_CDR(cp))) {
404 Scm_Error("incomplete key list: %S", list);
405 }
406 if (key == SCM_CAR(cp)) {
407 /* found */
408 if (SCM_FALSEP(prev)) {
409 /* we're at the head of list */
410 return Scm_DeleteKeywordX(key, SCM_CDR(SCM_CDR(cp)));
411 } else {
412 ScmObj tail = Scm_DeleteKeywordX(key, SCM_CDR(SCM_CDR(cp)));
413 Scm_SetCdr(prev, tail);
414 return list;
415 }
416 }
417 cp = SCM_CDR(cp);
418 prev = cp;
419 }
420 return list;
421 }
422
423 /* Scan kv-list to look for keywords in the array *KEYS. Saves the first
424 * found value in the corresponding *VALS. Returns a kv-list with all
425 * the keys deleted. For unfound keys, the corresponding VAL is set with
426 * FALLBACK.
427 * May throw an error if kv-list isn't even.
428 */
Scm_ExtractKeywords(ScmObj kv_list,const ScmObj * keys,int numKeys,ScmObj * vals,ScmObj fallback)429 ScmObj Scm_ExtractKeywords(ScmObj kv_list,
430 const ScmObj *keys,
431 int numKeys,
432 ScmObj *vals,
433 ScmObj fallback)
434 {
435 ScmObj cp, h = SCM_NIL, t = SCM_NIL;
436 int i;
437 for (i=0; i<numKeys; i++) vals[i] = SCM_UNBOUND;
438 SCM_FOR_EACH(cp, kv_list) {
439 if (!SCM_PAIRP(SCM_CDR(cp))) {
440 Scm_Error("keyword list not even: %S", kv_list);
441 }
442 for (i=0; i<numKeys; i++) {
443 if (SCM_EQ(keys[i], SCM_CAR(cp))) {
444 if (SCM_UNBOUNDP(vals[i])) {
445 vals[i] = SCM_CADR(cp);
446 }
447 break;
448 }
449 }
450 if (i == numKeys) {
451 SCM_APPEND1(h, t, SCM_CAR(cp));
452 SCM_APPEND1(h, t, SCM_CADR(cp));
453 }
454 cp = SCM_CDR(cp);
455 }
456 if (!SCM_UNBOUNDP(fallback)) {
457 for (i=0; i<numKeys; i++) {
458 if (vals[i] == SCM_UNBOUND) {
459 vals[i] = fallback;
460 }
461 }
462 }
463 return h;
464 }
465
466 /*
467 * Initialization
468 */
469
470 #include "builtin-syms.c"
471
Scm__InitSymbol(void)472 void Scm__InitSymbol(void)
473 {
474 SCM_INTERNAL_MUTEX_INIT(obtable_mutex);
475 obtable = SCM_HASH_TABLE(Scm_MakeHashTableSimple(SCM_HASH_STRING, 4096));
476 init_builtin_syms();
477 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
478 (void)SCM_INTERNAL_MUTEX_INIT(keywords.mutex);
479 keywords.table = SCM_HASH_TABLE(Scm_MakeHashTableSimple(SCM_HASH_STRING, 256));
480 /* Preset keyword class precedence list, depending on the value of
481 GAUCHE_KEYWORD_DISJOINT or GAUCHE_KEYWORD_IS_SYMBOL */
482 const char *disjoint = Scm_GetEnv("GAUCHE_KEYWORD_DISJOINT");
483 const char *issymbol = Scm_GetEnv("GAUCHE_KEYWORD_IS_SYMBOL");
484 if (disjoint != NULL) {
485 keyword_disjoint_p = TRUE;
486 } else if (issymbol != NULL) {
487 keyword_disjoint_p = FALSE;
488 } else {
489 keyword_disjoint_p = FALSE; /* This determines the default */
490 }
491 if (keyword_disjoint_p) {
492 Scm_KeywordClass.cpa = &(keyword_cpl[1]);
493 /* The class is initialized later in class.c */
494 }
495 #endif /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
496 }
497