1 /*
2  * symbol.c - symbol implementation
3  *
4  *   Copyright (c) 2000-2020  Shiro Kawai  <shiro@acm.org>
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *
13  *   2. Redistributions in binary form must reproduce the above copyright
14  *      notice, this list of conditions and the following disclaimer in the
15  *      documentation and/or other materials provided with the distribution.
16  *
17  *   3. Neither the name of the authors nor the names of its contributors
18  *      may be used to endorse or promote products derived from this
19  *      software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #define LIBGAUCHE_BODY
35 #include "gauche.h"
36 #include "gauche/priv/builtin-syms.h"
37 #include "gauche/priv/moduleP.h"
38 
39 /*-----------------------------------------------------------
40  * Symbols
41  */
42 
43 static void symbol_print(ScmObj obj, ScmPort *port, ScmWriteContext *);
44 static int symbol_compare(ScmObj x, ScmObj y, int equalp);
45 
46 SCM_DEFINE_BUILTIN_CLASS(Scm_SymbolClass, symbol_print, symbol_compare,
47                          NULL, NULL, NULL);
48 
49 static ScmClass *keyword_cpl[] = {
50     SCM_CLASS_STATIC_PTR(Scm_SymbolClass),
51     SCM_CLASS_STATIC_PTR(Scm_TopClass),
52     NULL
53 };
54 
55 SCM_DEFINE_BUILTIN_CLASS(Scm_KeywordClass, symbol_print, symbol_compare,
56                          NULL, NULL, keyword_cpl);
57 
58 /* name -> symbol mapper */
59 static ScmInternalMutex obtable_mutex = SCM_INTERNAL_MUTEX_INITIALIZER;
60 static ScmHashTable *obtable = NULL;
61 
62 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
63 /* Global keyword table. */
64 static struct {
65     ScmHashTable *table;
66     ScmInternalMutex mutex;
67 } keywords;
68 
69 static int keyword_disjoint_p = FALSE;
70 #endif /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
71 
72 /* internal constructor.  NAME must be an immutable string. */
make_sym(ScmClass * klass,ScmString * name,int interned)73 static ScmSymbol *make_sym(ScmClass *klass, ScmString *name, int interned)
74 {
75     if (interned) {
76         /* fast path */
77         SCM_INTERNAL_MUTEX_LOCK(obtable_mutex);
78         ScmObj e = Scm_HashTableRef(obtable, SCM_OBJ(name), SCM_FALSE);
79         SCM_INTERNAL_MUTEX_UNLOCK(obtable_mutex);
80         if (!SCM_FALSEP(e)) return SCM_SYMBOL(e);
81     }
82 
83     ScmSymbol *sym = SCM_NEW(ScmSymbol);
84     SCM_SET_CLASS(sym, klass);
85     sym->name = name;
86     sym->flags = interned? SCM_SYMBOL_FLAG_INTERNED : 0;
87 
88     if (!interned) {
89         return sym;
90     } else {
91         /* Using SCM_DICT_NO_OVERWRITE ensures that if another thread interns
92            the same name symbol between above HashTableRef and here, we'll
93            get the already interned symbol. */
94         SCM_INTERNAL_MUTEX_LOCK(obtable_mutex);
95         ScmObj r = Scm_HashTableSet(obtable, SCM_OBJ(name), SCM_OBJ(sym),
96                                     SCM_DICT_NO_OVERWRITE);
97         SCM_INTERNAL_MUTEX_UNLOCK(obtable_mutex);
98         return SCM_UNBOUNDP(r)? sym : SCM_SYMBOL(r);
99     }
100 }
101 
102 /* Intern */
Scm_MakeSymbol(ScmString * name,int interned)103 ScmObj Scm_MakeSymbol(ScmString *name, int interned)
104 {
105     ScmObj sname = Scm_CopyStringWithFlags(name, SCM_STRING_IMMUTABLE,
106                                            SCM_STRING_IMMUTABLE);
107     return SCM_OBJ(make_sym(SCM_CLASS_SYMBOL, SCM_STRING(sname), interned));
108 }
109 
110 /* Keyword prefix. */
111 static SCM_DEFINE_STRING_CONST(keyword_prefix, ":", 1, 1);
112 
113 /* In unified keyword, we include preceding ':' to the name. */
Scm_MakeKeyword(ScmString * name)114 ScmObj Scm_MakeKeyword(ScmString *name)
115 {
116 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
117     if (keyword_disjoint_p) {
118         (void)SCM_INTERNAL_MUTEX_LOCK(keywords.mutex);
119         ScmObj r = Scm_HashTableRef(keywords.table, SCM_OBJ(name), SCM_FALSE);
120         (void)SCM_INTERNAL_MUTEX_UNLOCK(keywords.mutex);
121 
122         if (SCM_KEYWORDP(r)) return r;
123 
124         ScmKeyword *k = SCM_NEW(ScmKeyword);
125         SCM_SET_CLASS(k, SCM_CLASS_KEYWORD);
126         k->name = SCM_STRING(Scm_CopyString(name));
127         (void)SCM_INTERNAL_MUTEX_LOCK(keywords.mutex);
128         r = Scm_HashTableSet(keywords.table, SCM_OBJ(name), SCM_OBJ(k),
129                              SCM_DICT_NO_OVERWRITE);
130         (void)SCM_INTERNAL_MUTEX_UNLOCK(keywords.mutex);
131         return SCM_UNBOUNDP(r)? SCM_OBJ(k) : r ;
132     }
133 #endif /*GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
134     ScmObj sname = Scm_StringAppend2(&keyword_prefix, name);
135     ScmSymbol *s = make_sym(SCM_CLASS_KEYWORD, SCM_STRING(sname), TRUE);
136     Scm_DefineConst(Scm__GaucheKeywordModule(), s, SCM_OBJ(s));
137     return SCM_OBJ(s);
138 }
139 
Scm_KeywordToString(ScmKeyword * k)140 ScmObj Scm_KeywordToString(ScmKeyword *k)
141 {
142 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
143     if (keyword_disjoint_p) {
144         return SCM_OBJ(k->name);
145     } else {
146         return Scm_Substring(k->name, 1, -1, FALSE);
147     }
148 #else  /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
149     return Scm_Substring(k->name, 1, -1, FALSE);
150 #endif /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
151 }
152 
153 /* Default prefix string. */
154 static SCM_DEFINE_STRING_CONST(default_prefix, "G", 1, 1);
155 
156 /* Returns uninterned symbol.   PREFIX can be NULL */
Scm_Gensym(ScmString * prefix)157 ScmObj Scm_Gensym(ScmString *prefix)
158 {
159     char numbuf[50];
160     /* We don't need mutex for this variable, since the race on it is
161        tolerated---multiple threads may get the same name symbols,
162        but they are uninterned and never be eq? to each other. */
163     static intptr_t gensym_count = 0;
164 
165     if (prefix == NULL) prefix = &default_prefix;
166     int nc = snprintf(numbuf, 49, "%"PRIdPTR, gensym_count++);
167     numbuf[49] = '\0';
168     ScmObj name = Scm_StringAppendC(prefix, numbuf, nc, nc);
169     ScmSymbol *sym = make_sym(SCM_CLASS_SYMBOL, SCM_STRING(name), FALSE);
170     return SCM_OBJ(sym);
171 }
172 
173 /* If symbol S has a prefix P, returns a symbol without the prefix.
174    Otherwise, returns #f. */
Scm_SymbolSansPrefix(ScmSymbol * s,ScmSymbol * p)175 ScmObj Scm_SymbolSansPrefix(ScmSymbol *s, ScmSymbol *p)
176 {
177     const ScmStringBody *bp = SCM_STRING_BODY(SCM_SYMBOL_NAME(p));
178     const ScmStringBody *bs = SCM_STRING_BODY(SCM_SYMBOL_NAME(s));
179     int zp = SCM_STRING_BODY_SIZE(bp);
180     int zs = SCM_STRING_BODY_SIZE(bs);
181     const char *cp = SCM_STRING_BODY_START(bp);
182     const char *cs = SCM_STRING_BODY_START(bs);
183 
184     if (zp > zs || memcmp(cp, cs, zp) != 0) return SCM_FALSE;
185     return Scm_Intern(SCM_STRING(Scm_MakeString(cs + zp, zs - zp, -1,
186                                                 SCM_STRING_IMMUTABLE)));
187 }
188 
189 /*
190  * Print
191  */
192 
escape_required_p(const ScmStringBody * b,u_int flags,int casefold)193 static int escape_required_p(const ScmStringBody *b, u_int flags, int casefold)
194 {
195     int siz = SCM_STRING_BODY_SIZE(b);
196     if (siz == 0) return !(flags & SCM_SYMBOL_WRITER_NOESCAPE_EMPTY);
197 
198     const char *p = SCM_STRING_BODY_START(b);
199     const char *e = p + siz;
200     ScmChar ch;
201 
202     if (flags & SCM_SYMBOL_WRITER_NOESCAPE_INITIAL) goto subsequent;
203 
204     if (keyword_disjoint_p && *p == ':') return TRUE;
205 
206     if (*p == '+' || *p == '-') {
207         p++;
208         if (p == e) return FALSE;
209         if (*p == '.') goto dot_subsequent;
210         /* detect special numeric constants */
211         if (siz == 2 && (*p == 'i' || *p == 'I')) return TRUE;
212         if (siz >= 6 && p[3] == '.' && p[4] == '0') {
213             if ((p[0] == 'n' || p[0] == 'N')
214                 && (p[1] == 'a' || p[1] == 'A')
215                 && (p[2] == 'n' || p[2] == 'N')) return TRUE;
216             if ((p[0] == 'i' || p[0] == 'I')
217                 && (p[1] == 'n' || p[1] == 'N')
218                 && (p[2] == 'f' || p[2] == 'F')) return TRUE;
219         }
220         SCM_CHAR_GET(p, ch);
221         if (!Scm_CharLexerCategoryP(ch, SCM_CHAR_SIGN_SUBSEQUENT)) return TRUE;
222         p += SCM_CHAR_NBYTES(ch);
223         goto subsequent;
224     }
225     if (*p == '.') {
226     dot_subsequent:
227         p++;
228         if (p == e) return TRUE;
229         SCM_CHAR_GET(p, ch);
230         if (ch != '.' && !Scm_CharLexerCategoryP(ch, SCM_CHAR_SIGN_SUBSEQUENT))
231             return TRUE;
232         p += SCM_CHAR_NBYTES(ch);
233         goto subsequent;
234     }
235     SCM_CHAR_GET(p, ch);
236     if (!Scm_CharLexerCategoryP(ch, SCM_CHAR_INITIAL)) return TRUE;
237     if (casefold && ch >= 'A' && ch <= 'Z') return TRUE;
238     p += SCM_CHAR_NBYTES(ch);
239 
240  subsequent:
241     while (p < e) {
242         SCM_CHAR_GET(p, ch);
243         if (!Scm_CharLexerCategoryP(ch, SCM_CHAR_SUBSEQUENT)) return TRUE;
244         if (casefold && ch >= 'A' && ch <= 'Z') return TRUE;
245         p += SCM_CHAR_NBYTES(ch);
246     }
247     return FALSE;
248 }
249 
250 /* internal function to write symbol name, with proper escaping */
Scm_WriteSymbolName(ScmString * snam,ScmPort * port,ScmWriteContext * ctx,u_int flags)251 void Scm_WriteSymbolName(ScmString *snam, ScmPort *port, ScmWriteContext *ctx,
252                          u_int flags)
253 {
254     /* See if we have special characters, and use |-escape if necessary. */
255     /* TODO: For now, we regard chars over 0x80 is all "printable".
256        Need a more consistent mechanism. */
257     const ScmStringBody *b = SCM_STRING_BODY(snam);
258     const char *p = SCM_STRING_BODY_START(b);
259     int siz = SCM_STRING_BODY_SIZE(b);
260     int casefold = (Scm_WriteContextCase(ctx) == SCM_WRITE_CASE_FOLD);
261 
262     if (siz == 0) {         /* special case */
263         if (!(flags & SCM_SYMBOL_WRITER_NOESCAPE_EMPTY)) {
264             SCM_PUTZ("||", -1, port);
265         }
266         return;
267     }
268     if (escape_required_p(b, flags, casefold)) {
269         SCM_PUTC('|', port);
270         for (const char *q=p; q<p+siz; ) {
271             unsigned int ch;
272             SCM_CHAR_GET(q, ch);
273             q += SCM_CHAR_NBYTES(ch);
274             if (ch < 128) {
275                 if (ch == '|' || ch == '\\') {
276                     SCM_PUTC('\\', port);
277                     SCM_PUTC(ch, port);
278                 } else if (ch < 0x20 || ch == 0x7f) {
279                     Scm_Printf(port, "\\x%02x;", ch);
280                 } else {
281                     SCM_PUTC(ch, port);
282                 }
283             } else {
284                 SCM_PUTC(ch, port);
285             }
286         }
287         SCM_PUTC('|', port);
288     } else {
289         SCM_PUTS(snam, port);
290     }
291 }
292 
293 /* Symbol printer.
294    NB: Uninterned symbols are treated as sharable objects (can be written
295    with #n= syntax).  It is handled by upper layer (write.c) so we don't
296    worry about it in this routine.
297  */
symbol_print(ScmObj obj,ScmPort * port,ScmWriteContext * ctx)298 static void symbol_print(ScmObj obj, ScmPort *port, ScmWriteContext *ctx)
299 {
300     if (Scm_WriteContextMode(ctx) == SCM_WRITE_DISPLAY) {
301         SCM_PUTS(SCM_SYMBOL_NAME(obj), port);
302     } else {
303 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
304         if (SCM_KEYWORDP(obj) && keyword_disjoint_p) {
305             Scm_Putc(':', port);
306             /* We basically print keyword names in the same way as symbols
307                (i.e. using |-escape if necessary).  However, as a convention,
308                two things are different from the default symbol writer.
309                (1) We don't check the noninitials; :1 is unambiguously a
310                keyword, so we don't need to print :|1|.
311                (2) A keyword with an empty name can be printed just as :,
312                instead of :||.
313                These conventions are useful if we pass the S-expression with
314                these keywords to other Scheme implementations that don't support
315                CL-style keywords; they would just read those ones as symbols.
316             */
317             Scm_WriteSymbolName(SCM_KEYWORD(obj)->name, port, ctx,
318                                 (SCM_SYMBOL_WRITER_NOESCAPE_INITIAL
319                                  |SCM_SYMBOL_WRITER_NOESCAPE_EMPTY));
320             return;
321         }
322 #endif /*GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
323         if (!SCM_SYMBOL_INTERNED(obj)) SCM_PUTZ("#:", -1, port);
324         Scm_WriteSymbolName(SCM_SYMBOL_NAME(obj), port, ctx, 0);
325     }
326 }
327 
328 /* Symbol comparison procedure.
329    Will be used via 'compare' procedure.  Following srfi-114, we compare
330    by name, but takes extra care of intern/unintern distinction; if the
331    names are the same, interned symbol is less, and if both are
332    uninterned, we compare addresses.
333  */
symbol_compare(ScmObj x,ScmObj y,int equalp)334 static int symbol_compare(ScmObj x, ScmObj y, int equalp)
335 {
336     if (equalp) {
337         /* Symbol equality test is handled in Scm_Eq* and will never come
338            here, but just in case.  */
339         return SCM_EQ(x, y)? 0:1;
340     } else if (SCM_EQ(x, y)) {
341         return 0;
342     } else {
343         int r = Scm_StringCmp(SCM_SYMBOL_NAME(x), SCM_SYMBOL_NAME(y));
344         if (r != 0) return r;
345         if (SCM_SYMBOL_INTERNED(x)) return -1; /* y must be uninterned */
346         if (SCM_SYMBOL_INTERNED(y)) return  1; /* x must be uninterned */
347         return (x < y)? -1 : 1;                /* both are uninterned */
348     }
349 }
350 
351 /*
352  * Keyword Utilities
353  *   The names are historical; KEY doesn't need to be a keyword at all;
354  *   anything that can be compared by eq? do.
355  */
356 
Scm_GetKeyword(ScmObj key,ScmObj list,ScmObj fallback)357 ScmObj Scm_GetKeyword(ScmObj key, ScmObj list, ScmObj fallback)
358 {
359     ScmObj cp;
360     SCM_FOR_EACH(cp, list) {
361         if (!SCM_PAIRP(SCM_CDR(cp))) {
362             Scm_Error("incomplete key list: %S", list);
363         }
364         if (key == SCM_CAR(cp)) return SCM_CADR(cp);
365         cp = SCM_CDR(cp);
366     }
367     if (SCM_UNBOUNDP(fallback)) {
368         Scm_Error("value for key %S is not provided: %S", key, list);
369     }
370     return fallback;
371 }
372 
Scm_DeleteKeyword(ScmObj key,ScmObj list)373 ScmObj Scm_DeleteKeyword(ScmObj key, ScmObj list)
374 {
375     ScmObj cp;
376     SCM_FOR_EACH(cp, list) {
377         if (!SCM_PAIRP(SCM_CDR(cp))) {
378             Scm_Error("incomplete key list: %S", list);
379         }
380         if (key == SCM_CAR(cp)) {
381             /* found */
382             ScmObj h = SCM_NIL, t = SCM_NIL;
383             ScmObj tail = Scm_DeleteKeyword(key, SCM_CDR(SCM_CDR(cp)));
384             ScmObj cp2;
385             SCM_FOR_EACH(cp2, list) {
386                 if (cp2 == cp) {
387                     SCM_APPEND(h, t, tail);
388                     return h;
389                 } else {
390                     SCM_APPEND1(h, t, SCM_CAR(cp2));
391                 }
392             }
393         }
394         cp = SCM_CDR(cp);
395     }
396     return list;
397 }
398 
Scm_DeleteKeywordX(ScmObj key,ScmObj list)399 ScmObj Scm_DeleteKeywordX(ScmObj key, ScmObj list)
400 {
401     ScmObj cp, prev = SCM_FALSE;
402     SCM_FOR_EACH(cp, list) {
403         if (!SCM_PAIRP(SCM_CDR(cp))) {
404             Scm_Error("incomplete key list: %S", list);
405         }
406         if (key == SCM_CAR(cp)) {
407             /* found */
408             if (SCM_FALSEP(prev)) {
409                 /* we're at the head of list */
410                 return Scm_DeleteKeywordX(key, SCM_CDR(SCM_CDR(cp)));
411             } else {
412                 ScmObj tail = Scm_DeleteKeywordX(key, SCM_CDR(SCM_CDR(cp)));
413                 Scm_SetCdr(prev, tail);
414                 return list;
415             }
416         }
417         cp = SCM_CDR(cp);
418         prev = cp;
419     }
420     return list;
421 }
422 
423 /* Scan kv-list to look for keywords in the array *KEYS.  Saves the first
424  * found value in the corresponding *VALS.  Returns a kv-list with all
425  * the keys deleted.  For unfound keys, the corresponding VAL is set with
426  * FALLBACK.
427  * May throw an error if kv-list isn't even.
428  */
Scm_ExtractKeywords(ScmObj kv_list,const ScmObj * keys,int numKeys,ScmObj * vals,ScmObj fallback)429 ScmObj Scm_ExtractKeywords(ScmObj kv_list,
430                            const ScmObj *keys,
431                            int numKeys,
432                            ScmObj *vals,
433                            ScmObj fallback)
434 {
435     ScmObj cp, h = SCM_NIL, t = SCM_NIL;
436     int i;
437     for (i=0; i<numKeys; i++) vals[i] = SCM_UNBOUND;
438     SCM_FOR_EACH(cp, kv_list) {
439         if (!SCM_PAIRP(SCM_CDR(cp))) {
440             Scm_Error("keyword list not even: %S", kv_list);
441         }
442         for (i=0; i<numKeys; i++) {
443             if (SCM_EQ(keys[i], SCM_CAR(cp))) {
444                 if (SCM_UNBOUNDP(vals[i])) {
445                     vals[i] = SCM_CADR(cp);
446                 }
447                 break;
448             }
449         }
450         if (i == numKeys) {
451             SCM_APPEND1(h, t, SCM_CAR(cp));
452             SCM_APPEND1(h, t, SCM_CADR(cp));
453         }
454         cp = SCM_CDR(cp);
455     }
456     if (!SCM_UNBOUNDP(fallback)) {
457         for (i=0; i<numKeys; i++) {
458             if (vals[i] == SCM_UNBOUND) {
459                 vals[i] = fallback;
460             }
461         }
462     }
463     return h;
464 }
465 
466 /*
467  * Initialization
468  */
469 
470 #include "builtin-syms.c"
471 
Scm__InitSymbol(void)472 void Scm__InitSymbol(void)
473 {
474     SCM_INTERNAL_MUTEX_INIT(obtable_mutex);
475     obtable = SCM_HASH_TABLE(Scm_MakeHashTableSimple(SCM_HASH_STRING, 4096));
476     init_builtin_syms();
477 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
478     (void)SCM_INTERNAL_MUTEX_INIT(keywords.mutex);
479     keywords.table = SCM_HASH_TABLE(Scm_MakeHashTableSimple(SCM_HASH_STRING, 256));
480     /* Preset keyword class precedence list, depending on the value of
481        GAUCHE_KEYWORD_DISJOINT or GAUCHE_KEYWORD_IS_SYMBOL */
482     const char *disjoint = Scm_GetEnv("GAUCHE_KEYWORD_DISJOINT");
483     const char *issymbol = Scm_GetEnv("GAUCHE_KEYWORD_IS_SYMBOL");
484     if (disjoint != NULL) {
485         keyword_disjoint_p = TRUE;
486     } else if (issymbol != NULL) {
487         keyword_disjoint_p = FALSE;
488     } else {
489         keyword_disjoint_p = FALSE; /* This determines the default */
490     }
491     if (keyword_disjoint_p) {
492         Scm_KeywordClass.cpa = &(keyword_cpl[1]);
493         /* The class is initialized later in class.c */
494     }
495 #endif /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
496 }
497