1 /**********************************************************************
2 
  symbol.c -
4 
5   $Author$
6   created at: Tue Jul  8 15:49:54 JST 2014
7 
8   Copyright (C) 2014 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/encoding.h"
13 #include "ruby/st.h"
14 #include "internal.h"
15 #include "symbol.h"
16 #include "gc.h"
17 #include "probes.h"
18 
/* Compile-time debugging switch for this file; off unless the build sets it. */
#ifndef SYMBOL_DEBUG
# define SYMBOL_DEBUG 0
#endif
/* Enables the symbol/ID cross-check inside get_id_serial_entry(); follows
 * SYMBOL_DEBUG unless overridden. */
#ifndef CHECK_ID_SERIAL
# define CHECK_ID_SERIAL SYMBOL_DEBUG
#endif

/* Non-zero when the id stored in a dynamic symbol has bits set outside
 * ID_SCOPE_MASK, i.e. a serial has already been merged into it. */
#define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)

/* Extracts the ID from a static symbol VALUE by shifting it right by
 * RUBY_SPECIAL_SHIFT. */
#define STATIC_SYM2ID(sym) RSHIFT((unsigned long)(sym), RUBY_SPECIAL_SHIFT)
29 
/* Declared ahead of the id.c include because REGISTER_SYMID expands to a call. */
static ID register_static_symid(ID, const char *, long, rb_encoding *);
static ID register_static_symid_str(ID, VALUE);
/* Expands to a registration call; requires a variable named `enc` to be in
 * scope at the point of use (presumably inside the included id.c). */
#define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
#include "id.c"

/* Identifier-character test on the byte at p: ASCII alphanumeric, '_', or any
 * non-ASCII byte.  The e and enc arguments are accepted but not used. */
#define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))

#define op_tbl_count numberof(op_tbl)
/* op_tbl_len() below only inspects three bytes, so the name field must be
 * exactly that wide. */
STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
/* Length (1..3) of the i-th operator name, found by probing for the first NUL. */
#define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
40 
41 static void
Init_op_tbl(void)42 Init_op_tbl(void)
43 {
44     int i;
45     rb_encoding *const enc = rb_usascii_encoding();
46 
47     for (i = '!'; i <= '~'; ++i) {
48 	if (!ISALNUM(i) && i != '_') {
49 	    char c = (char)i;
50 	    register_static_symid(i, &c, 1, enc);
51 	}
52     }
53     for (i = 0; i < op_tbl_count; ++i) {
54 	register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
55     }
56 }
57 
/* Number of ID serials whose entries share one sub-array of global_symbols.ids. */
static const int ID_ENTRY_UNIT = 512;

/* Slots kept per serial inside a sub-array; ID_ENTRY_SIZE is the slot count. */
enum id_entry_type {
    ID_ENTRY_STR,
    ID_ENTRY_SYM,
    ID_ENTRY_SIZE
};

/* File-wide symbol table state. */
static struct symbols {
    rb_id_serial_t last_id;	/* most recent serial handed out by next_id_base() */
    st_table *str_sym;		/* name string -> symbol VALUE */
    VALUE ids;			/* array of sub-arrays, indexed by serial / ID_ENTRY_UNIT */
    VALUE dsymbol_fstr_hash;	/* name strings of dynamic symbols that have no serial yet */
} global_symbols = {tNEXT_ID-1};

/* st hash type for str_sym: keys are compared and hashed with the rb_str_* helpers. */
static const struct st_hash_type symhash = {
    rb_str_hash_cmp,
    rb_str_hash,
};
77 
78 void
Init_sym(void)79 Init_sym(void)
80 {
81     VALUE dsym_fstrs = rb_ident_hash_new();
82     global_symbols.dsymbol_fstr_hash = dsym_fstrs;
83     rb_gc_register_mark_object(dsym_fstrs);
84     rb_obj_hide(dsym_fstrs);
85 
86     global_symbols.str_sym = st_init_table_with_size(&symhash, 1000);
87     global_symbols.ids = rb_ary_tmp_new(0);
88     rb_gc_register_mark_object(global_symbols.ids);
89 
90     Init_op_tbl();
91     Init_id();
92 }
93 
/*
 * Forward declarations of file-local helpers defined further down.
 * Each is wrapped in WARN_UNUSED_RESULT (presumably so that ignoring
 * the returned ID/VALUE is diagnosed by the compiler).
 */
WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type));
WARN_UNUSED_RESULT(static VALUE dsymbol_check(const VALUE sym));
WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str));
WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
WARN_UNUSED_RESULT(static ID attrsetname_to_attr(VALUE name));
WARN_UNUSED_RESULT(static ID attrsetname_to_attr_id(VALUE name));
WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable));
102 
103 ID
rb_id_attrset(ID id)104 rb_id_attrset(ID id)
105 {
106     VALUE str, sym;
107     int scope;
108 
109     if (!is_notop_id(id)) {
110 	switch (id) {
111 	  case tAREF: case tASET:
112 	    return tASET;	/* only exception */
113 	}
114 	rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
115 		      rb_id2str(id));
116     }
117     else {
118 	scope = id_type(id);
119 	switch (scope) {
120 	  case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
121 	  case ID_CONST: case ID_CLASS: case ID_JUNK:
122 	    break;
123 	  case ID_ATTRSET:
124 	    return id;
125 	  default:
126 	    {
127 		if ((str = lookup_id_str(id)) != 0) {
128 		    rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
129 				  scope, str);
130 		}
131 		else {
132 		    rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
133 				      scope, (VALUE)id);
134 		}
135 	    }
136 	}
137     }
138 
139     /* make new symbol and ID */
140     if (!(str = lookup_id_str(id))) {
141 	static const char id_types[][8] = {
142 	    "local",
143 	    "instance",
144 	    "invalid",
145 	    "global",
146 	    "attrset",
147 	    "const",
148 	    "class",
149 	    "junk",
150 	};
151 	rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
152 		      (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
153     }
154     str = rb_str_dup(str);
155     rb_str_cat(str, "=", 1);
156     sym = lookup_str_sym(str);
157     id = sym ? rb_sym2id(sym) : intern_str(str, 1);
158     return id;
159 }
160 
161 ID
rb_id_attrget(ID id)162 rb_id_attrget(ID id)
163 {
164     return attrsetname_to_attr(rb_id2str(id));
165 }
166 
167 static int
is_special_global_name(const char * m,const char * e,rb_encoding * enc)168 is_special_global_name(const char *m, const char *e, rb_encoding *enc)
169 {
170     int mb = 0;
171 
172     if (m >= e) return 0;
173     if (is_global_name_punct(*m)) {
174 	++m;
175     }
176     else if (*m == '-') {
177 	if (++m >= e) return 0;
178 	if (is_identchar(m, e, enc)) {
179 	    if (!ISASCII(*m)) mb = 1;
180 	    m += rb_enc_mbclen(m, e, enc);
181 	}
182     }
183     else {
184 	if (!ISDIGIT(*m)) return 0;
185 	do {
186 	    if (!ISASCII(*m)) mb = 1;
187 	    ++m;
188 	} while (m < e && ISDIGIT(*m));
189     }
190     return m == e ? mb + 1 : 0;
191 }
192 
193 int
rb_symname_p(const char * name)194 rb_symname_p(const char *name)
195 {
196     return rb_enc_symname_p(name, rb_ascii8bit_encoding());
197 }
198 
199 int
rb_enc_symname_p(const char * name,rb_encoding * enc)200 rb_enc_symname_p(const char *name, rb_encoding *enc)
201 {
202     return rb_enc_symname2_p(name, strlen(name), enc);
203 }
204 
/*
 * Decides whether the first character of name[0..nlen) makes the name a
 * constant name.
 *
 * - Empty input: FALSE.
 * - ASCII first byte: the result of ISUPPER().
 * - Otherwise the first multibyte character is decoded; an incomplete or
 *   invalid character gives FALSE.
 * - Unicode encodings: TRUE for upper case, FALSE for lower case, and
 *   otherwise TRUE only if the character has the "titlecaseletter"
 *   property.
 * - Other encodings: TRUE when case-folding the character yields bytes
 *   that differ from the original ones.
 */
static int
rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
{
    int c, len;
    const char *end = name + nlen;

    if (nlen < 1) return FALSE;
    if (ISASCII(*name)) return ISUPPER(*name);
    /* c first holds the mbclen result, then is reused for the code point */
    c = rb_enc_precise_mbclen(name, end, enc);
    if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
    len = MBCLEN_CHARFOUND_LEN(c);
    c = rb_enc_mbc_to_codepoint(name, end, enc);
    if (ONIGENC_IS_UNICODE(enc)) {
	/* looked up on first use and kept for later calls; 0 means "not yet" */
	static int ctype_titlecase = 0;
	if (rb_enc_isupper(c, enc)) return TRUE;
	if (rb_enc_islower(c, enc)) return FALSE;
	if (!ctype_titlecase) {
	    static const UChar cname[] = "titlecaseletter";
	    /* note: this `end` shadows the outer one inside this block only */
	    static const UChar *const end = cname + sizeof(cname) - 1;
	    ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
	}
	if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
    }
    else {
	/* fallback to case-folding */
	OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
	const OnigUChar *beg = (const OnigUChar *)name;
	int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
				   &beg, (const OnigUChar *)end,
				   fold, enc);
	/* folding produced something different in length or content */
	if (r > 0 && (r != len || memcmp(fold, name, r)))
	    return TRUE;
    }
    return FALSE;
}
240 
/*
 * Bit sets (bit n = scope type n) passed as allowed_attrset to
 * rb_enc_symname_type(): the scope types after which a trailing '=' is
 * accepted as an attrset name.
 *   FOR_SYNTAX: only ID_LOCAL and ID_CONST.
 *   FOR_INTERN: the low (1<<ID_SCOPE_SHIFT) bits, except the ID_ATTRSET bit.
 */
#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
243 
/*
 * Classifies name[0..len) as a symbol name.
 *
 * Returns one of the ID_* scope types (ID_LOCAL, ID_CONST, ID_GLOBAL,
 * ID_INSTANCE, ID_CLASS, ID_ATTRSET, ID_JUNK) when the whole range is
 * consumed by one of the recognised forms, or -1 otherwise.  -1 is also
 * returned for a NULL/empty name and for encodings that are not
 * ASCII-compatible.
 *
 * allowed_attrset is a bit set (1U << scope type) of the scope types
 * that may be followed by '=' to form an attrset name.
 *
 * Note: several branches look at the byte after the current one
 * (*++m, m[1]) without comparing against e first, so the buffer is
 * presumably expected to be NUL-terminated -- verify against callers.
 */
int
rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
{
    const char *m = name;
    const char *e = m + len;
    int type = ID_JUNK;	/* operators keep this default */

    if (!rb_enc_asciicompat(enc)) return -1;
    if (!m || len <= 0) return -1;
    switch (*m) {
      case '\0':
	return -1;

      case '$':
	type = ID_GLOBAL;
	if (is_special_global_name(++m, e, enc)) return type;
	goto id;

      case '@':
	type = ID_INSTANCE;
	if (*++m == '@') {
	    ++m;
	    type = ID_CLASS;
	}
	goto id;

      case '<':
	/* <, <<, <=, <=> */
	switch (*++m) {
	  case '<': ++m; break;
	  case '=': if (*++m == '>') ++m; break;
	  default: break;
	}
	break;

      case '>':
	/* >, >>, >= */
	switch (*++m) {
	  case '>': case '=': ++m; break;
	}
	break;

      case '=':
	/* =~, ==, ===; a lone '=' is rejected */
	switch (*++m) {
	  case '~': ++m; break;
	  case '=': if (*++m == '=') ++m; break;
	  default: return -1;
	}
	break;

      case '*':
	if (*++m == '*') ++m;
	break;

      case '+': case '-':
	/* binary form, or unary form spelled with a trailing '@' */
	if (*++m == '@') ++m;
	break;

      case '|': case '^': case '&': case '/': case '%': case '~': case '`':
	++m;
	break;

      case '[':
	/* [] and []=; anything else starting with '[' goes to the id check */
	if (m[1] != ']') goto id;
	++m;
	if (*++m == '=') ++m;
	break;

      case '!':
	if (len == 1) return ID_JUNK;
	switch (*++m) {
	  case '=': case '~': ++m; break;
	  default:
	    /* '!' followed by other text is only accepted when junk attrset names are allowed */
	    if (allowed_attrset & (1U << ID_JUNK)) goto id;
	    return -1;
	}
	break;

      default:
	type = rb_sym_constant_char_p(m, e-m, enc) ? ID_CONST : ID_LOCAL;
      id:
	/* m must now point at an identifier start: '_', an ASCII letter, or a non-ASCII byte */
	if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
	    /*
	     * Not an identifier.  If the name ends in '=', classify the
	     * name without that '=': unless that prefix is itself an
	     * attrset name, the whole name is reported as ID_ATTRSET
	     * (this includes the case where the prefix classifies as -1).
	     */
	    if (len > 1 && *(e-1) == '=') {
		type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
		if (type != ID_ATTRSET) return ID_ATTRSET;
	    }
	    return -1;
	}
	while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
	if (m >= e) break;
	switch (*m) {
	  case '!': case '?':
	    /* a '!'/'?' suffix is not allowed on $, @ and @@ names */
	    if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
	    type = ID_JUNK;
	    ++m;
	    /* continue into the '=' case only when '=' is the final byte */
	    if (m + 1 < e || *m != '=') break;
	    /* fall through */
	  case '=':
	    if (!(allowed_attrset & (1U << type))) return -1;
	    type = ID_ATTRSET;
	    ++m;
	    break;
	}
	break;
    }
    /* valid only if every byte was consumed */
    return m == e ? type : -1;
}
349 
350 int
rb_enc_symname2_p(const char * name,long len,rb_encoding * enc)351 rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
352 {
353     return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
354 }
355 
356 static int
rb_str_symname_type(VALUE name,unsigned int allowed_attrset)357 rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
358 {
359     const char *ptr = StringValuePtr(name);
360     long len = RSTRING_LEN(name);
361     int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
362     RB_GC_GUARD(name);
363     return type;
364 }
365 
366 static void
set_id_entry(rb_id_serial_t num,VALUE str,VALUE sym)367 set_id_entry(rb_id_serial_t num, VALUE str, VALUE sym)
368 {
369     size_t idx = num / ID_ENTRY_UNIT;
370     VALUE ary, ids = global_symbols.ids;
371     if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
372 	ary = rb_ary_tmp_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
373 	rb_ary_store(ids, (long)idx, ary);
374     }
375     idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
376     rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
377     rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
378 }
379 
380 static VALUE
get_id_serial_entry(rb_id_serial_t num,ID id,const enum id_entry_type t)381 get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
382 {
383     if (num && num <= global_symbols.last_id) {
384 	size_t idx = num / ID_ENTRY_UNIT;
385 	VALUE ids = global_symbols.ids;
386 	VALUE ary;
387 	if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
388             long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
389             VALUE result = rb_ary_entry(ary, pos + t);
390             if (NIL_P(result)) return 0;
391 #if CHECK_ID_SERIAL
392             if (id) {
393                 VALUE sym = result;
394                 if (t != ID_ENTRY_SYM)
395                     sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
396                 if (STATIC_SYM_P(sym)) {
397                     if (STATIC_SYM2ID(sym) != id) return 0;
398                 }
399                 else {
400                     if (RSYMBOL(sym)->id != id) return 0;
401                 }
402             }
403 #endif
404             return result;
405 	}
406     }
407     return 0;
408 }
409 
410 static VALUE
get_id_entry(ID id,const enum id_entry_type t)411 get_id_entry(ID id, const enum id_entry_type t)
412 {
413     return get_id_serial_entry(rb_id_to_serial(id), id, t);
414 }
415 
416 static inline ID
417 #ifdef __GNUC__
418 __attribute__((unused))
419 #endif
rb_id_serial_to_id(rb_id_serial_t num)420 rb_id_serial_to_id(rb_id_serial_t num)
421 {
422     if (is_notop_id((ID)num)) {
423         VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
424 	return SYM2ID(sym);
425     }
426     else {
427 	return (ID)num;
428     }
429 }
430 
#if SYMBOL_DEBUG
/*
 * st_update() callback used by register_sym() in debug builds: aborts
 * with rb_fatal() if the key is already present, otherwise stores +arg+
 * as the value.
 */
static int
register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
{
    if (existing) {
        const VALUE name = (VALUE)*key;
        const VALUE registered = (VALUE)*value;

        rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE,
                 name, registered);
    }
    *value = arg;
    return ST_CONTINUE;
}
#endif
443 
444 static void
register_sym(VALUE str,VALUE sym)445 register_sym(VALUE str, VALUE sym)
446 {
447 #if SYMBOL_DEBUG
448     st_update(global_symbols.str_sym, (st_data_t)str,
449 	      register_sym_update_callback, (st_data_t)sym);
450 #else
451     st_add_direct(global_symbols.str_sym, (st_data_t)str, (st_data_t)sym);
452 #endif
453 }
454 
455 static void
unregister_sym(VALUE str,VALUE sym)456 unregister_sym(VALUE str, VALUE sym)
457 {
458     st_data_t str_data = (st_data_t)str;
459     if (!st_delete(global_symbols.str_sym, &str_data, NULL)) {
460 	rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str));
461     }
462 }
463 
464 static ID
register_static_symid(ID id,const char * name,long len,rb_encoding * enc)465 register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
466 {
467     VALUE str = rb_enc_str_new(name, len, enc);
468     return register_static_symid_str(id, str);
469 }
470 
471 static ID
register_static_symid_str(ID id,VALUE str)472 register_static_symid_str(ID id, VALUE str)
473 {
474     rb_id_serial_t num = rb_id_to_serial(id);
475     VALUE sym = STATIC_ID2SYM(id);
476 
477     OBJ_FREEZE(str);
478     str = rb_fstring(str);
479 
480     RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
481 
482     register_sym(str, sym);
483     set_id_entry(num, str, sym);
484 
485     return id;
486 }
487 
488 static int
sym_check_asciionly(VALUE str)489 sym_check_asciionly(VALUE str)
490 {
491     if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
492     switch (rb_enc_str_coderange(str)) {
493       case ENC_CODERANGE_BROKEN:
494 	rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
495 		 rb_enc_name(rb_enc_get(str)), str);
496       case ENC_CODERANGE_7BIT:
497 	return TRUE;
498     }
499     return FALSE;
500 }
501 
#if 0
/*
 * _str_ itself will be registered at the global symbol table.  _str_
 * can be modified before the registration, since the encoding will be
 * set to ASCII-8BIT if it is a special global name.
 */
/* NOTE(review): the comment above does not describe the function below;
 * it looks like a leftover from code that is no longer here -- confirm. */

/*
 * Disabled debugging helper: reports a bug via rb_bug() unless x is a
 * dynamic symbol.  The message distinguishes a static symbol with a
 * known name, a static symbol without one, and a non-symbol object.
 */
static inline void
must_be_dynamic_symbol(VALUE x)
{
    if (UNLIKELY(!DYNAMIC_SYM_P(x))) {
	if (STATIC_SYM_P(x)) {
	    /* same shift as STATIC_SYM2ID(), spelled out */
	    VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT));

	    if (str) {
		rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str));
	    }
	    else {
		rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x);
	    }
	}
	else {
	    rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x));
	}
    }
}
#endif
529 
530 static VALUE
dsymbol_alloc(const VALUE klass,const VALUE str,rb_encoding * const enc,const ID type)531 dsymbol_alloc(const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
532 {
533     const VALUE dsym = rb_newobj_of(klass, T_SYMBOL | FL_WB_PROTECTED);
534     long hashval;
535 
536     rb_enc_set_index(dsym, rb_enc_to_index(enc));
537     OBJ_FREEZE(dsym);
538     RB_OBJ_WRITE(dsym, &RSYMBOL(dsym)->fstr, str);
539     RSYMBOL(dsym)->id = type;
540 
541     /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
542     hashval = (long)rb_str_hash(str);
543     RSYMBOL(dsym)->hashval = RSHIFT((long)hashval, 1);
544 
545     register_sym(str, dsym);
546     rb_hash_aset(global_symbols.dsymbol_fstr_hash, str, Qtrue);
547 
548     RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(RSYMBOL(dsym)->fstr));
549 
550     return dsym;
551 }
552 
553 static inline VALUE
dsymbol_check(const VALUE sym)554 dsymbol_check(const VALUE sym)
555 {
556     if (UNLIKELY(rb_objspace_garbage_object_p(sym))) {
557 	const VALUE fstr = RSYMBOL(sym)->fstr;
558 	const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK;
559 	RSYMBOL(sym)->fstr = 0;
560 
561 	unregister_sym(fstr, sym);
562 	return dsymbol_alloc(rb_cSymbol, fstr, rb_enc_get(fstr), type);
563     }
564     else {
565 	return sym;
566     }
567 }
568 
569 static ID
lookup_str_id(VALUE str)570 lookup_str_id(VALUE str)
571 {
572     st_data_t sym_data;
573     if (st_lookup(global_symbols.str_sym, (st_data_t)str, &sym_data)) {
574 	const VALUE sym = (VALUE)sym_data;
575 
576 	if (STATIC_SYM_P(sym)) {
577 	    return STATIC_SYM2ID(sym);
578 	}
579 	else if (DYNAMIC_SYM_P(sym)) {
580 	    ID id = RSYMBOL(sym)->id;
581 	    if (id & ~ID_SCOPE_MASK) return id;
582 	}
583 	else {
584 	    rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
585 		   rb_builtin_class_name(sym), sym, str);
586 	}
587     }
588     return (ID)0;
589 }
590 
591 static VALUE
lookup_str_sym(const VALUE str)592 lookup_str_sym(const VALUE str)
593 {
594     st_data_t sym_data;
595     if (st_lookup(global_symbols.str_sym, (st_data_t)str, &sym_data)) {
596 	VALUE sym = (VALUE)sym_data;
597 
598 	if (DYNAMIC_SYM_P(sym)) {
599 	    sym = dsymbol_check(sym);
600 	}
601 	return sym;
602     }
603     else {
604 	return (VALUE)0;
605     }
606 }
607 
608 static VALUE
lookup_id_str(ID id)609 lookup_id_str(ID id)
610 {
611     return get_id_entry(id, ID_ENTRY_STR);
612 }
613 
614 ID
rb_intern3(const char * name,long len,rb_encoding * enc)615 rb_intern3(const char *name, long len, rb_encoding *enc)
616 {
617     VALUE sym;
618     struct RString fake_str;
619     VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
620     OBJ_FREEZE(str);
621 
622     sym = lookup_str_sym(str);
623     if (sym) return rb_sym2id(sym);
624     str = rb_enc_str_new(name, len, enc); /* make true string */
625     return intern_str(str, 1);
626 }
627 
628 static ID
next_id_base(void)629 next_id_base(void)
630 {
631     rb_id_serial_t next_serial = global_symbols.last_id + 1;
632 
633     if (next_serial == 0) {
634 	return (ID)-1;
635     }
636     else {
637 	const size_t num = ++global_symbols.last_id;
638 	return num << ID_SCOPE_SHIFT;
639     }
640 }
641 
642 static ID
intern_str(VALUE str,int mutable)643 intern_str(VALUE str, int mutable)
644 {
645     ID id;
646     ID nid;
647 
648     id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
649     if (id == (ID)-1) id = ID_JUNK;
650     if (sym_check_asciionly(str)) {
651 	if (!mutable) str = rb_str_dup(str);
652 	rb_enc_associate(str, rb_usascii_encoding());
653     }
654     if ((nid = next_id_base()) == (ID)-1) {
655 	str = rb_str_ellipsize(str, 20);
656 	rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")",
657 		 str);
658     }
659     id |= nid;
660     id |= ID_STATIC_SYM;
661     return register_static_symid_str(id, str);
662 }
663 
664 ID
rb_intern2(const char * name,long len)665 rb_intern2(const char *name, long len)
666 {
667     return rb_intern3(name, len, rb_usascii_encoding());
668 }
669 
670 #undef rb_intern
671 ID
rb_intern(const char * name)672 rb_intern(const char *name)
673 {
674     return rb_intern2(name, strlen(name));
675 }
676 
677 ID
rb_intern_str(VALUE str)678 rb_intern_str(VALUE str)
679 {
680     VALUE sym = lookup_str_sym(str);
681 
682     if (sym) {
683 	return SYM2ID(sym);
684     }
685 
686     return intern_str(str, 0);
687 }
688 
689 void
rb_gc_free_dsymbol(VALUE sym)690 rb_gc_free_dsymbol(VALUE sym)
691 {
692     VALUE str = RSYMBOL(sym)->fstr;
693 
694     if (str) {
695 	RSYMBOL(sym)->fstr = 0;
696 	unregister_sym(str, sym);
697 	rb_hash_delete_entry(global_symbols.dsymbol_fstr_hash, str);
698     }
699 }
700 
701 /*
702  *  call-seq:
703  *     str.intern   -> symbol
704  *     str.to_sym   -> symbol
705  *
706  *  Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
707  *  symbol if it did not previously exist. See <code>Symbol#id2name</code>.
708  *
709  *     "Koala".intern         #=> :Koala
710  *     s = 'cat'.to_sym       #=> :cat
711  *     s == :cat              #=> true
712  *     s = '@cat'.to_sym      #=> :@cat
713  *     s == :@cat             #=> true
714  *
715  *  This can also be used to create symbols that cannot be represented using the
716  *  <code>:xxx</code> notation.
717  *
718  *     'cat and dog'.to_sym   #=> :"cat and dog"
719  */
720 
721 VALUE
rb_str_intern(VALUE str)722 rb_str_intern(VALUE str)
723 {
724 #if USE_SYMBOL_GC
725     rb_encoding *enc, *ascii;
726     int type;
727 #else
728     ID id;
729 #endif
730     VALUE sym = lookup_str_sym(str);
731 
732     if (sym) {
733 	return sym;
734     }
735 
736 #if USE_SYMBOL_GC
737     enc = rb_enc_get(str);
738     ascii = rb_usascii_encoding();
739     if (enc != ascii && sym_check_asciionly(str)) {
740 	str = rb_str_dup(str);
741 	rb_enc_associate(str, ascii);
742 	OBJ_FREEZE(str);
743 	enc = ascii;
744     }
745     else {
746         str = rb_str_dup(str);
747         OBJ_FREEZE(str);
748     }
749     str = rb_fstring(str);
750     type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
751     if (type < 0) type = ID_JUNK;
752     return dsymbol_alloc(rb_cSymbol, str, enc, type);
753 #else
754     id = intern_str(str, 0);
755     return ID2SYM(id);
756 #endif
757 }
758 
759 ID
rb_sym2id(VALUE sym)760 rb_sym2id(VALUE sym)
761 {
762     ID id;
763     if (STATIC_SYM_P(sym)) {
764 	id = STATIC_SYM2ID(sym);
765     }
766     else if (DYNAMIC_SYM_P(sym)) {
767 	sym = dsymbol_check(sym);
768 	id = RSYMBOL(sym)->id;
769 	if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
770 	    VALUE fstr = RSYMBOL(sym)->fstr;
771 	    ID num = next_id_base();
772 
773 	    RSYMBOL(sym)->id = id |= num;
774 	    /* make it permanent object */
775 	    set_id_entry(rb_id_to_serial(num), fstr, sym);
776 	    rb_hash_delete_entry(global_symbols.dsymbol_fstr_hash, fstr);
777 	}
778     }
779     else {
780 	rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
781 		 rb_builtin_class_name(sym));
782     }
783     return id;
784 }
785 
786 #undef rb_id2sym
787 VALUE
rb_id2sym(ID x)788 rb_id2sym(ID x)
789 {
790     if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
791     return get_id_entry(x, ID_ENTRY_SYM);
792 }
793 
794 
795 VALUE
rb_sym2str(VALUE sym)796 rb_sym2str(VALUE sym)
797 {
798     if (DYNAMIC_SYM_P(sym)) {
799 	return RSYMBOL(sym)->fstr;
800     }
801     else {
802 	return rb_id2str(STATIC_SYM2ID(sym));
803     }
804 }
805 
806 VALUE
rb_id2str(ID id)807 rb_id2str(ID id)
808 {
809     return lookup_id_str(id);
810 }
811 
812 const char *
rb_id2name(ID id)813 rb_id2name(ID id)
814 {
815     VALUE str = rb_id2str(id);
816 
817     if (!str) return 0;
818     return RSTRING_PTR(str);
819 }
820 
821 ID
rb_make_internal_id(void)822 rb_make_internal_id(void)
823 {
824     return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
825 }
826 
827 static int
symbols_i(st_data_t key,st_data_t value,st_data_t arg)828 symbols_i(st_data_t key, st_data_t value, st_data_t arg)
829 {
830     VALUE ary = (VALUE)arg;
831     VALUE sym = (VALUE)value;
832 
833     if (STATIC_SYM_P(sym)) {
834 	rb_ary_push(ary, sym);
835 	return ST_CONTINUE;
836     }
837     else if (!DYNAMIC_SYM_P(sym)) {
838 	rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key));
839     }
840     else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) {
841 	RSYMBOL(sym)->fstr = 0;
842 	return ST_DELETE;
843     }
844     else {
845 	rb_ary_push(ary, sym);
846 	return ST_CONTINUE;
847     }
848 
849 }
850 
851 /*
852  *  call-seq:
853  *     Symbol.all_symbols    => array
854  *
855  *  Returns an array of all the symbols currently in Ruby's symbol
856  *  table.
857  *
858  *     Symbol.all_symbols.size    #=> 903
859  *     Symbol.all_symbols[1,20]   #=> [:floor, :ARGV, :Binding, :symlink,
860  *                                     :chown, :EOFError, :$;, :String,
861  *                                     :LOCK_SH, :"setuid?", :$<,
862  *                                     :default_proc, :compact, :extend,
863  *                                     :Tms, :getwd, :$=, :ThreadGroup,
864  *                                     :wait2, :$>]
865  */
866 
867 VALUE
rb_sym_all_symbols(void)868 rb_sym_all_symbols(void)
869 {
870     VALUE ary = rb_ary_new2(global_symbols.str_sym->num_entries);
871     st_foreach(global_symbols.str_sym, symbols_i, ary);
872     return ary;
873 }
874 
875 size_t
rb_sym_immortal_count(void)876 rb_sym_immortal_count(void)
877 {
878     return (size_t)global_symbols.last_id;
879 }
880 
881 int
rb_is_const_id(ID id)882 rb_is_const_id(ID id)
883 {
884     return is_const_id(id);
885 }
886 
887 int
rb_is_class_id(ID id)888 rb_is_class_id(ID id)
889 {
890     return is_class_id(id);
891 }
892 
893 int
rb_is_global_id(ID id)894 rb_is_global_id(ID id)
895 {
896     return is_global_id(id);
897 }
898 
899 int
rb_is_instance_id(ID id)900 rb_is_instance_id(ID id)
901 {
902     return is_instance_id(id);
903 }
904 
905 int
rb_is_attrset_id(ID id)906 rb_is_attrset_id(ID id)
907 {
908     return is_attrset_id(id);
909 }
910 
911 int
rb_is_local_id(ID id)912 rb_is_local_id(ID id)
913 {
914     return is_local_id(id);
915 }
916 
917 int
rb_is_junk_id(ID id)918 rb_is_junk_id(ID id)
919 {
920     return is_junk_id(id);
921 }
922 
923 int
rb_is_const_sym(VALUE sym)924 rb_is_const_sym(VALUE sym)
925 {
926     return is_const_sym(sym);
927 }
928 
929 int
rb_is_class_sym(VALUE sym)930 rb_is_class_sym(VALUE sym)
931 {
932     return is_class_sym(sym);
933 }
934 
935 int
rb_is_global_sym(VALUE sym)936 rb_is_global_sym(VALUE sym)
937 {
938     return is_global_sym(sym);
939 }
940 
941 int
rb_is_instance_sym(VALUE sym)942 rb_is_instance_sym(VALUE sym)
943 {
944     return is_instance_sym(sym);
945 }
946 
947 int
rb_is_attrset_sym(VALUE sym)948 rb_is_attrset_sym(VALUE sym)
949 {
950     return is_attrset_sym(sym);
951 }
952 
953 int
rb_is_local_sym(VALUE sym)954 rb_is_local_sym(VALUE sym)
955 {
956     return is_local_sym(sym);
957 }
958 
959 int
rb_is_junk_sym(VALUE sym)960 rb_is_junk_sym(VALUE sym)
961 {
962     return is_junk_sym(sym);
963 }
964 
965 /**
966  * Returns ID for the given name if it is interned already, or 0.
967  *
968  * \param namep   the pointer to the name object
969  * \return        the ID for *namep
970  * \pre           the object referred by \p namep must be a Symbol or
971  *                a String, or possible to convert with to_str method.
972  * \post          the object referred by \p namep is a Symbol or a
973  *                String if non-zero value is returned, or is a String
974  *                if 0 is returned.
975  */
976 ID
rb_check_id(volatile VALUE * namep)977 rb_check_id(volatile VALUE *namep)
978 {
979     VALUE tmp;
980     VALUE name = *namep;
981 
982     if (STATIC_SYM_P(name)) {
983 	return STATIC_SYM2ID(name);
984     }
985     else if (DYNAMIC_SYM_P(name)) {
986 	if (SYMBOL_PINNED_P(name)) {
987 	    return RSYMBOL(name)->id;
988 	}
989 	else {
990 	    *namep = RSYMBOL(name)->fstr;
991 	    return 0;
992 	}
993     }
994     else if (!RB_TYPE_P(name, T_STRING)) {
995 	tmp = rb_check_string_type(name);
996 	if (NIL_P(tmp)) {
997 	    rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
998 		     name);
999 	}
1000 	name = tmp;
1001 	*namep = name;
1002     }
1003 
1004     sym_check_asciionly(name);
1005 
1006     return lookup_str_id(name);
1007 }
1008 
1009 VALUE
rb_check_symbol(volatile VALUE * namep)1010 rb_check_symbol(volatile VALUE *namep)
1011 {
1012     VALUE sym;
1013     VALUE tmp;
1014     VALUE name = *namep;
1015 
1016     if (STATIC_SYM_P(name)) {
1017 	return name;
1018     }
1019     else if (DYNAMIC_SYM_P(name)) {
1020 	if (!SYMBOL_PINNED_P(name)) {
1021 	    name = dsymbol_check(name);
1022 	    *namep = name;
1023 	}
1024 	return name;
1025     }
1026     else if (!RB_TYPE_P(name, T_STRING)) {
1027 	tmp = rb_check_string_type(name);
1028 	if (NIL_P(tmp)) {
1029 	    rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1030 		     name);
1031 	}
1032 	name = tmp;
1033 	*namep = name;
1034     }
1035 
1036     sym_check_asciionly(name);
1037 
1038     if ((sym = lookup_str_sym(name)) != 0) {
1039 	return sym;
1040     }
1041 
1042     return Qnil;
1043 }
1044 
1045 ID
rb_check_id_cstr(const char * ptr,long len,rb_encoding * enc)1046 rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1047 {
1048     struct RString fake_str;
1049     const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1050 
1051     sym_check_asciionly(name);
1052 
1053     return lookup_str_id(name);
1054 }
1055 
1056 VALUE
rb_check_symbol_cstr(const char * ptr,long len,rb_encoding * enc)1057 rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1058 {
1059     VALUE sym;
1060     struct RString fake_str;
1061     const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1062 
1063     sym_check_asciionly(name);
1064 
1065     if ((sym = lookup_str_sym(name)) != 0) {
1066 	return sym;
1067     }
1068 
1069     return Qnil;
1070 }
1071 
/*
 * Drop any macro definitions of these two names (presumably provided by
 * a header) before the functions of the same name are defined below.
 */
#undef rb_sym_intern_cstr
#undef rb_sym_intern_ascii_cstr
/*
 * Attribute declarations for the rb_sym_intern family: under clang only
 * rb_sym_intern is declared, as NOINLINE; with other compilers all four
 * functions are declared FUNC_MINIMIZED.
 */
#ifdef __clang__
NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
#else
FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
FUNC_MINIMIZED(VALUE rb_sym_intern_cstr(const char *ptr, rb_encoding *enc));
FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
#endif
1082 
1083 VALUE
rb_sym_intern(const char * ptr,long len,rb_encoding * enc)1084 rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1085 {
1086     struct RString fake_str;
1087     const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1088     return rb_str_intern(name);
1089 }
1090 
1091 VALUE
rb_sym_intern_cstr(const char * ptr,rb_encoding * enc)1092 rb_sym_intern_cstr(const char *ptr, rb_encoding *enc)
1093 {
1094     return rb_sym_intern(ptr, strlen(ptr), enc);
1095 }
1096 
1097 VALUE
rb_sym_intern_ascii(const char * ptr,long len)1098 rb_sym_intern_ascii(const char *ptr, long len)
1099 {
1100     return rb_sym_intern(ptr, len, rb_usascii_encoding());
1101 }
1102 
1103 VALUE
rb_sym_intern_ascii_cstr(const char * ptr)1104 rb_sym_intern_ascii_cstr(const char *ptr)
1105 {
1106     return rb_sym_intern_ascii(ptr, strlen(ptr));
1107 }
1108 
1109 VALUE
rb_to_symbol_type(VALUE obj)1110 rb_to_symbol_type(VALUE obj)
1111 {
1112     return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1113 }
1114 
1115 static ID
attrsetname_to_attr_id(VALUE name)1116 attrsetname_to_attr_id(VALUE name)
1117 {
1118     ID id;
1119     struct RString fake_str;
1120     /* make local name by chopping '=' */
1121     const VALUE localname = rb_setup_fake_str(&fake_str,
1122 					      RSTRING_PTR(name), RSTRING_LEN(name) - 1,
1123 					      rb_enc_get(name));
1124     OBJ_FREEZE(localname);
1125 
1126     if ((id = lookup_str_id(localname)) != 0) {
1127 	return id;
1128     }
1129     RB_GC_GUARD(name);
1130     return (ID)0;
1131 }
1132 
1133 static ID
attrsetname_to_attr(VALUE name)1134 attrsetname_to_attr(VALUE name)
1135 {
1136     if (rb_is_attrset_name(name)) {
1137 	return attrsetname_to_attr_id(name);
1138     }
1139 
1140     return (ID)0;
1141 }
1142 
1143 int
rb_is_const_name(VALUE name)1144 rb_is_const_name(VALUE name)
1145 {
1146     return rb_str_symname_type(name, 0) == ID_CONST;
1147 }
1148 
1149 int
rb_is_class_name(VALUE name)1150 rb_is_class_name(VALUE name)
1151 {
1152     return rb_str_symname_type(name, 0) == ID_CLASS;
1153 }
1154 
1155 int
rb_is_global_name(VALUE name)1156 rb_is_global_name(VALUE name)
1157 {
1158     return rb_str_symname_type(name, 0) == ID_GLOBAL;
1159 }
1160 
1161 int
rb_is_instance_name(VALUE name)1162 rb_is_instance_name(VALUE name)
1163 {
1164     return rb_str_symname_type(name, 0) == ID_INSTANCE;
1165 }
1166 
1167 int
rb_is_attrset_name(VALUE name)1168 rb_is_attrset_name(VALUE name)
1169 {
1170     return rb_str_symname_type(name, IDSET_ATTRSET_FOR_INTERN) == ID_ATTRSET;
1171 }
1172 
1173 int
rb_is_local_name(VALUE name)1174 rb_is_local_name(VALUE name)
1175 {
1176     return rb_str_symname_type(name, 0) == ID_LOCAL;
1177 }
1178 
1179 int
rb_is_method_name(VALUE name)1180 rb_is_method_name(VALUE name)
1181 {
1182     switch (rb_str_symname_type(name, 0)) {
1183       case ID_LOCAL: case ID_ATTRSET: case ID_JUNK:
1184 	return TRUE;
1185     }
1186     return FALSE;
1187 }
1188 
1189 int
rb_is_junk_name(VALUE name)1190 rb_is_junk_name(VALUE name)
1191 {
1192     return rb_str_symname_type(name, IDSET_ATTRSET_FOR_SYNTAX) == -1;
1193 }
1194 
1195 #include "id_table.c"
1196