1 /**********************************************************************
2
  symbol.c -
4
5 $Author$
6 created at: Tue Jul 8 15:49:54 JST 2014
7
8 Copyright (C) 2014 Yukihiro Matsumoto
9
10 **********************************************************************/
11
12 #include "ruby/encoding.h"
13 #include "ruby/st.h"
14 #include "internal.h"
15 #include "symbol.h"
16 #include "gc.h"
17 #include "probes.h"
18
/* SYMBOL_DEBUG defaults to 0; the code guarded by it below is compiled only when it is non-zero. */
#ifndef SYMBOL_DEBUG
# define SYMBOL_DEBUG 0
#endif
/* CHECK_ID_SERIAL defaults to whatever SYMBOL_DEBUG is set to. */
#ifndef CHECK_ID_SERIAL
# define CHECK_ID_SERIAL SYMBOL_DEBUG
#endif

/* Non-zero when the id field of a dynamic symbol has bits set outside ID_SCOPE_MASK. */
#define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)

/* Extracts the ID stored in a static symbol VALUE by shifting out the low special bits. */
#define STATIC_SYM2ID(sym) RSHIFT((unsigned long)(sym), RUBY_SPECIAL_SHIFT)

static ID register_static_symid(ID, const char *, long, rb_encoding *);
static ID register_static_symid_str(ID, VALUE);
/* Expands to a call that uses a variable named `enc`, which must be in scope at the use site. */
#define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
#include "id.c"

/* Only `p` is examined; the `e` and `enc` arguments are ignored by this definition. */
#define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))

#define op_tbl_count numberof(op_tbl)
STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
/* Length (1..3) of op_tbl[i].name, found by locating the first zero byte after the first character. */
#define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
40
41 static void
Init_op_tbl(void)42 Init_op_tbl(void)
43 {
44 int i;
45 rb_encoding *const enc = rb_usascii_encoding();
46
47 for (i = '!'; i <= '~'; ++i) {
48 if (!ISALNUM(i) && i != '_') {
49 char c = (char)i;
50 register_static_symid(i, &c, 1, enc);
51 }
52 }
53 for (i = 0; i < op_tbl_count; ++i) {
54 register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
55 }
56 }
57
/* Number of IDs whose entries share one second-level array of global_symbols.ids. */
static const int ID_ENTRY_UNIT = 512;

/* Slots stored per ID inside a second-level array. */
enum id_entry_type {
    ID_ENTRY_STR,   /* the name string */
    ID_ENTRY_SYM,   /* the symbol */
    ID_ENTRY_SIZE   /* number of slots per ID */
};

static struct symbols {
    rb_id_serial_t last_id;     /* last serial number handed out by next_id_base() */
    st_table *str_sym;          /* name string => symbol */
    VALUE ids;                  /* array of arrays holding (str, sym) per serial */
    VALUE dsymbol_fstr_hash;    /* name strings of dynamic symbols (value is Qtrue) */
} global_symbols = {tNEXT_ID-1};

/* Hash type for str_sym: keys are compared and hashed as strings. */
static const struct st_hash_type symhash = {
    rb_str_hash_cmp,
    rb_str_hash,
};
77
78 void
Init_sym(void)79 Init_sym(void)
80 {
81 VALUE dsym_fstrs = rb_ident_hash_new();
82 global_symbols.dsymbol_fstr_hash = dsym_fstrs;
83 rb_gc_register_mark_object(dsym_fstrs);
84 rb_obj_hide(dsym_fstrs);
85
86 global_symbols.str_sym = st_init_table_with_size(&symhash, 1000);
87 global_symbols.ids = rb_ary_tmp_new(0);
88 rb_gc_register_mark_object(global_symbols.ids);
89
90 Init_op_tbl();
91 Init_id();
92 }
93
/* Forward declarations of file-local helpers, each wrapped in WARN_UNUSED_RESULT. */
WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type));
WARN_UNUSED_RESULT(static VALUE dsymbol_check(const VALUE sym));
WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str));
WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
WARN_UNUSED_RESULT(static ID attrsetname_to_attr(VALUE name));
WARN_UNUSED_RESULT(static ID attrsetname_to_attr_id(VALUE name));
WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable));
102
103 ID
rb_id_attrset(ID id)104 rb_id_attrset(ID id)
105 {
106 VALUE str, sym;
107 int scope;
108
109 if (!is_notop_id(id)) {
110 switch (id) {
111 case tAREF: case tASET:
112 return tASET; /* only exception */
113 }
114 rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
115 rb_id2str(id));
116 }
117 else {
118 scope = id_type(id);
119 switch (scope) {
120 case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
121 case ID_CONST: case ID_CLASS: case ID_JUNK:
122 break;
123 case ID_ATTRSET:
124 return id;
125 default:
126 {
127 if ((str = lookup_id_str(id)) != 0) {
128 rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
129 scope, str);
130 }
131 else {
132 rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
133 scope, (VALUE)id);
134 }
135 }
136 }
137 }
138
139 /* make new symbol and ID */
140 if (!(str = lookup_id_str(id))) {
141 static const char id_types[][8] = {
142 "local",
143 "instance",
144 "invalid",
145 "global",
146 "attrset",
147 "const",
148 "class",
149 "junk",
150 };
151 rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
152 (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
153 }
154 str = rb_str_dup(str);
155 rb_str_cat(str, "=", 1);
156 sym = lookup_str_sym(str);
157 id = sym ? rb_sym2id(sym) : intern_str(str, 1);
158 return id;
159 }
160
161 ID
rb_id_attrget(ID id)162 rb_id_attrget(ID id)
163 {
164 return attrsetname_to_attr(rb_id2str(id));
165 }
166
167 static int
is_special_global_name(const char * m,const char * e,rb_encoding * enc)168 is_special_global_name(const char *m, const char *e, rb_encoding *enc)
169 {
170 int mb = 0;
171
172 if (m >= e) return 0;
173 if (is_global_name_punct(*m)) {
174 ++m;
175 }
176 else if (*m == '-') {
177 if (++m >= e) return 0;
178 if (is_identchar(m, e, enc)) {
179 if (!ISASCII(*m)) mb = 1;
180 m += rb_enc_mbclen(m, e, enc);
181 }
182 }
183 else {
184 if (!ISDIGIT(*m)) return 0;
185 do {
186 if (!ISASCII(*m)) mb = 1;
187 ++m;
188 } while (m < e && ISDIGIT(*m));
189 }
190 return m == e ? mb + 1 : 0;
191 }
192
193 int
rb_symname_p(const char * name)194 rb_symname_p(const char *name)
195 {
196 return rb_enc_symname_p(name, rb_ascii8bit_encoding());
197 }
198
199 int
rb_enc_symname_p(const char * name,rb_encoding * enc)200 rb_enc_symname_p(const char *name, rb_encoding *enc)
201 {
202 return rb_enc_symname2_p(name, strlen(name), enc);
203 }
204
205 static int
rb_sym_constant_char_p(const char * name,long nlen,rb_encoding * enc)206 rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
207 {
208 int c, len;
209 const char *end = name + nlen;
210
211 if (nlen < 1) return FALSE;
212 if (ISASCII(*name)) return ISUPPER(*name);
213 c = rb_enc_precise_mbclen(name, end, enc);
214 if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
215 len = MBCLEN_CHARFOUND_LEN(c);
216 c = rb_enc_mbc_to_codepoint(name, end, enc);
217 if (ONIGENC_IS_UNICODE(enc)) {
218 static int ctype_titlecase = 0;
219 if (rb_enc_isupper(c, enc)) return TRUE;
220 if (rb_enc_islower(c, enc)) return FALSE;
221 if (!ctype_titlecase) {
222 static const UChar cname[] = "titlecaseletter";
223 static const UChar *const end = cname + sizeof(cname) - 1;
224 ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
225 }
226 if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
227 }
228 else {
229 /* fallback to case-folding */
230 OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
231 const OnigUChar *beg = (const OnigUChar *)name;
232 int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
233 &beg, (const OnigUChar *)end,
234 fold, enc);
235 if (r > 0 && (r != len || memcmp(fold, name, r)))
236 return TRUE;
237 }
238 return FALSE;
239 }
240
241 #define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
242 #define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
243
244 int
rb_enc_symname_type(const char * name,long len,rb_encoding * enc,unsigned int allowed_attrset)245 rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
246 {
247 const char *m = name;
248 const char *e = m + len;
249 int type = ID_JUNK;
250
251 if (!rb_enc_asciicompat(enc)) return -1;
252 if (!m || len <= 0) return -1;
253 switch (*m) {
254 case '\0':
255 return -1;
256
257 case '$':
258 type = ID_GLOBAL;
259 if (is_special_global_name(++m, e, enc)) return type;
260 goto id;
261
262 case '@':
263 type = ID_INSTANCE;
264 if (*++m == '@') {
265 ++m;
266 type = ID_CLASS;
267 }
268 goto id;
269
270 case '<':
271 switch (*++m) {
272 case '<': ++m; break;
273 case '=': if (*++m == '>') ++m; break;
274 default: break;
275 }
276 break;
277
278 case '>':
279 switch (*++m) {
280 case '>': case '=': ++m; break;
281 }
282 break;
283
284 case '=':
285 switch (*++m) {
286 case '~': ++m; break;
287 case '=': if (*++m == '=') ++m; break;
288 default: return -1;
289 }
290 break;
291
292 case '*':
293 if (*++m == '*') ++m;
294 break;
295
296 case '+': case '-':
297 if (*++m == '@') ++m;
298 break;
299
300 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
301 ++m;
302 break;
303
304 case '[':
305 if (m[1] != ']') goto id;
306 ++m;
307 if (*++m == '=') ++m;
308 break;
309
310 case '!':
311 if (len == 1) return ID_JUNK;
312 switch (*++m) {
313 case '=': case '~': ++m; break;
314 default:
315 if (allowed_attrset & (1U << ID_JUNK)) goto id;
316 return -1;
317 }
318 break;
319
320 default:
321 type = rb_sym_constant_char_p(m, e-m, enc) ? ID_CONST : ID_LOCAL;
322 id:
323 if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
324 if (len > 1 && *(e-1) == '=') {
325 type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
326 if (type != ID_ATTRSET) return ID_ATTRSET;
327 }
328 return -1;
329 }
330 while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
331 if (m >= e) break;
332 switch (*m) {
333 case '!': case '?':
334 if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
335 type = ID_JUNK;
336 ++m;
337 if (m + 1 < e || *m != '=') break;
338 /* fall through */
339 case '=':
340 if (!(allowed_attrset & (1U << type))) return -1;
341 type = ID_ATTRSET;
342 ++m;
343 break;
344 }
345 break;
346 }
347 return m == e ? type : -1;
348 }
349
350 int
rb_enc_symname2_p(const char * name,long len,rb_encoding * enc)351 rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
352 {
353 return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
354 }
355
356 static int
rb_str_symname_type(VALUE name,unsigned int allowed_attrset)357 rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
358 {
359 const char *ptr = StringValuePtr(name);
360 long len = RSTRING_LEN(name);
361 int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
362 RB_GC_GUARD(name);
363 return type;
364 }
365
366 static void
set_id_entry(rb_id_serial_t num,VALUE str,VALUE sym)367 set_id_entry(rb_id_serial_t num, VALUE str, VALUE sym)
368 {
369 size_t idx = num / ID_ENTRY_UNIT;
370 VALUE ary, ids = global_symbols.ids;
371 if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
372 ary = rb_ary_tmp_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
373 rb_ary_store(ids, (long)idx, ary);
374 }
375 idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
376 rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
377 rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
378 }
379
380 static VALUE
get_id_serial_entry(rb_id_serial_t num,ID id,const enum id_entry_type t)381 get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
382 {
383 if (num && num <= global_symbols.last_id) {
384 size_t idx = num / ID_ENTRY_UNIT;
385 VALUE ids = global_symbols.ids;
386 VALUE ary;
387 if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
388 long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
389 VALUE result = rb_ary_entry(ary, pos + t);
390 if (NIL_P(result)) return 0;
391 #if CHECK_ID_SERIAL
392 if (id) {
393 VALUE sym = result;
394 if (t != ID_ENTRY_SYM)
395 sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
396 if (STATIC_SYM_P(sym)) {
397 if (STATIC_SYM2ID(sym) != id) return 0;
398 }
399 else {
400 if (RSYMBOL(sym)->id != id) return 0;
401 }
402 }
403 #endif
404 return result;
405 }
406 }
407 return 0;
408 }
409
410 static VALUE
get_id_entry(ID id,const enum id_entry_type t)411 get_id_entry(ID id, const enum id_entry_type t)
412 {
413 return get_id_serial_entry(rb_id_to_serial(id), id, t);
414 }
415
416 static inline ID
417 #ifdef __GNUC__
418 __attribute__((unused))
419 #endif
rb_id_serial_to_id(rb_id_serial_t num)420 rb_id_serial_to_id(rb_id_serial_t num)
421 {
422 if (is_notop_id((ID)num)) {
423 VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
424 return SYM2ID(sym);
425 }
426 else {
427 return (ID)num;
428 }
429 }
430
#if SYMBOL_DEBUG
/*
 * st_update() callback used by register_sym(): aborts through
 * rb_fatal() when the key is already present, otherwise stores +arg+
 * as the value.
 */
static int
register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
{
    if (!existing) {
        *value = arg;
        return ST_CONTINUE;
    }
    rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE,
             (VALUE)*key, (VALUE)*value);
    *value = arg;
    return ST_CONTINUE;
}
#endif
443
444 static void
register_sym(VALUE str,VALUE sym)445 register_sym(VALUE str, VALUE sym)
446 {
447 #if SYMBOL_DEBUG
448 st_update(global_symbols.str_sym, (st_data_t)str,
449 register_sym_update_callback, (st_data_t)sym);
450 #else
451 st_add_direct(global_symbols.str_sym, (st_data_t)str, (st_data_t)sym);
452 #endif
453 }
454
455 static void
unregister_sym(VALUE str,VALUE sym)456 unregister_sym(VALUE str, VALUE sym)
457 {
458 st_data_t str_data = (st_data_t)str;
459 if (!st_delete(global_symbols.str_sym, &str_data, NULL)) {
460 rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str));
461 }
462 }
463
464 static ID
register_static_symid(ID id,const char * name,long len,rb_encoding * enc)465 register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
466 {
467 VALUE str = rb_enc_str_new(name, len, enc);
468 return register_static_symid_str(id, str);
469 }
470
471 static ID
register_static_symid_str(ID id,VALUE str)472 register_static_symid_str(ID id, VALUE str)
473 {
474 rb_id_serial_t num = rb_id_to_serial(id);
475 VALUE sym = STATIC_ID2SYM(id);
476
477 OBJ_FREEZE(str);
478 str = rb_fstring(str);
479
480 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
481
482 register_sym(str, sym);
483 set_id_entry(num, str, sym);
484
485 return id;
486 }
487
488 static int
sym_check_asciionly(VALUE str)489 sym_check_asciionly(VALUE str)
490 {
491 if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
492 switch (rb_enc_str_coderange(str)) {
493 case ENC_CODERANGE_BROKEN:
494 rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
495 rb_enc_name(rb_enc_get(str)), str);
496 case ENC_CODERANGE_7BIT:
497 return TRUE;
498 }
499 return FALSE;
500 }
501
#if 0
/* NOTE(review): everything inside this #if 0 is compiled out. */
/*
 * _str_ itself will be registered at the global symbol table.  _str_
 * can be modified before the registration, since the encoding will be
 * set to ASCII-8BIT if it is a special global name.
 */
/* NOTE(review): the comment above does not describe the function below;
 * it presumably belonged to a string-registering function - confirm. */

/*
 * Reports a bug through rb_bug() unless +x+ is a dynamic symbol; the
 * message depends on whether +x+ is a static symbol (with or without a
 * registered name) or not a symbol at all.
 */
static inline void
must_be_dynamic_symbol(VALUE x)
{
    if (UNLIKELY(!DYNAMIC_SYM_P(x))) {
        if (STATIC_SYM_P(x)) {
            VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT));

            if (str) {
                rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str));
            }
            else {
                rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x);
            }
        }
        else {
            rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x));
        }
    }
}
#endif
529
530 static VALUE
dsymbol_alloc(const VALUE klass,const VALUE str,rb_encoding * const enc,const ID type)531 dsymbol_alloc(const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
532 {
533 const VALUE dsym = rb_newobj_of(klass, T_SYMBOL | FL_WB_PROTECTED);
534 long hashval;
535
536 rb_enc_set_index(dsym, rb_enc_to_index(enc));
537 OBJ_FREEZE(dsym);
538 RB_OBJ_WRITE(dsym, &RSYMBOL(dsym)->fstr, str);
539 RSYMBOL(dsym)->id = type;
540
541 /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
542 hashval = (long)rb_str_hash(str);
543 RSYMBOL(dsym)->hashval = RSHIFT((long)hashval, 1);
544
545 register_sym(str, dsym);
546 rb_hash_aset(global_symbols.dsymbol_fstr_hash, str, Qtrue);
547
548 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(RSYMBOL(dsym)->fstr));
549
550 return dsym;
551 }
552
553 static inline VALUE
dsymbol_check(const VALUE sym)554 dsymbol_check(const VALUE sym)
555 {
556 if (UNLIKELY(rb_objspace_garbage_object_p(sym))) {
557 const VALUE fstr = RSYMBOL(sym)->fstr;
558 const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK;
559 RSYMBOL(sym)->fstr = 0;
560
561 unregister_sym(fstr, sym);
562 return dsymbol_alloc(rb_cSymbol, fstr, rb_enc_get(fstr), type);
563 }
564 else {
565 return sym;
566 }
567 }
568
569 static ID
lookup_str_id(VALUE str)570 lookup_str_id(VALUE str)
571 {
572 st_data_t sym_data;
573 if (st_lookup(global_symbols.str_sym, (st_data_t)str, &sym_data)) {
574 const VALUE sym = (VALUE)sym_data;
575
576 if (STATIC_SYM_P(sym)) {
577 return STATIC_SYM2ID(sym);
578 }
579 else if (DYNAMIC_SYM_P(sym)) {
580 ID id = RSYMBOL(sym)->id;
581 if (id & ~ID_SCOPE_MASK) return id;
582 }
583 else {
584 rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
585 rb_builtin_class_name(sym), sym, str);
586 }
587 }
588 return (ID)0;
589 }
590
591 static VALUE
lookup_str_sym(const VALUE str)592 lookup_str_sym(const VALUE str)
593 {
594 st_data_t sym_data;
595 if (st_lookup(global_symbols.str_sym, (st_data_t)str, &sym_data)) {
596 VALUE sym = (VALUE)sym_data;
597
598 if (DYNAMIC_SYM_P(sym)) {
599 sym = dsymbol_check(sym);
600 }
601 return sym;
602 }
603 else {
604 return (VALUE)0;
605 }
606 }
607
608 static VALUE
lookup_id_str(ID id)609 lookup_id_str(ID id)
610 {
611 return get_id_entry(id, ID_ENTRY_STR);
612 }
613
614 ID
rb_intern3(const char * name,long len,rb_encoding * enc)615 rb_intern3(const char *name, long len, rb_encoding *enc)
616 {
617 VALUE sym;
618 struct RString fake_str;
619 VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
620 OBJ_FREEZE(str);
621
622 sym = lookup_str_sym(str);
623 if (sym) return rb_sym2id(sym);
624 str = rb_enc_str_new(name, len, enc); /* make true string */
625 return intern_str(str, 1);
626 }
627
628 static ID
next_id_base(void)629 next_id_base(void)
630 {
631 rb_id_serial_t next_serial = global_symbols.last_id + 1;
632
633 if (next_serial == 0) {
634 return (ID)-1;
635 }
636 else {
637 const size_t num = ++global_symbols.last_id;
638 return num << ID_SCOPE_SHIFT;
639 }
640 }
641
642 static ID
intern_str(VALUE str,int mutable)643 intern_str(VALUE str, int mutable)
644 {
645 ID id;
646 ID nid;
647
648 id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
649 if (id == (ID)-1) id = ID_JUNK;
650 if (sym_check_asciionly(str)) {
651 if (!mutable) str = rb_str_dup(str);
652 rb_enc_associate(str, rb_usascii_encoding());
653 }
654 if ((nid = next_id_base()) == (ID)-1) {
655 str = rb_str_ellipsize(str, 20);
656 rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")",
657 str);
658 }
659 id |= nid;
660 id |= ID_STATIC_SYM;
661 return register_static_symid_str(id, str);
662 }
663
664 ID
rb_intern2(const char * name,long len)665 rb_intern2(const char *name, long len)
666 {
667 return rb_intern3(name, len, rb_usascii_encoding());
668 }
669
670 #undef rb_intern
671 ID
rb_intern(const char * name)672 rb_intern(const char *name)
673 {
674 return rb_intern2(name, strlen(name));
675 }
676
677 ID
rb_intern_str(VALUE str)678 rb_intern_str(VALUE str)
679 {
680 VALUE sym = lookup_str_sym(str);
681
682 if (sym) {
683 return SYM2ID(sym);
684 }
685
686 return intern_str(str, 0);
687 }
688
689 void
rb_gc_free_dsymbol(VALUE sym)690 rb_gc_free_dsymbol(VALUE sym)
691 {
692 VALUE str = RSYMBOL(sym)->fstr;
693
694 if (str) {
695 RSYMBOL(sym)->fstr = 0;
696 unregister_sym(str, sym);
697 rb_hash_delete_entry(global_symbols.dsymbol_fstr_hash, str);
698 }
699 }
700
701 /*
702 * call-seq:
703 * str.intern -> symbol
704 * str.to_sym -> symbol
705 *
706 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
707 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
708 *
709 * "Koala".intern #=> :Koala
710 * s = 'cat'.to_sym #=> :cat
711 * s == :cat #=> true
712 * s = '@cat'.to_sym #=> :@cat
713 * s == :@cat #=> true
714 *
715 * This can also be used to create symbols that cannot be represented using the
716 * <code>:xxx</code> notation.
717 *
718 * 'cat and dog'.to_sym #=> :"cat and dog"
719 */
720
721 VALUE
rb_str_intern(VALUE str)722 rb_str_intern(VALUE str)
723 {
724 #if USE_SYMBOL_GC
725 rb_encoding *enc, *ascii;
726 int type;
727 #else
728 ID id;
729 #endif
730 VALUE sym = lookup_str_sym(str);
731
732 if (sym) {
733 return sym;
734 }
735
736 #if USE_SYMBOL_GC
737 enc = rb_enc_get(str);
738 ascii = rb_usascii_encoding();
739 if (enc != ascii && sym_check_asciionly(str)) {
740 str = rb_str_dup(str);
741 rb_enc_associate(str, ascii);
742 OBJ_FREEZE(str);
743 enc = ascii;
744 }
745 else {
746 str = rb_str_dup(str);
747 OBJ_FREEZE(str);
748 }
749 str = rb_fstring(str);
750 type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
751 if (type < 0) type = ID_JUNK;
752 return dsymbol_alloc(rb_cSymbol, str, enc, type);
753 #else
754 id = intern_str(str, 0);
755 return ID2SYM(id);
756 #endif
757 }
758
759 ID
rb_sym2id(VALUE sym)760 rb_sym2id(VALUE sym)
761 {
762 ID id;
763 if (STATIC_SYM_P(sym)) {
764 id = STATIC_SYM2ID(sym);
765 }
766 else if (DYNAMIC_SYM_P(sym)) {
767 sym = dsymbol_check(sym);
768 id = RSYMBOL(sym)->id;
769 if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
770 VALUE fstr = RSYMBOL(sym)->fstr;
771 ID num = next_id_base();
772
773 RSYMBOL(sym)->id = id |= num;
774 /* make it permanent object */
775 set_id_entry(rb_id_to_serial(num), fstr, sym);
776 rb_hash_delete_entry(global_symbols.dsymbol_fstr_hash, fstr);
777 }
778 }
779 else {
780 rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
781 rb_builtin_class_name(sym));
782 }
783 return id;
784 }
785
786 #undef rb_id2sym
787 VALUE
rb_id2sym(ID x)788 rb_id2sym(ID x)
789 {
790 if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
791 return get_id_entry(x, ID_ENTRY_SYM);
792 }
793
794
795 VALUE
rb_sym2str(VALUE sym)796 rb_sym2str(VALUE sym)
797 {
798 if (DYNAMIC_SYM_P(sym)) {
799 return RSYMBOL(sym)->fstr;
800 }
801 else {
802 return rb_id2str(STATIC_SYM2ID(sym));
803 }
804 }
805
806 VALUE
rb_id2str(ID id)807 rb_id2str(ID id)
808 {
809 return lookup_id_str(id);
810 }
811
812 const char *
rb_id2name(ID id)813 rb_id2name(ID id)
814 {
815 VALUE str = rb_id2str(id);
816
817 if (!str) return 0;
818 return RSTRING_PTR(str);
819 }
820
821 ID
rb_make_internal_id(void)822 rb_make_internal_id(void)
823 {
824 return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
825 }
826
827 static int
symbols_i(st_data_t key,st_data_t value,st_data_t arg)828 symbols_i(st_data_t key, st_data_t value, st_data_t arg)
829 {
830 VALUE ary = (VALUE)arg;
831 VALUE sym = (VALUE)value;
832
833 if (STATIC_SYM_P(sym)) {
834 rb_ary_push(ary, sym);
835 return ST_CONTINUE;
836 }
837 else if (!DYNAMIC_SYM_P(sym)) {
838 rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key));
839 }
840 else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) {
841 RSYMBOL(sym)->fstr = 0;
842 return ST_DELETE;
843 }
844 else {
845 rb_ary_push(ary, sym);
846 return ST_CONTINUE;
847 }
848
849 }
850
851 /*
852 * call-seq:
853 * Symbol.all_symbols => array
854 *
855 * Returns an array of all the symbols currently in Ruby's symbol
856 * table.
857 *
858 * Symbol.all_symbols.size #=> 903
859 * Symbol.all_symbols[1,20] #=> [:floor, :ARGV, :Binding, :symlink,
860 * :chown, :EOFError, :$;, :String,
861 * :LOCK_SH, :"setuid?", :$<,
862 * :default_proc, :compact, :extend,
863 * :Tms, :getwd, :$=, :ThreadGroup,
864 * :wait2, :$>]
865 */
866
867 VALUE
rb_sym_all_symbols(void)868 rb_sym_all_symbols(void)
869 {
870 VALUE ary = rb_ary_new2(global_symbols.str_sym->num_entries);
871 st_foreach(global_symbols.str_sym, symbols_i, ary);
872 return ary;
873 }
874
875 size_t
rb_sym_immortal_count(void)876 rb_sym_immortal_count(void)
877 {
878 return (size_t)global_symbols.last_id;
879 }
880
881 int
rb_is_const_id(ID id)882 rb_is_const_id(ID id)
883 {
884 return is_const_id(id);
885 }
886
887 int
rb_is_class_id(ID id)888 rb_is_class_id(ID id)
889 {
890 return is_class_id(id);
891 }
892
893 int
rb_is_global_id(ID id)894 rb_is_global_id(ID id)
895 {
896 return is_global_id(id);
897 }
898
899 int
rb_is_instance_id(ID id)900 rb_is_instance_id(ID id)
901 {
902 return is_instance_id(id);
903 }
904
905 int
rb_is_attrset_id(ID id)906 rb_is_attrset_id(ID id)
907 {
908 return is_attrset_id(id);
909 }
910
911 int
rb_is_local_id(ID id)912 rb_is_local_id(ID id)
913 {
914 return is_local_id(id);
915 }
916
917 int
rb_is_junk_id(ID id)918 rb_is_junk_id(ID id)
919 {
920 return is_junk_id(id);
921 }
922
923 int
rb_is_const_sym(VALUE sym)924 rb_is_const_sym(VALUE sym)
925 {
926 return is_const_sym(sym);
927 }
928
929 int
rb_is_class_sym(VALUE sym)930 rb_is_class_sym(VALUE sym)
931 {
932 return is_class_sym(sym);
933 }
934
935 int
rb_is_global_sym(VALUE sym)936 rb_is_global_sym(VALUE sym)
937 {
938 return is_global_sym(sym);
939 }
940
941 int
rb_is_instance_sym(VALUE sym)942 rb_is_instance_sym(VALUE sym)
943 {
944 return is_instance_sym(sym);
945 }
946
947 int
rb_is_attrset_sym(VALUE sym)948 rb_is_attrset_sym(VALUE sym)
949 {
950 return is_attrset_sym(sym);
951 }
952
953 int
rb_is_local_sym(VALUE sym)954 rb_is_local_sym(VALUE sym)
955 {
956 return is_local_sym(sym);
957 }
958
959 int
rb_is_junk_sym(VALUE sym)960 rb_is_junk_sym(VALUE sym)
961 {
962 return is_junk_sym(sym);
963 }
964
965 /**
966 * Returns ID for the given name if it is interned already, or 0.
967 *
968 * \param namep the pointer to the name object
969 * \return the ID for *namep
970 * \pre the object referred by \p namep must be a Symbol or
971 * a String, or possible to convert with to_str method.
972 * \post the object referred by \p namep is a Symbol or a
973 * String if non-zero value is returned, or is a String
974 * if 0 is returned.
975 */
976 ID
rb_check_id(volatile VALUE * namep)977 rb_check_id(volatile VALUE *namep)
978 {
979 VALUE tmp;
980 VALUE name = *namep;
981
982 if (STATIC_SYM_P(name)) {
983 return STATIC_SYM2ID(name);
984 }
985 else if (DYNAMIC_SYM_P(name)) {
986 if (SYMBOL_PINNED_P(name)) {
987 return RSYMBOL(name)->id;
988 }
989 else {
990 *namep = RSYMBOL(name)->fstr;
991 return 0;
992 }
993 }
994 else if (!RB_TYPE_P(name, T_STRING)) {
995 tmp = rb_check_string_type(name);
996 if (NIL_P(tmp)) {
997 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
998 name);
999 }
1000 name = tmp;
1001 *namep = name;
1002 }
1003
1004 sym_check_asciionly(name);
1005
1006 return lookup_str_id(name);
1007 }
1008
1009 VALUE
rb_check_symbol(volatile VALUE * namep)1010 rb_check_symbol(volatile VALUE *namep)
1011 {
1012 VALUE sym;
1013 VALUE tmp;
1014 VALUE name = *namep;
1015
1016 if (STATIC_SYM_P(name)) {
1017 return name;
1018 }
1019 else if (DYNAMIC_SYM_P(name)) {
1020 if (!SYMBOL_PINNED_P(name)) {
1021 name = dsymbol_check(name);
1022 *namep = name;
1023 }
1024 return name;
1025 }
1026 else if (!RB_TYPE_P(name, T_STRING)) {
1027 tmp = rb_check_string_type(name);
1028 if (NIL_P(tmp)) {
1029 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1030 name);
1031 }
1032 name = tmp;
1033 *namep = name;
1034 }
1035
1036 sym_check_asciionly(name);
1037
1038 if ((sym = lookup_str_sym(name)) != 0) {
1039 return sym;
1040 }
1041
1042 return Qnil;
1043 }
1044
1045 ID
rb_check_id_cstr(const char * ptr,long len,rb_encoding * enc)1046 rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1047 {
1048 struct RString fake_str;
1049 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1050
1051 sym_check_asciionly(name);
1052
1053 return lookup_str_id(name);
1054 }
1055
1056 VALUE
rb_check_symbol_cstr(const char * ptr,long len,rb_encoding * enc)1057 rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1058 {
1059 VALUE sym;
1060 struct RString fake_str;
1061 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1062
1063 sym_check_asciionly(name);
1064
1065 if ((sym = lookup_str_sym(name)) != 0) {
1066 return sym;
1067 }
1068
1069 return Qnil;
1070 }
1071
/* Drop any macro definitions of these two names so the functions below can be defined. */
#undef rb_sym_intern_cstr
#undef rb_sym_intern_ascii_cstr
/* Under clang only rb_sym_intern is declared here, as NOINLINE; other
 * compilers get FUNC_MINIMIZED declarations for all four functions. */
#ifdef __clang__
NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
#else
FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
FUNC_MINIMIZED(VALUE rb_sym_intern_cstr(const char *ptr, rb_encoding *enc));
FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
#endif
1082
1083 VALUE
rb_sym_intern(const char * ptr,long len,rb_encoding * enc)1084 rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1085 {
1086 struct RString fake_str;
1087 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1088 return rb_str_intern(name);
1089 }
1090
1091 VALUE
rb_sym_intern_cstr(const char * ptr,rb_encoding * enc)1092 rb_sym_intern_cstr(const char *ptr, rb_encoding *enc)
1093 {
1094 return rb_sym_intern(ptr, strlen(ptr), enc);
1095 }
1096
1097 VALUE
rb_sym_intern_ascii(const char * ptr,long len)1098 rb_sym_intern_ascii(const char *ptr, long len)
1099 {
1100 return rb_sym_intern(ptr, len, rb_usascii_encoding());
1101 }
1102
1103 VALUE
rb_sym_intern_ascii_cstr(const char * ptr)1104 rb_sym_intern_ascii_cstr(const char *ptr)
1105 {
1106 return rb_sym_intern_ascii(ptr, strlen(ptr));
1107 }
1108
1109 VALUE
rb_to_symbol_type(VALUE obj)1110 rb_to_symbol_type(VALUE obj)
1111 {
1112 return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1113 }
1114
1115 static ID
attrsetname_to_attr_id(VALUE name)1116 attrsetname_to_attr_id(VALUE name)
1117 {
1118 ID id;
1119 struct RString fake_str;
1120 /* make local name by chopping '=' */
1121 const VALUE localname = rb_setup_fake_str(&fake_str,
1122 RSTRING_PTR(name), RSTRING_LEN(name) - 1,
1123 rb_enc_get(name));
1124 OBJ_FREEZE(localname);
1125
1126 if ((id = lookup_str_id(localname)) != 0) {
1127 return id;
1128 }
1129 RB_GC_GUARD(name);
1130 return (ID)0;
1131 }
1132
1133 static ID
attrsetname_to_attr(VALUE name)1134 attrsetname_to_attr(VALUE name)
1135 {
1136 if (rb_is_attrset_name(name)) {
1137 return attrsetname_to_attr_id(name);
1138 }
1139
1140 return (ID)0;
1141 }
1142
1143 int
rb_is_const_name(VALUE name)1144 rb_is_const_name(VALUE name)
1145 {
1146 return rb_str_symname_type(name, 0) == ID_CONST;
1147 }
1148
1149 int
rb_is_class_name(VALUE name)1150 rb_is_class_name(VALUE name)
1151 {
1152 return rb_str_symname_type(name, 0) == ID_CLASS;
1153 }
1154
1155 int
rb_is_global_name(VALUE name)1156 rb_is_global_name(VALUE name)
1157 {
1158 return rb_str_symname_type(name, 0) == ID_GLOBAL;
1159 }
1160
1161 int
rb_is_instance_name(VALUE name)1162 rb_is_instance_name(VALUE name)
1163 {
1164 return rb_str_symname_type(name, 0) == ID_INSTANCE;
1165 }
1166
1167 int
rb_is_attrset_name(VALUE name)1168 rb_is_attrset_name(VALUE name)
1169 {
1170 return rb_str_symname_type(name, IDSET_ATTRSET_FOR_INTERN) == ID_ATTRSET;
1171 }
1172
1173 int
rb_is_local_name(VALUE name)1174 rb_is_local_name(VALUE name)
1175 {
1176 return rb_str_symname_type(name, 0) == ID_LOCAL;
1177 }
1178
1179 int
rb_is_method_name(VALUE name)1180 rb_is_method_name(VALUE name)
1181 {
1182 switch (rb_str_symname_type(name, 0)) {
1183 case ID_LOCAL: case ID_ATTRSET: case ID_JUNK:
1184 return TRUE;
1185 }
1186 return FALSE;
1187 }
1188
1189 int
rb_is_junk_name(VALUE name)1190 rb_is_junk_name(VALUE name)
1191 {
1192 return rb_str_symname_type(name, IDSET_ATTRSET_FOR_SYNTAX) == -1;
1193 }
1194
1195 #include "id_table.c"
1196