1 /**********************************************************************
2 
3   hash.c -
4 
5   $Author: usa $
6   created at: Mon Nov 22 18:51:18 JST 1993
7 
8   Copyright (C) 1993-2007 Yukihiro Matsumoto
9   Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
10   Copyright (C) 2000  Information-technology Promotion Agency, Japan
11 
12 **********************************************************************/
13 
14 #include "ruby/encoding.h"
15 #include "ruby/st.h"
16 #include "ruby/util.h"
17 #include "internal.h"
18 #include <errno.h>
19 #include "probes.h"
20 #include "id.h"
21 #include "symbol.h"
22 #include "gc.h"
23 #include "debug_counter.h"
24 #include "transient_heap.h"
25 #include "ruby_assert.h"
26 #ifdef __APPLE__
27 # ifdef HAVE_CRT_EXTERNS_H
28 #  include <crt_externs.h>
29 # else
30 #  include "missing/crt_externs.h"
31 # endif
32 #endif
33 
34 #ifndef HASH_DEBUG
35 #define HASH_DEBUG 0
36 #endif
37 
38 #define HAS_EXTRA_STATES(hash, klass) ( \
39     ((klass = has_extra_methods(rb_obj_class(hash))) != 0) || \
40     FL_TEST((hash), FL_EXIVAR|FL_TAINT|HASH_PROC_DEFAULT) || \
41     !NIL_P(RHASH_IFNONE(hash)))
42 
43 #define SET_DEFAULT(hash, ifnone) ( \
44     FL_UNSET_RAW(hash, HASH_PROC_DEFAULT), \
45     RHASH_SET_IFNONE(hash, ifnone))
46 
47 #define SET_PROC_DEFAULT(hash, proc) set_proc_default(hash, proc)
48 
49 #define COPY_DEFAULT(hash, hash2) copy_default(RHASH(hash), RHASH(hash2))
50 
51 static inline void
copy_default(struct RHash * hash,const struct RHash * hash2)52 copy_default(struct RHash *hash, const struct RHash *hash2)
53 {
54     hash->basic.flags &= ~HASH_PROC_DEFAULT;
55     hash->basic.flags |= hash2->basic.flags & HASH_PROC_DEFAULT;
56     RHASH_SET_IFNONE(hash, RHASH_IFNONE(hash2));
57 }
58 
59 static VALUE
has_extra_methods(VALUE klass)60 has_extra_methods(VALUE klass)
61 {
62     const VALUE base = rb_cHash;
63     VALUE c = klass;
64     while (c != base) {
65 	if (rb_class_has_methods(c)) return klass;
66 	c = RCLASS_SUPER(c);
67     }
68     return 0;
69 }
70 
71 static VALUE rb_hash_s_try_convert(VALUE, VALUE);
72 
73 /*
74  * Hash WB strategy:
75  *  1. Check mutate st_* functions
76  *     * st_insert()
77  *     * st_insert2()
78  *     * st_update()
79  *     * st_add_direct()
80  *  2. Insert WBs
81  */
82 
83 VALUE
rb_hash_freeze(VALUE hash)84 rb_hash_freeze(VALUE hash)
85 {
86     return rb_obj_freeze(hash);
87 }
88 
89 VALUE rb_cHash;
90 
91 static VALUE envtbl;
92 static ID id_hash, id_yield, id_default, id_flatten_bang;
93 
94 VALUE
rb_hash_set_ifnone(VALUE hash,VALUE ifnone)95 rb_hash_set_ifnone(VALUE hash, VALUE ifnone)
96 {
97     RB_OBJ_WRITE(hash, (&RHASH(hash)->ifnone), ifnone);
98     return hash;
99 }
100 
101 static int
rb_any_cmp(VALUE a,VALUE b)102 rb_any_cmp(VALUE a, VALUE b)
103 {
104     if (a == b) return 0;
105     if (FIXNUM_P(a) && FIXNUM_P(b)) {
106 	return a != b;
107     }
108     if (RB_TYPE_P(a, T_STRING) && RBASIC(a)->klass == rb_cString &&
109 	RB_TYPE_P(b, T_STRING) && RBASIC(b)->klass == rb_cString) {
110 	return rb_str_hash_cmp(a, b);
111     }
112     if (a == Qundef || b == Qundef) return -1;
113     if (SYMBOL_P(a) && SYMBOL_P(b)) {
114 	return a != b;
115     }
116 
117     return !rb_eql(a, b);
118 }
119 
120 static VALUE
hash_recursive(VALUE obj,VALUE arg,int recurse)121 hash_recursive(VALUE obj, VALUE arg, int recurse)
122 {
123     if (recurse) return INT2FIX(0);
124     return rb_funcallv(obj, id_hash, 0, 0);
125 }
126 
127 VALUE
rb_hash(VALUE obj)128 rb_hash(VALUE obj)
129 {
130     VALUE hval = rb_exec_recursive_outer(hash_recursive, obj, 0);
131 
132     while (!FIXNUM_P(hval)) {
133         if (RB_TYPE_P(hval, T_BIGNUM)) {
134             int sign;
135             unsigned long ul;
136             sign = rb_integer_pack(hval, &ul, 1, sizeof(ul), 0,
137                     INTEGER_PACK_NATIVE_BYTE_ORDER);
138             ul &= (1UL << (sizeof(long)*CHAR_BIT-1)) - 1;
139             if (sign < 0)
140                 return LONG2FIX(-(long)ul);
141             return LONG2FIX((long)ul);
142         }
143 	hval = rb_to_int(hval);
144     }
145     return hval;
146 }
147 
148 long rb_objid_hash(st_index_t index);
149 
150 long
rb_dbl_long_hash(double d)151 rb_dbl_long_hash(double d)
152 {
153     /* normalize -0.0 to 0.0 */
154     if (d == 0.0) d = 0.0;
155 #if SIZEOF_INT == SIZEOF_VOIDP
156     return rb_memhash(&d, sizeof(d));
157 #else
158     {
159 	union {double d; uint64_t i;} u;
160 
161 	u.d = d;
162 	return rb_objid_hash(rb_hash_start(u.i));
163     }
164 #endif
165 }
166 
167 static inline long
any_hash(VALUE a,st_index_t (* other_func)(VALUE))168 any_hash(VALUE a, st_index_t (*other_func)(VALUE))
169 {
170     VALUE hval;
171     st_index_t hnum;
172 
173     if (SPECIAL_CONST_P(a)) {
174 	if (STATIC_SYM_P(a)) {
175 	    hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
176 	    hnum = rb_hash_start(hnum);
177 	    goto out;
178 	}
179 	else if (FLONUM_P(a)) {
180 	    /* prevent pathological behavior: [Bug #10761] */
181 	    goto flt;
182 	}
183 	hnum = rb_objid_hash((st_index_t)a);
184     }
185     else if (BUILTIN_TYPE(a) == T_STRING) {
186 	hnum = rb_str_hash(a);
187     }
188     else if (BUILTIN_TYPE(a) == T_SYMBOL) {
189 	hnum = RSYMBOL(a)->hashval;
190     }
191     else if (BUILTIN_TYPE(a) == T_BIGNUM) {
192 	hval = rb_big_hash(a);
193 	hnum = FIX2LONG(hval);
194     }
195     else if (BUILTIN_TYPE(a) == T_FLOAT) {
196       flt:
197 	hnum = rb_dbl_long_hash(rb_float_value(a));
198     }
199     else {
200 	hnum = other_func(a);
201     }
202   out:
203 #if SIZEOF_LONG < SIZEOF_ST_INDEX_T
204     if (hnum > 0)
205 	hnum &= (unsigned long)-1 >> 2;
206     else
207 	hnum |= ~((unsigned long)-1 >> 2);
208 #else
209     hnum <<= 1;
210     hnum = RSHIFT(hnum, 1);
211 #endif
212     return (long)hnum;
213 }
214 
215 static st_index_t
obj_any_hash(VALUE obj)216 obj_any_hash(VALUE obj)
217 {
218     obj = rb_hash(obj);
219     return FIX2LONG(obj);
220 }
221 
222 static st_index_t
rb_any_hash(VALUE a)223 rb_any_hash(VALUE a)
224 {
225     return any_hash(a, obj_any_hash);
226 }
227 
228 /* Here is a hash function for 64-bit key.  It is about 5 times faster
229    (2 times faster when uint128 type is absent) on Haswell than
230    tailored Spooky or City hash function can be.  */
231 
232 /* Here we two primes with random bit generation.  */
233 static const uint64_t prime1 = ((uint64_t)0x2e0bb864 << 32) | 0xe9ea7df5;
234 static const uint32_t prime2 = 0x830fcab9;
235 
236 
237 static inline uint64_t
mult_and_mix(uint64_t m1,uint64_t m2)238 mult_and_mix(uint64_t m1, uint64_t m2)
239 {
240 #if defined HAVE_UINT128_T
241     uint128_t r = (uint128_t) m1 * (uint128_t) m2;
242     return (uint64_t) (r >> 64) ^ (uint64_t) r;
243 #else
244     uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32;
245     uint64_t lm1 = m1, lm2 = m2;
246     uint64_t v64_128 = hm1 * hm2;
247     uint64_t v32_96 = hm1 * lm2 + lm1 * hm2;
248     uint64_t v1_32 = lm1 * lm2;
249 
250     return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32);
251 #endif
252 }
253 
254 static inline uint64_t
key64_hash(uint64_t key,uint32_t seed)255 key64_hash(uint64_t key, uint32_t seed)
256 {
257     return mult_and_mix(key + seed, prime1);
258 }
259 
260 long
rb_objid_hash(st_index_t index)261 rb_objid_hash(st_index_t index)
262 {
263     return (long)key64_hash(rb_hash_start(index), prime2);
264 }
265 
266 static st_index_t
objid_hash(VALUE obj)267 objid_hash(VALUE obj)
268 {
269     return rb_objid_hash((st_index_t)obj);
270 }
271 
272 VALUE
rb_obj_hash(VALUE obj)273 rb_obj_hash(VALUE obj)
274 {
275     long hnum = any_hash(obj, objid_hash);
276     return ST2FIX(hnum);
277 }
278 
279 static const struct st_hash_type objhash = {
280     rb_any_cmp,
281     rb_any_hash,
282 };
283 
284 #define rb_ident_cmp st_numcmp
285 
286 static st_index_t
rb_ident_hash(st_data_t n)287 rb_ident_hash(st_data_t n)
288 {
289 #ifdef USE_FLONUM /* RUBY */
290     /*
291      * - flonum (on 64-bit) is pathologically bad, mix the actual
292      *   float value in, but do not use the float value as-is since
293      *   many integers get interpreted as 2.0 or -2.0 [Bug #10761]
294      */
295     if (FLONUM_P(n)) {
296         union { double d; st_data_t i; } u;
297         u.d = rb_float_value(n);
298         n ^= u.i;
299     }
300 #endif
301 
302     return (st_index_t)key64_hash(rb_hash_start((st_index_t)n), prime2);
303 }
304 
305 static const struct st_hash_type identhash = {
306     rb_ident_cmp,
307     rb_ident_hash,
308 };
309 
310 #define EQUAL(x,y) ((x) == (y) || (*objhash.compare)((x),(y)) == 0)
311 #define PTR_EQUAL(ptr, hash_val, key_) \
312     ((ptr)->hash == (hash_val) && EQUAL((key_), (ptr)->key))
313 
314 #define RESERVED_HASH_VAL (~(st_hash_t) 0)
315 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
316 
317 #define SET_KEY(entry, _key) (entry)->key = (_key)
318 #define SET_HASH(entry, _hash) (entry)->hash = (_hash)
319 #define SET_RECORD(entry, _value) (entry)->record = (_value)
320 
321 typedef st_data_t st_hash_t;
322 extern const st_hash_t st_reserved_hash_val;
323 extern const st_hash_t st_reserved_hash_substitution_val;
324 
325 static inline st_hash_t
do_hash(st_data_t key)326 do_hash(st_data_t key)
327 {
328     st_hash_t hash = (st_hash_t)(*objhash.hash)(key);
329     return (RESERVED_HASH_VAL == hash) ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
330 }
331 
332 static inline void
set_entry(ar_table_entry * entry,st_data_t key,st_data_t val,st_hash_t hash)333 set_entry(ar_table_entry *entry, st_data_t key, st_data_t val, st_hash_t hash)
334 {
335     SET_HASH(entry, hash);
336     SET_KEY(entry, key);
337     SET_RECORD(entry, val);
338 }
339 
340 static inline void
clear_entry(ar_table_entry * entry)341 clear_entry(ar_table_entry* entry)
342 {
343     SET_KEY(entry, Qundef);
344     SET_RECORD(entry, Qundef);
345     SET_HASH(entry, RESERVED_HASH_VAL);
346 }
347 
348 static inline int
empty_entry(ar_table_entry * entry)349 empty_entry(ar_table_entry *entry)
350 {
351     return entry->hash == RESERVED_HASH_VAL;
352 }
353 
354 #define RHASH_AR_TABLE_SIZE(h) (HASH_ASSERT(RHASH_AR_TABLE_P(h)), \
355                              RHASH_AR_TABLE_SIZE_RAW(h))
356 
357 #define RHASH_AR_TABLE_BOUND_RAW(h) \
358   ((unsigned int)((RBASIC(h)->flags >> RHASH_AR_TABLE_BOUND_SHIFT) & \
359                   (RHASH_AR_TABLE_BOUND_MASK >> RHASH_AR_TABLE_BOUND_SHIFT)))
360 
361 #define RHASH_AR_TABLE_BOUND(h) (HASH_ASSERT(RHASH_AR_TABLE_P(h)), \
362                               RHASH_AR_TABLE_BOUND_RAW(h))
363 
364 #define RHASH_ST_TABLE_SET(h, s)  rb_hash_st_table_set(h, s)
365 #define RHASH_TYPE(hash) (RHASH_AR_TABLE_P(hash) ? &objhash : RHASH_ST_TABLE(hash)->type)
366 #define RHASH_AR_TABLE_REF(hash, n) (&RHASH_AR_TABLE(hash)->entries[n])
367 
368 #if HASH_DEBUG
369 #define hash_verify(hash) hash_verify_(hash, __FILE__, __LINE__)
370 #define HASH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(1, expr, #expr)
371 
372 void
rb_hash_dump(VALUE hash)373 rb_hash_dump(VALUE hash)
374 {
375     rb_obj_info_dump(hash);
376 
377     if (RHASH_AR_TABLE_P(hash)) {
378         unsigned i, n = 0, bound = RHASH_AR_TABLE_BOUND(hash);
379 
380         fprintf(stderr, "  size:%u bound:%u\n",
381                 RHASH_AR_TABLE_SIZE(hash), RHASH_AR_TABLE_BOUND(hash));
382 
383         for (i=0; i<bound; i++) {
384             ar_table_entry *cur_entry = RHASH_AR_TABLE_REF(hash, i);
385             st_data_t k, v;
386 
387             if (!empty_entry(cur_entry)) {
388                 char b1[0x100], b2[0x100];
389                 /* h = cur_entry->hash; */
390                 k = cur_entry->key;
391                 v = cur_entry->record;
392                 fprintf(stderr, "  %d key:%s val:%s\n", i,
393                         rb_raw_obj_info(b1, 0x100, k),
394                         rb_raw_obj_info(b2, 0x100, v));
395                 n++;
396             }
397             else {
398                 fprintf(stderr, "  %d empty\n", i);
399             }
400         }
401     }
402 }
403 
404 static VALUE
hash_verify_(VALUE hash,const char * file,int line)405 hash_verify_(VALUE hash, const char *file, int line)
406 {
407     HASH_ASSERT(RB_TYPE_P(hash, T_HASH));
408 
409     if (RHASH_AR_TABLE_P(hash)) {
410         unsigned i, n = 0, bound = RHASH_AR_TABLE_BOUND(hash);
411 
412         for (i=0; i<bound; i++) {
413             ar_table_entry *cur_entry = RHASH_AR_TABLE_REF(hash, i);
414             st_data_t h, k, v;
415             if (!empty_entry(cur_entry)) {
416                 h = cur_entry->hash;
417                 k = cur_entry->key;
418                 v = cur_entry->record;
419                 HASH_ASSERT(h != RESERVED_HASH_VAL);
420                 HASH_ASSERT(k != Qundef);
421                 HASH_ASSERT(v != Qundef);
422                 n++;
423             }
424         }
425         if (n != RHASH_AR_TABLE_SIZE(hash)) {
426             rb_bug("n:%u, RHASH_AR_TABLE_SIZE:%u", n, RHASH_AR_TABLE_SIZE(hash));
427         }
428     }
429     else {
430         HASH_ASSERT(RHASH_ST_TABLE(hash) != NULL);
431         HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0);
432         HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0);
433     }
434 
435     if (RHASH_TRANSIENT_P(hash)) {
436         volatile st_data_t MAYBE_UNUSED(key) = RHASH_AR_TABLE_REF(hash, 0)->key; /* read */
437         HASH_ASSERT(RHASH_AR_TABLE(hash) != NULL);
438         HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH_AR_TABLE(hash)));
439     }
440     return hash;
441 }
442 
443 #else
444 #define hash_verify(h) ((void)0)
445 #define HASH_ASSERT(e) ((void)0)
446 #endif
447 
448 static inline int
RHASH_TABLE_NULL_P(VALUE hash)449 RHASH_TABLE_NULL_P(VALUE hash)
450 {
451     if (RHASH(hash)->as.ar == NULL) {
452         HASH_ASSERT(RHASH_AR_TABLE_P(hash));
453         return TRUE;
454     }
455     else {
456         return FALSE;
457     }
458 }
459 
460 static inline int
RHASH_TABLE_EMPTY_P(VALUE hash)461 RHASH_TABLE_EMPTY_P(VALUE hash)
462 {
463     return RHASH_SIZE(hash) == 0;
464 }
465 
466 MJIT_FUNC_EXPORTED int
rb_hash_ar_table_p(VALUE hash)467 rb_hash_ar_table_p(VALUE hash)
468 {
469     if (FL_TEST_RAW((hash), RHASH_ST_TABLE_FLAG)) {
470         HASH_ASSERT(RHASH(hash)->as.st != NULL);
471         return FALSE;
472     }
473     else {
474         return TRUE;
475     }
476 }
477 
478 ar_table *
rb_hash_ar_table(VALUE hash)479 rb_hash_ar_table(VALUE hash)
480 {
481     HASH_ASSERT(RHASH_AR_TABLE_P(hash));
482     return RHASH(hash)->as.ar;
483 }
484 
485 MJIT_FUNC_EXPORTED st_table *
rb_hash_st_table(VALUE hash)486 rb_hash_st_table(VALUE hash)
487 {
488     HASH_ASSERT(!RHASH_AR_TABLE_P(hash));
489     return RHASH(hash)->as.st;
490 }
491 
492 void
rb_hash_st_table_set(VALUE hash,st_table * st)493 rb_hash_st_table_set(VALUE hash, st_table *st)
494 {
495     HASH_ASSERT(st != NULL);
496     FL_SET_RAW((hash), RHASH_ST_TABLE_FLAG);
497     RHASH(hash)->as.st = st;
498 }
499 
500 static void
hash_ar_table_set(VALUE hash,ar_table * ar)501 hash_ar_table_set(VALUE hash, ar_table *ar)
502 {
503     HASH_ASSERT(RHASH_AR_TABLE_P(hash));
504     HASH_ASSERT((RHASH_TRANSIENT_P(hash) && ar == NULL) ? FALSE : TRUE);
505     RHASH(hash)->as.ar = ar;
506     hash_verify(hash);
507 }
508 
509 #define RHASH_AR_TABLE_SET(h, a) hash_ar_table_set(h, a)
510 
511 #define RHASH_SET_ST_FLAG(h)          FL_SET_RAW(h, RHASH_ST_TABLE_FLAG)
512 #define RHASH_UNSET_ST_FLAG(h)        FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG)
513 
514 #define RHASH_AR_TABLE_BOUND_SET(h, n) do { \
515     st_index_t tmp_n = (n);          \
516     HASH_ASSERT(RHASH_AR_TABLE_P(h)); \
517     HASH_ASSERT(tmp_n <= RHASH_AR_TABLE_MAX_BOUND); \
518     RBASIC(h)->flags &= ~RHASH_AR_TABLE_BOUND_MASK; \
519     RBASIC(h)->flags |= (tmp_n) << RHASH_AR_TABLE_BOUND_SHIFT; \
520 } while (0)
521 
522 #define RHASH_AR_TABLE_SIZE_SET(h, n) do { \
523     st_index_t tmp_n = n; \
524     HASH_ASSERT(RHASH_AR_TABLE_P(h)); \
525     RBASIC(h)->flags &= ~RHASH_AR_TABLE_SIZE_MASK; \
526     RBASIC(h)->flags |= (tmp_n) << RHASH_AR_TABLE_SIZE_SHIFT; \
527 } while (0)
528 
529 #define HASH_AR_TABLE_SIZE_ADD(h, n) do  { \
530     HASH_ASSERT(RHASH_AR_TABLE_P(h)); \
531     RHASH_AR_TABLE_SIZE_SET((h), RHASH_AR_TABLE_SIZE(h)+(n)); \
532     hash_verify(h); \
533 } while (0)
534 
535 #define RHASH_AR_TABLE_SIZE_INC(h) HASH_AR_TABLE_SIZE_ADD(h, 1)
536 #define RHASH_AR_TABLE_SIZE_DEC(h) do  { \
537     HASH_ASSERT(RHASH_AR_TABLE_P(h)); \
538     RHASH_AR_TABLE_SIZE_SET((h), RHASH_AR_TABLE_SIZE(h) - 1); \
539     hash_verify(h); \
540 } while (0)
541 
542 #define RHASH_AR_TABLE_CLEAR(h) do { \
543     RBASIC(h)->flags &= ~RHASH_AR_TABLE_SIZE_MASK; \
544     RBASIC(h)->flags &= ~RHASH_AR_TABLE_BOUND_MASK; \
545     RHASH_AR_TABLE_SET(hash, NULL); \
546 } while (0)
547 
548 
549 static ar_table*
ar_alloc_table(VALUE hash)550 ar_alloc_table(VALUE hash)
551 {
552     ar_table *tab = (ar_table*)rb_transient_heap_alloc(hash, sizeof(ar_table));
553 
554     if (tab != NULL) {
555         RHASH_SET_TRANSIENT_FLAG(hash);
556     }
557     else {
558         RHASH_UNSET_TRANSIENT_FLAG(hash);
559         tab = (ar_table*)ruby_xmalloc(sizeof(ar_table));
560     }
561 
562     RHASH_AR_TABLE_SIZE_SET(hash, 0);
563     RHASH_AR_TABLE_BOUND_SET(hash, 0);
564     RHASH_AR_TABLE_SET(hash, tab);
565 
566     return tab;
567 }
568 
569 static unsigned
find_entry(VALUE hash,st_hash_t hash_value,st_data_t key)570 find_entry(VALUE hash, st_hash_t hash_value, st_data_t key)
571 {
572     unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
573 
574     /* if table is NULL, then bound also should be 0 */
575 
576     for (i = 0; i < bound; i++) {
577         if (PTR_EQUAL(RHASH_AR_TABLE_REF(hash, i), hash_value, key)) {
578             return i;
579         }
580     }
581     return RHASH_AR_TABLE_MAX_BOUND;
582 }
583 
584 static inline void
ar_free_and_clear_table(VALUE hash)585 ar_free_and_clear_table(VALUE hash)
586 {
587     ar_table *tab = RHASH_AR_TABLE(hash);
588 
589     if (tab) {
590         if (RHASH_TRANSIENT_P(hash)) {
591             RHASH_UNSET_TRANSIENT_FLAG(hash);
592         }
593         else {
594             ruby_xfree(RHASH_AR_TABLE(hash));
595         }
596         RHASH_AR_TABLE_CLEAR(hash);
597     }
598     HASH_ASSERT(RHASH_AR_TABLE_SIZE(hash) == 0);
599     HASH_ASSERT(RHASH_AR_TABLE_BOUND(hash) == 0);
600     HASH_ASSERT(RHASH_TRANSIENT_P(hash) == 0);
601 }
602 
603 void st_add_direct_with_hash(st_table *tab, st_data_t key, st_data_t value, st_hash_t hash); /* st.c */
604 
605 static void
ar_try_convert_table(VALUE hash)606 ar_try_convert_table(VALUE hash)
607 {
608     st_table *new_tab;
609     ar_table_entry *entry;
610     unsigned size;
611     st_index_t i;
612 
613     if (!RHASH_AR_TABLE_P(hash)) return;
614 
615     size = RHASH_AR_TABLE_SIZE(hash);
616 
617     if (size < RHASH_AR_TABLE_MAX_SIZE) {
618         return;
619     }
620 
621     new_tab = st_init_table_with_size(&objhash, size * 2);
622 
623     for (i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) {
624         entry = RHASH_AR_TABLE_REF(hash, i);
625         HASH_ASSERT(entry->hash != RESERVED_HASH_VAL);
626 
627         st_add_direct_with_hash(new_tab, entry->key, entry->record, entry->hash);
628     }
629     ar_free_and_clear_table(hash);
630     RHASH_ST_TABLE_SET(hash, new_tab);
631     return;
632 }
633 
634 static st_table *
ar_force_convert_table(VALUE hash,const char * file,int line)635 ar_force_convert_table(VALUE hash, const char *file, int line)
636 {
637     st_table *new_tab;
638 
639     if (RHASH_ST_TABLE_P(hash)) {
640         return RHASH_ST_TABLE(hash);
641     }
642 
643     if (RHASH_AR_TABLE(hash)) {
644         ar_table_entry *entry;
645         unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
646 
647 #if RHASH_CONVERT_TABLE_DEBUG
648         rb_obj_info_dump(hash);
649         fprintf(stderr, "force_convert: %s:%d\n", file, line);
650         RB_DEBUG_COUNTER_INC(obj_hash_force_convert);
651 #endif
652 
653         new_tab = st_init_table_with_size(&objhash, RHASH_AR_TABLE_SIZE(hash));
654 
655         for (i = 0; i < bound; i++) {
656             entry = RHASH_AR_TABLE_REF(hash, i);
657             if (empty_entry(entry)) continue;
658 
659             st_add_direct_with_hash(new_tab, entry->key, entry->record, entry->hash);
660         }
661         ar_free_and_clear_table(hash);
662     }
663     else {
664         new_tab = st_init_table(&objhash);
665     }
666     RHASH_ST_TABLE_SET(hash, new_tab);
667 
668     return new_tab;
669 }
670 
671 static ar_table *
hash_ar_table(VALUE hash)672 hash_ar_table(VALUE hash)
673 {
674     if (RHASH_TABLE_NULL_P(hash)) {
675         ar_alloc_table(hash);
676     }
677     return RHASH_AR_TABLE(hash);
678 }
679 
680 static int
ar_compact_table(VALUE hash)681 ar_compact_table(VALUE hash)
682 {
683     const unsigned bound = RHASH_AR_TABLE_BOUND(hash);
684     const unsigned size = RHASH_AR_TABLE_SIZE(hash);
685 
686     if (size == bound) {
687         return size;
688     }
689     else {
690         unsigned i, j=0;
691         ar_table_entry *entries = RHASH_AR_TABLE_REF(hash, 0);
692 
693         for (i=0; i<bound; i++) {
694             if (empty_entry(&entries[i])) {
695                 if (j <= i) j = i+1;
696                 for (; j<bound; j++) {
697                     if (!empty_entry(&entries[j])) {
698                         entries[i] = entries[j];
699                         clear_entry(&entries[j]);
700                         j++;
701                         goto found;
702                     }
703                 }
704                 /* non-empty is not found */
705                 goto done;
706               found:;
707             }
708         }
709       done:
710         HASH_ASSERT(i<=bound);
711 
712         RHASH_AR_TABLE_BOUND_SET(hash, size);
713         hash_verify(hash);
714         return size;
715     }
716 }
717 
718 static int
ar_add_direct_with_hash(VALUE hash,st_data_t key,st_data_t val,st_hash_t hash_value)719 ar_add_direct_with_hash(VALUE hash, st_data_t key, st_data_t val, st_hash_t hash_value)
720 {
721     unsigned bin = RHASH_AR_TABLE_BOUND(hash);
722     ar_table *tab = RHASH_AR_TABLE(hash);
723     ar_table_entry *entry;
724 
725     if (RHASH_AR_TABLE_SIZE(hash) >= RHASH_AR_TABLE_MAX_SIZE) {
726         return 1;
727     }
728     else {
729         if (UNLIKELY(bin >= RHASH_AR_TABLE_MAX_BOUND)) {
730             bin = ar_compact_table(hash);
731             hash_ar_table(hash);
732         }
733         HASH_ASSERT(bin < RHASH_AR_TABLE_MAX_BOUND);
734 
735         entry = &tab->entries[bin];
736         set_entry(entry, key, val, hash_value);
737         RHASH_AR_TABLE_BOUND_SET(hash, bin+1);
738         RHASH_AR_TABLE_SIZE_INC(hash);
739         return 0;
740     }
741 }
742 
743 static int
ar_foreach(VALUE hash,int (* func)(ANYARGS),st_data_t arg)744 ar_foreach(VALUE hash, int (*func)(ANYARGS), st_data_t arg)
745 {
746     if (RHASH_AR_TABLE_SIZE(hash) > 0) {
747         unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
748 
749         for (i = 0; i < bound; i++) {
750             enum st_retval retval;
751             ar_table_entry *cur_entry = RHASH_AR_TABLE_REF(hash, i);
752             if (empty_entry(cur_entry)) continue;
753             retval = (*func)(cur_entry->key, cur_entry->record, arg, 0);
754             /* cur_entry is not valid after that */
755 
756             switch (retval) {
757               case ST_CONTINUE:
758                 break;
759               case ST_CHECK:
760               case ST_STOP:
761                 return 0;
762               case ST_DELETE:
763                 clear_entry(RHASH_AR_TABLE_REF(hash, i));
764                 RHASH_AR_TABLE_SIZE_DEC(hash);
765                 break;
766             }
767         }
768     }
769     return 0;
770 }
771 
772 static int
ar_foreach_check(VALUE hash,int (* func)(ANYARGS),st_data_t arg,st_data_t never)773 ar_foreach_check(VALUE hash, int (*func)(ANYARGS), st_data_t arg,
774                      st_data_t never)
775 {
776     if (RHASH_AR_TABLE_SIZE(hash) > 0) {
777         unsigned i, ret = 0, bound = RHASH_AR_TABLE_BOUND(hash);
778         enum st_retval retval;
779         ar_table_entry *cur_entry;
780         st_data_t key;
781         st_hash_t hash_value;
782 
783         for (i = 0; i < bound; i++) {
784             cur_entry = RHASH_AR_TABLE_REF(hash, i);
785             if (empty_entry(cur_entry))
786               continue;
787             key = cur_entry->key;
788             hash_value = cur_entry->hash;
789 
790             retval = (*func)(key, cur_entry->record, arg, 0);
791             hash_verify(hash);
792 
793             cur_entry = RHASH_AR_TABLE_REF(hash, i);
794 
795             switch (retval) {
796               case ST_CHECK: {
797                   if (cur_entry->key == never && cur_entry->hash == RESERVED_HASH_VAL)
798                       break;
799                   ret = find_entry(hash, hash_value, key);
800                   if (ret == RHASH_AR_TABLE_MAX_BOUND) {
801                       retval = (*func)(0, 0, arg, 1);
802                       return 2;
803                   }
804               }
805               case ST_CONTINUE:
806                 break;
807               case ST_STOP:
808                 return 0;
809               case ST_DELETE: {
810                   if (!empty_entry(cur_entry)) {
811                       clear_entry(cur_entry);
812                       RHASH_AR_TABLE_SIZE_DEC(hash);
813                   }
814                   break;
815               }
816             }
817         }
818     }
819     return 0;
820 }
821 
822 static int
ar_update(VALUE hash,st_data_t key,st_update_callback_func * func,st_data_t arg)823 ar_update(VALUE hash, st_data_t key,
824               st_update_callback_func *func, st_data_t arg)
825 {
826     int retval, existing;
827     unsigned bin = RHASH_AR_TABLE_MAX_BOUND;
828     st_data_t value = 0, old_key;
829     st_hash_t hash_value = do_hash(key);
830 
831     if (UNLIKELY(!RHASH_AR_TABLE_P(hash))) {
832         /* `#hash` changes ar_table -> st_table */
833         return -1;
834     }
835 
836     if (RHASH_AR_TABLE_SIZE(hash) > 0) {
837         bin = find_entry(hash, hash_value, key);
838         existing = (bin != RHASH_AR_TABLE_MAX_BOUND) ? TRUE : FALSE;
839     }
840     else {
841         hash_ar_table(hash); /* allocate ltbl if needed */
842         existing = FALSE;
843     }
844 
845     if (existing) {
846         ar_table_entry *entry = RHASH_AR_TABLE_REF(hash, bin);
847         key = entry->key;
848         value = entry->record;
849     }
850     old_key = key;
851     retval = (*func)(&key, &value, arg, existing);
852 
853     switch (retval) {
854       case ST_CONTINUE:
855         if (!existing) {
856             if (ar_add_direct_with_hash(hash, key, value, hash_value)) {
857                 return -1;
858             }
859         }
860         else {
861             ar_table_entry *entry = RHASH_AR_TABLE_REF(hash, bin);
862             if (old_key != key) {
863                 entry->key = key;
864             }
865             entry->record = value;
866         }
867         break;
868       case ST_DELETE:
869         if (existing) {
870             clear_entry(RHASH_AR_TABLE_REF(hash, bin));
871             RHASH_AR_TABLE_SIZE_DEC(hash);
872         }
873         break;
874     }
875     return existing;
876 }
877 
878 static int
ar_insert(VALUE hash,st_data_t key,st_data_t value)879 ar_insert(VALUE hash, st_data_t key, st_data_t value)
880 {
881     unsigned bin = RHASH_AR_TABLE_BOUND(hash);
882     st_hash_t hash_value = do_hash(key);
883 
884     if (UNLIKELY(!RHASH_AR_TABLE_P(hash))) {
885         /* `#hash` changes ar_table -> st_table */
886         return -1;
887     }
888 
889     hash_ar_table(hash); /* prepare ltbl */
890 
891     bin = find_entry(hash, hash_value, key);
892     if (bin == RHASH_AR_TABLE_MAX_BOUND) {
893         if (RHASH_AR_TABLE_SIZE(hash) >= RHASH_AR_TABLE_MAX_SIZE) {
894             return -1;
895         }
896         else if (bin >= RHASH_AR_TABLE_MAX_BOUND) {
897             bin = ar_compact_table(hash);
898             hash_ar_table(hash);
899         }
900         HASH_ASSERT(bin < RHASH_AR_TABLE_MAX_BOUND);
901 
902         set_entry(RHASH_AR_TABLE_REF(hash, bin), key, value, hash_value);
903         RHASH_AR_TABLE_BOUND_SET(hash, bin+1);
904         RHASH_AR_TABLE_SIZE_INC(hash);
905         return 0;
906     }
907     else {
908         RHASH_AR_TABLE_REF(hash, bin)->record = value;
909         return 1;
910     }
911 }
912 
913 static int
ar_lookup(VALUE hash,st_data_t key,st_data_t * value)914 ar_lookup(VALUE hash, st_data_t key, st_data_t *value)
915 {
916     st_hash_t hash_value = do_hash(key);
917     unsigned bin;
918     if (UNLIKELY(!RHASH_AR_TABLE_P(hash))) {
919         /* `#hash` changes ar_table -> st_table */
920         return st_lookup(RHASH_ST_TABLE(hash), key, value);
921     }
922     bin = find_entry(hash, hash_value, key);
923 
924     if (bin == RHASH_AR_TABLE_MAX_BOUND) {
925         return 0;
926     }
927     else {
928         HASH_ASSERT(bin < RHASH_AR_TABLE_MAX_BOUND);
929         if (value != NULL) {
930             *value = RHASH_AR_TABLE_REF(hash, bin)->record;
931         }
932         return 1;
933     }
934 }
935 
936 static int
ar_delete(VALUE hash,st_data_t * key,st_data_t * value)937 ar_delete(VALUE hash, st_data_t *key, st_data_t *value)
938 {
939     unsigned bin;
940     st_hash_t hash_value = do_hash(*key);
941 
942     if (UNLIKELY(!RHASH_AR_TABLE_P(hash))) {
943         /* `#hash` changes ar_table -> st_table */
944         return st_delete(RHASH_ST_TABLE(hash), key, value);
945     }
946 
947     bin = find_entry(hash, hash_value, *key);
948 
949     if (bin == RHASH_AR_TABLE_MAX_BOUND) {
950         if (value != 0) *value = 0;
951         return 0;
952     }
953     else {
954         ar_table_entry *entry = RHASH_AR_TABLE_REF(hash, bin);
955         if (value != 0) *value = entry->record;
956         clear_entry(entry);
957         RHASH_AR_TABLE_SIZE_DEC(hash);
958         return 1;
959     }
960 }
961 
962 static int
ar_shift(VALUE hash,st_data_t * key,st_data_t * value)963 ar_shift(VALUE hash, st_data_t *key, st_data_t *value)
964 {
965     if (RHASH_AR_TABLE_SIZE(hash) > 0) {
966         unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
967         ar_table_entry *entry, *entries = RHASH_AR_TABLE(hash)->entries;
968 
969         for (i = 0; i < bound; i++) {
970             entry = &entries[i];
971             if (!empty_entry(entry)) {
972                 if (value != 0) *value = entry->record;
973                 *key = entry->key;
974                 clear_entry(entry);
975                 RHASH_AR_TABLE_SIZE_DEC(hash);
976                 return 1;
977             }
978         }
979     }
980     if (value != 0) *value = 0;
981     return 0;
982 }
983 
984 static long
ar_keys(VALUE hash,st_data_t * keys,st_index_t size)985 ar_keys(VALUE hash, st_data_t *keys, st_index_t size)
986 {
987     unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
988     st_data_t *keys_start = keys, *keys_end = keys + size;
989 
990     for (i = 0; i < bound; i++) {
991         if (keys == keys_end) {
992           break;
993         }
994         else {
995             ar_table_entry *cur_entry = RHASH_AR_TABLE_REF(hash, i);
996             if (!empty_entry(cur_entry))
997               *keys++ = cur_entry->key;
998         }
999     }
1000 
1001     return keys - keys_start;
1002 }
1003 
1004 static long
ar_values(VALUE hash,st_data_t * values,st_index_t size)1005 ar_values(VALUE hash, st_data_t *values, st_index_t size)
1006 {
1007     unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
1008     st_data_t *values_start = values, *values_end = values + size;
1009 
1010     for (i = 0; i < bound; i++) {
1011         if (values == values_end) {
1012           break;
1013         }
1014         else {
1015             ar_table_entry *cur_entry = RHASH_AR_TABLE_REF(hash, i);
1016             if (!empty_entry(cur_entry))
1017               *values++ = cur_entry->record;
1018         }
1019     }
1020 
1021     return values - values_start;
1022 }
1023 
1024 static ar_table*
ar_copy(VALUE hash1,VALUE hash2)1025 ar_copy(VALUE hash1, VALUE hash2)
1026 {
1027     ar_table *old_tab = RHASH_AR_TABLE(hash2);
1028 
1029     if (old_tab != NULL) {
1030         ar_table *new_tab = RHASH_AR_TABLE(hash1);
1031         if (new_tab == NULL) {
1032             new_tab = (ar_table*) rb_transient_heap_alloc(hash1, sizeof(ar_table));
1033             if (new_tab != NULL) {
1034                 RHASH_SET_TRANSIENT_FLAG(hash1);
1035             }
1036             else {
1037                 RHASH_UNSET_TRANSIENT_FLAG(hash1);
1038                 new_tab = (ar_table*)ruby_xmalloc(sizeof(ar_table));
1039             }
1040         }
1041         *new_tab = *old_tab;
1042         RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2));
1043         RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2));
1044         RHASH_AR_TABLE_SET(hash1, new_tab);
1045 
1046         rb_gc_writebarrier_remember(hash1);
1047         return new_tab;
1048     }
1049     else {
1050         RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2));
1051         RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2));
1052 
1053         if (RHASH_TRANSIENT_P(hash1)) {
1054             RHASH_UNSET_TRANSIENT_FLAG(hash1);
1055         }
1056         else if (RHASH_AR_TABLE(hash1)) {
1057             ruby_xfree(RHASH_AR_TABLE(hash1));
1058         }
1059 
1060         RHASH_AR_TABLE_SET(hash1, NULL);
1061 
1062         rb_gc_writebarrier_remember(hash1);
1063         return old_tab;
1064     }
1065 }
1066 
1067 static void
ar_clear(VALUE hash)1068 ar_clear(VALUE hash)
1069 {
1070     if (RHASH_AR_TABLE(hash) != NULL) {
1071         RHASH_AR_TABLE_SIZE_SET(hash, 0);
1072         RHASH_AR_TABLE_BOUND_SET(hash, 0);
1073     }
1074     else {
1075         HASH_ASSERT(RHASH_AR_TABLE_SIZE(hash) == 0);
1076         HASH_ASSERT(RHASH_AR_TABLE_BOUND(hash) == 0);
1077     }
1078 }
1079 
1080 #if USE_TRANSIENT_HEAP
1081 void
rb_hash_transient_heap_evacuate(VALUE hash,int promote)1082 rb_hash_transient_heap_evacuate(VALUE hash, int promote)
1083 {
1084     if (RHASH_TRANSIENT_P(hash)) {
1085         ar_table *new_tab;
1086         ar_table *old_tab = RHASH_AR_TABLE(hash);
1087 
1088         if (UNLIKELY(old_tab == NULL)) {
1089             rb_gc_force_recycle(hash);
1090             return;
1091         }
1092         HASH_ASSERT(old_tab != NULL);
1093         if (promote) {
1094           promote:
1095             new_tab = ruby_xmalloc(sizeof(ar_table));
1096             RHASH_UNSET_TRANSIENT_FLAG(hash);
1097         }
1098         else {
1099             new_tab = rb_transient_heap_alloc(hash, sizeof(ar_table));
1100             if (new_tab == NULL) goto promote;
1101         }
1102         *new_tab = *old_tab;
1103         RHASH_AR_TABLE_SET(hash, new_tab);
1104     }
1105     hash_verify(hash);
1106 }
1107 #endif
1108 
1109 typedef int st_foreach_func(st_data_t, st_data_t, st_data_t);
1110 
1111 struct foreach_safe_arg {
1112     st_table *tbl;
1113     st_foreach_func *func;
1114     st_data_t arg;
1115 };
1116 
1117 static int
foreach_safe_i(st_data_t key,st_data_t value,st_data_t args,int error)1118 foreach_safe_i(st_data_t key, st_data_t value, st_data_t args, int error)
1119 {
1120     int status;
1121     struct foreach_safe_arg *arg = (void *)args;
1122 
1123     if (error) return ST_STOP;
1124     status = (*arg->func)(key, value, arg->arg);
1125     if (status == ST_CONTINUE) {
1126 	return ST_CHECK;
1127     }
1128     return status;
1129 }
1130 
1131 void
st_foreach_safe(st_table * table,int (* func)(ANYARGS),st_data_t a)1132 st_foreach_safe(st_table *table, int (*func)(ANYARGS), st_data_t a)
1133 {
1134     struct foreach_safe_arg arg;
1135 
1136     arg.tbl = table;
1137     arg.func = (st_foreach_func *)func;
1138     arg.arg = a;
1139     if (st_foreach_check(table, foreach_safe_i, (st_data_t)&arg, 0)) {
1140 	rb_raise(rb_eRuntimeError, "hash modified during iteration");
1141     }
1142 }
1143 
1144 typedef int rb_foreach_func(VALUE, VALUE, VALUE);
1145 
1146 struct hash_foreach_arg {
1147     VALUE hash;
1148     rb_foreach_func *func;
1149     VALUE arg;
1150 };
1151 
1152 static int
hash_ar_foreach_iter(st_data_t key,st_data_t value,st_data_t argp,int error)1153 hash_ar_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error)
1154 {
1155     struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp;
1156     int status;
1157 
1158     if (error) return ST_STOP;
1159     status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg);
1160     /* TODO: rehash check? rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); */
1161 
1162     switch (status) {
1163       case ST_DELETE:
1164         return ST_DELETE;
1165       case ST_CONTINUE:
1166         break;
1167       case ST_STOP:
1168         return ST_STOP;
1169     }
1170     return ST_CHECK;
1171 }
1172 
1173 static int
hash_foreach_iter(st_data_t key,st_data_t value,st_data_t argp,int error)1174 hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error)
1175 {
1176     struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp;
1177     int status;
1178     st_table *tbl;
1179 
1180     if (error) return ST_STOP;
1181     tbl = RHASH_ST_TABLE(arg->hash);
1182     status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg);
1183     if (RHASH_ST_TABLE(arg->hash) != tbl) {
1184     	rb_raise(rb_eRuntimeError, "rehash occurred during iteration");
1185     }
1186     switch (status) {
1187       case ST_DELETE:
1188 	return ST_DELETE;
1189       case ST_CONTINUE:
1190 	break;
1191       case ST_STOP:
1192 	return ST_STOP;
1193     }
1194     return ST_CHECK;
1195 }
1196 
1197 static VALUE
hash_foreach_ensure_rollback(VALUE hash)1198 hash_foreach_ensure_rollback(VALUE hash)
1199 {
1200     RHASH_ITER_LEV(hash)++;
1201     return 0;
1202 }
1203 
1204 static VALUE
hash_foreach_ensure(VALUE hash)1205 hash_foreach_ensure(VALUE hash)
1206 {
1207     RHASH_ITER_LEV(hash)--;
1208     return 0;
1209 }
1210 
1211 int
rb_hash_stlike_foreach(VALUE hash,int (* func)(ANYARGS),st_data_t arg)1212 rb_hash_stlike_foreach(VALUE hash, int (*func)(ANYARGS), st_data_t arg)
1213 {
1214     if (RHASH_AR_TABLE_P(hash)) {
1215         return ar_foreach(hash, func, arg);
1216     }
1217     else {
1218         return st_foreach(RHASH_ST_TABLE(hash), func, arg);
1219     }
1220 }
1221 
1222 static VALUE
hash_foreach_call(VALUE arg)1223 hash_foreach_call(VALUE arg)
1224 {
1225     VALUE hash = ((struct hash_foreach_arg *)arg)->hash;
1226     int ret = 0;
1227     if (RHASH_AR_TABLE_P(hash)) {
1228         ret = ar_foreach_check(hash, hash_ar_foreach_iter,
1229                                    (st_data_t)arg, (st_data_t)Qundef);
1230     }
1231     else if (RHASH_ST_TABLE_P(hash)) {
1232         ret = st_foreach_check(RHASH_ST_TABLE(hash), hash_foreach_iter,
1233                                (st_data_t)arg, (st_data_t)Qundef);
1234     }
1235     if (ret) {
1236         rb_raise(rb_eRuntimeError, "ret: %d, hash modified during iteration", ret);
1237     }
1238     return Qnil;
1239 }
1240 
1241 void
rb_hash_foreach(VALUE hash,int (* func)(ANYARGS),VALUE farg)1242 rb_hash_foreach(VALUE hash, int (*func)(ANYARGS), VALUE farg)
1243 {
1244     struct hash_foreach_arg arg;
1245 
1246     if (RHASH_TABLE_EMPTY_P(hash))
1247         return;
1248     RHASH_ITER_LEV(hash)++;
1249     arg.hash = hash;
1250     arg.func = (rb_foreach_func *)func;
1251     arg.arg  = farg;
1252     rb_ensure(hash_foreach_call, (VALUE)&arg, hash_foreach_ensure, hash);
1253     hash_verify(hash);
1254 }
1255 
1256 static VALUE
hash_alloc_flags(VALUE klass,VALUE flags,VALUE ifnone)1257 hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone)
1258 {
1259     const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0);
1260     NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags);
1261 
1262     RHASH_SET_IFNONE((VALUE)hash, ifnone);
1263 
1264     return (VALUE)hash;
1265 }
1266 
1267 static VALUE
hash_alloc(VALUE klass)1268 hash_alloc(VALUE klass)
1269 {
1270     return hash_alloc_flags(klass, 0, Qnil);
1271 }
1272 
1273 static VALUE
empty_hash_alloc(VALUE klass)1274 empty_hash_alloc(VALUE klass)
1275 {
1276     RUBY_DTRACE_CREATE_HOOK(HASH, 0);
1277 
1278     return hash_alloc(klass);
1279 }
1280 
1281 VALUE
rb_hash_new(void)1282 rb_hash_new(void)
1283 {
1284     return hash_alloc(rb_cHash);
1285 }
1286 
1287 VALUE
rb_hash_new_compare_by_id(void)1288 rb_hash_new_compare_by_id(void)
1289 {
1290     VALUE hash = rb_hash_new();
1291     RHASH_ST_TABLE_SET(hash, rb_init_identtable());
1292     return hash;
1293 }
1294 
1295 MJIT_FUNC_EXPORTED VALUE
rb_hash_new_with_size(st_index_t size)1296 rb_hash_new_with_size(st_index_t size)
1297 {
1298     VALUE ret = rb_hash_new();
1299     if (size == 0) {
1300         /* do nothing */
1301     }
1302     else if (size <= RHASH_AR_TABLE_MAX_SIZE) {
1303         ar_alloc_table(ret);
1304     }
1305     else {
1306         RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size));
1307     }
1308     return ret;
1309 }
1310 
1311 static VALUE
hash_dup(VALUE hash,VALUE klass,VALUE flags)1312 hash_dup(VALUE hash, VALUE klass, VALUE flags)
1313 {
1314     VALUE ret = hash_alloc_flags(klass, flags,
1315 				 RHASH_IFNONE(hash));
1316     if (!RHASH_EMPTY_P(hash)) {
1317         if (RHASH_AR_TABLE_P(hash))
1318             ar_copy(ret, hash);
1319         else if (RHASH_ST_TABLE_P(hash))
1320             RHASH_ST_TABLE_SET(ret, st_copy(RHASH_ST_TABLE(hash)));
1321     }
1322     return ret;
1323 }
1324 
1325 VALUE
rb_hash_dup(VALUE hash)1326 rb_hash_dup(VALUE hash)
1327 {
1328     const VALUE flags = RBASIC(hash)->flags;
1329     VALUE ret = hash_dup(hash, rb_obj_class(hash),
1330 			 flags & (FL_EXIVAR|FL_TAINT|HASH_PROC_DEFAULT));
1331     if (flags & FL_EXIVAR)
1332         rb_copy_generic_ivar(ret, hash);
1333     return ret;
1334 }
1335 
1336 MJIT_FUNC_EXPORTED VALUE
rb_hash_resurrect(VALUE hash)1337 rb_hash_resurrect(VALUE hash)
1338 {
1339     VALUE ret = hash_dup(hash, rb_cHash, 0);
1340     return ret;
1341 }
1342 
1343 static void
rb_hash_modify_check(VALUE hash)1344 rb_hash_modify_check(VALUE hash)
1345 {
1346     rb_check_frozen(hash);
1347 }
1348 
1349 MJIT_FUNC_EXPORTED struct st_table *
1350 #if RHASH_CONVERT_TABLE_DEBUG
rb_hash_tbl_raw(VALUE hash,const char * file,int line)1351 rb_hash_tbl_raw(VALUE hash, const char *file, int line)
1352 {
1353     return ar_force_convert_table(hash, file, line);
1354 }
1355 #else
1356 rb_hash_tbl_raw(VALUE hash)
1357 {
1358     return ar_force_convert_table(hash, NULL, 0);
1359 }
1360 #endif
1361 
1362 struct st_table *
rb_hash_tbl(VALUE hash,const char * file,int line)1363 rb_hash_tbl(VALUE hash, const char *file, int line)
1364 {
1365     OBJ_WB_UNPROTECT(hash);
1366     return RHASH_TBL_RAW(hash);
1367 }
1368 
1369 static void
rb_hash_modify(VALUE hash)1370 rb_hash_modify(VALUE hash)
1371 {
1372     rb_hash_modify_check(hash);
1373 }
1374 
1375 NORETURN(static void no_new_key(void));
1376 static void
no_new_key(void)1377 no_new_key(void)
1378 {
1379     rb_raise(rb_eRuntimeError, "can't add a new key into hash during iteration");
1380 }
1381 
1382 struct update_callback_arg {
1383     VALUE hash;
1384     st_data_t arg;
1385 };
1386 
1387 #define NOINSERT_UPDATE_CALLBACK(func)                                       \
1388 static int                                                                   \
1389 func##_noinsert(st_data_t *key, st_data_t *val, st_data_t arg, int existing) \
1390 {                                                                            \
1391     if (!existing) no_new_key();                                             \
1392     return func(key, val, (struct update_arg *)arg, existing);               \
1393 }                                                                            \
1394                                                                              \
1395 static int                                                                   \
1396 func##_insert(st_data_t *key, st_data_t *val, st_data_t arg, int existing)   \
1397 {                                                                            \
1398     return func(key, val, (struct update_arg *)arg, existing);               \
1399 }
1400 
1401 struct update_arg {
1402     st_data_t arg;
1403     VALUE hash;
1404     VALUE new_key;
1405     VALUE old_key;
1406     VALUE new_value;
1407     VALUE old_value;
1408 };
1409 
1410 typedef int (*tbl_update_func)(st_data_t *, st_data_t *, st_data_t, int);
1411 
1412 int
rb_hash_stlike_update(VALUE hash,st_data_t key,st_update_callback_func func,st_data_t arg)1413 rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func func, st_data_t arg)
1414 {
1415     if (RHASH_AR_TABLE_P(hash)) {
1416         int result = ar_update(hash, (st_data_t)key, func, arg);
1417         if (result == -1) {
1418             ar_try_convert_table(hash);
1419         }
1420         else {
1421             return result;
1422         }
1423     }
1424 
1425     return st_update(RHASH_ST_TABLE(hash), (st_data_t)key, func, arg);
1426 }
1427 
1428 static int
tbl_update(VALUE hash,VALUE key,tbl_update_func func,st_data_t optional_arg)1429 tbl_update(VALUE hash, VALUE key, tbl_update_func func, st_data_t optional_arg)
1430 {
1431     struct update_arg arg;
1432     int result;
1433 
1434     arg.arg = optional_arg;
1435     arg.hash = hash;
1436     arg.new_key = 0;
1437     arg.old_key = Qundef;
1438     arg.new_value = 0;
1439     arg.old_value = Qundef;
1440 
1441     result = rb_hash_stlike_update(hash, key, func, (st_data_t)&arg);
1442 
1443     /* write barrier */
1444     if (arg.new_key)   RB_OBJ_WRITTEN(hash, arg.old_key, arg.new_key);
1445     if (arg.new_value) RB_OBJ_WRITTEN(hash, arg.old_value, arg.new_value);
1446 
1447     return result;
1448 }
1449 
1450 #define UPDATE_CALLBACK(iter_lev, func) ((iter_lev) > 0 ? func##_noinsert : func##_insert)
1451 
1452 #define RHASH_UPDATE_ITER(h, iter_lev, key, func, a) do {                        \
1453     tbl_update((h), (key), UPDATE_CALLBACK((iter_lev), func), (st_data_t)(a)); \
1454 } while (0)
1455 
1456 #define RHASH_UPDATE(hash, key, func, arg) \
1457     RHASH_UPDATE_ITER(hash, RHASH_ITER_LEV(hash), key, func, arg)
1458 
1459 static void
set_proc_default(VALUE hash,VALUE proc)1460 set_proc_default(VALUE hash, VALUE proc)
1461 {
1462     if (rb_proc_lambda_p(proc)) {
1463 	int n = rb_proc_arity(proc);
1464 
1465 	if (n != 2 && (n >= 0 || n < -3)) {
1466 	    if (n < 0) n = -n-1;
1467 	    rb_raise(rb_eTypeError, "default_proc takes two arguments (2 for %d)", n);
1468 	}
1469     }
1470 
1471     FL_SET_RAW(hash, HASH_PROC_DEFAULT);
1472     RHASH_SET_IFNONE(hash, proc);
1473 }
1474 
1475 /*
1476  *  call-seq:
1477  *     Hash.new                          -> new_hash
1478  *     Hash.new(obj)                     -> new_hash
1479  *     Hash.new {|hash, key| block }     -> new_hash
1480  *
1481  *  Returns a new, empty hash. If this hash is subsequently accessed by
1482  *  a key that doesn't correspond to a hash entry, the value returned
1483  *  depends on the style of <code>new</code> used to create the hash. In
1484  *  the first form, the access returns <code>nil</code>. If
1485  *  <i>obj</i> is specified, this single object will be used for
1486  *  all <em>default values</em>. If a block is specified, it will be
1487  *  called with the hash object and the key, and should return the
1488  *  default value. It is the block's responsibility to store the value
1489  *  in the hash if required.
1490  *
1491  *     h = Hash.new("Go Fish")
1492  *     h["a"] = 100
1493  *     h["b"] = 200
1494  *     h["a"]           #=> 100
1495  *     h["c"]           #=> "Go Fish"
1496  *     # The following alters the single default object
1497  *     h["c"].upcase!   #=> "GO FISH"
1498  *     h["d"]           #=> "GO FISH"
1499  *     h.keys           #=> ["a", "b"]
1500  *
1501  *     # While this creates a new default object each time
1502  *     h = Hash.new { |hash, key| hash[key] = "Go Fish: #{key}" }
1503  *     h["c"]           #=> "Go Fish: c"
1504  *     h["c"].upcase!   #=> "GO FISH: C"
1505  *     h["d"]           #=> "Go Fish: d"
1506  *     h.keys           #=> ["c", "d"]
1507  *
1508  */
1509 
1510 static VALUE
rb_hash_initialize(int argc,VALUE * argv,VALUE hash)1511 rb_hash_initialize(int argc, VALUE *argv, VALUE hash)
1512 {
1513     VALUE ifnone;
1514 
1515     rb_hash_modify(hash);
1516     if (rb_block_given_p()) {
1517 	rb_check_arity(argc, 0, 0);
1518 	ifnone = rb_block_proc();
1519 	SET_PROC_DEFAULT(hash, ifnone);
1520     }
1521     else {
1522 	rb_check_arity(argc, 0, 1);
1523 	ifnone = argc == 0 ? Qnil : argv[0];
1524 	RHASH_SET_IFNONE(hash, ifnone);
1525     }
1526 
1527     return hash;
1528 }
1529 
1530 /*
1531  *  call-seq:
1532  *     Hash[ key, value, ... ]         -> new_hash
1533  *     Hash[ [ [key, value], ... ] ]   -> new_hash
1534  *     Hash[ object ]                  -> new_hash
1535  *
1536  *  Creates a new hash populated with the given objects.
1537  *
1538  *  Similar to the literal <code>{ _key_ => _value_, ... }</code>. In the first
1539  *  form, keys and values occur in pairs, so there must be an even number of
1540  *  arguments.
1541  *
1542  *  The second and third form take a single argument which is either an array
1543  *  of key-value pairs or an object convertible to a hash.
1544  *
1545  *     Hash["a", 100, "b", 200]             #=> {"a"=>100, "b"=>200}
1546  *     Hash[ [ ["a", 100], ["b", 200] ] ]   #=> {"a"=>100, "b"=>200}
1547  *     Hash["a" => 100, "b" => 200]         #=> {"a"=>100, "b"=>200}
1548  */
1549 
1550 static VALUE
rb_hash_s_create(int argc,VALUE * argv,VALUE klass)1551 rb_hash_s_create(int argc, VALUE *argv, VALUE klass)
1552 {
1553     VALUE hash, tmp;
1554 
1555     if (argc == 1) {
1556         tmp = rb_hash_s_try_convert(Qnil, argv[0]);
1557 	if (!NIL_P(tmp)) {
1558 	    hash = hash_alloc(klass);
1559             if (RHASH_AR_TABLE_P(tmp)) {
1560                 ar_copy(hash, tmp);
1561 	    }
1562             else {
1563                 RHASH_ST_TABLE_SET(hash, st_copy(RHASH_ST_TABLE(tmp)));
1564             }
1565 	    return hash;
1566 	}
1567 
1568 	tmp = rb_check_array_type(argv[0]);
1569 	if (!NIL_P(tmp)) {
1570 	    long i;
1571 
1572 	    hash = hash_alloc(klass);
1573 	    for (i = 0; i < RARRAY_LEN(tmp); ++i) {
1574 		VALUE e = RARRAY_AREF(tmp, i);
1575 		VALUE v = rb_check_array_type(e);
1576 		VALUE key, val = Qnil;
1577 
1578 		if (NIL_P(v)) {
1579 #if 0 /* refix in the next release */
1580 		    rb_raise(rb_eArgError, "wrong element type %s at %ld (expected array)",
1581 			     rb_builtin_class_name(e), i);
1582 
1583 #else
1584 		    rb_warn("wrong element type %s at %ld (expected array)",
1585 			    rb_builtin_class_name(e), i);
1586 		    rb_warn("ignoring wrong elements is deprecated, remove them explicitly");
1587 		    rb_warn("this causes ArgumentError in the next release");
1588 		    continue;
1589 #endif
1590 		}
1591 		switch (RARRAY_LEN(v)) {
1592 		  default:
1593 		    rb_raise(rb_eArgError, "invalid number of elements (%ld for 1..2)",
1594 			     RARRAY_LEN(v));
1595 		  case 2:
1596 		    val = RARRAY_AREF(v, 1);
1597 		  case 1:
1598 		    key = RARRAY_AREF(v, 0);
1599 		    rb_hash_aset(hash, key, val);
1600 		}
1601 	    }
1602 	    return hash;
1603 	}
1604     }
1605     if (argc % 2 != 0) {
1606 	rb_raise(rb_eArgError, "odd number of arguments for Hash");
1607     }
1608 
1609     hash = hash_alloc(klass);
1610     rb_hash_bulk_insert(argc, argv, hash);
1611     hash_verify(hash);
1612     return hash;
1613 }
1614 
1615 VALUE
rb_to_hash_type(VALUE hash)1616 rb_to_hash_type(VALUE hash)
1617 {
1618     return rb_convert_type_with_id(hash, T_HASH, "Hash", idTo_hash);
1619 }
1620 #define to_hash rb_to_hash_type
1621 
1622 VALUE
rb_check_hash_type(VALUE hash)1623 rb_check_hash_type(VALUE hash)
1624 {
1625     return rb_check_convert_type_with_id(hash, T_HASH, "Hash", idTo_hash);
1626 }
1627 
1628 /*
1629  *  call-seq:
1630  *     Hash.try_convert(obj) -> hash or nil
1631  *
1632  *  Try to convert <i>obj</i> into a hash, using to_hash method.
1633  *  Returns converted hash or nil if <i>obj</i> cannot be converted
1634  *  for any reason.
1635  *
1636  *     Hash.try_convert({1=>2})   # => {1=>2}
1637  *     Hash.try_convert("1=>2")   # => nil
1638  */
1639 static VALUE
rb_hash_s_try_convert(VALUE dummy,VALUE hash)1640 rb_hash_s_try_convert(VALUE dummy, VALUE hash)
1641 {
1642     return rb_check_hash_type(hash);
1643 }
1644 
1645 struct rehash_arg {
1646     VALUE hash;
1647     st_table *tbl;
1648 };
1649 
1650 static int
rb_hash_rehash_i(VALUE key,VALUE value,VALUE arg)1651 rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg)
1652 {
1653     if (RHASH_AR_TABLE_P(arg)) {
1654         ar_insert(arg, (st_data_t)key, (st_data_t)value);
1655     }
1656     else {
1657         st_insert(RHASH_ST_TABLE(arg), (st_data_t)key, (st_data_t)value);
1658     }
1659     return ST_CONTINUE;
1660 }
1661 
1662 /*
1663  *  call-seq:
1664  *     hsh.rehash -> hsh
1665  *
1666  *  Rebuilds the hash based on the current hash values for each key. If
1667  *  values of key objects have changed since they were inserted, this
1668  *  method will reindex <i>hsh</i>. If <code>Hash#rehash</code> is
1669  *  called while an iterator is traversing the hash, a
1670  *  <code>RuntimeError</code> will be raised in the iterator.
1671  *
1672  *     a = [ "a", "b" ]
1673  *     c = [ "c", "d" ]
1674  *     h = { a => 100, c => 300 }
1675  *     h[a]       #=> 100
1676  *     a[0] = "z"
1677  *     h[a]       #=> nil
1678  *     h.rehash   #=> {["z", "b"]=>100, ["c", "d"]=>300}
1679  *     h[a]       #=> 100
1680  */
1681 
1682 VALUE
rb_hash_rehash(VALUE hash)1683 rb_hash_rehash(VALUE hash)
1684 {
1685     VALUE tmp;
1686     st_table *tbl;
1687 
1688     if (RHASH_ITER_LEV(hash) > 0) {
1689 	rb_raise(rb_eRuntimeError, "rehash during iteration");
1690     }
1691     rb_hash_modify_check(hash);
1692     if (RHASH_AR_TABLE_P(hash)) {
1693         tmp = hash_alloc(0);
1694         ar_alloc_table(tmp);
1695         rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
1696         ar_free_and_clear_table(hash);
1697         ar_copy(hash, tmp);
1698         ar_free_and_clear_table(tmp);
1699     }
1700     else if (RHASH_ST_TABLE_P(hash)) {
1701         st_table *old_tab = RHASH_ST_TABLE(hash);
1702         tmp = hash_alloc(0);
1703         tbl = st_init_table_with_size(old_tab->type, old_tab->num_entries);
1704         RHASH_ST_TABLE_SET(tmp, tbl);
1705         rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
1706         st_free_table(old_tab);
1707         RHASH_ST_TABLE_SET(hash, tbl);
1708         RHASH_ST_CLEAR(tmp);
1709     }
1710     hash_verify(hash);
1711     return hash;
1712 }
1713 
1714 VALUE
rb_hash_default_value(VALUE hash,VALUE key)1715 rb_hash_default_value(VALUE hash, VALUE key)
1716 {
1717     if (rb_method_basic_definition_p(CLASS_OF(hash), id_default)) {
1718 	VALUE ifnone = RHASH_IFNONE(hash);
1719 	if (!FL_TEST(hash, HASH_PROC_DEFAULT)) return ifnone;
1720 	if (key == Qundef) return Qnil;
1721 	return rb_funcall(ifnone, id_yield, 2, hash, key);
1722     }
1723     else {
1724 	return rb_funcall(hash, id_default, 1, key);
1725     }
1726 }
1727 
1728 /*
1729  *  call-seq:
1730  *     hsh[key]    ->  value
1731  *
1732  *  Element Reference---Retrieves the <i>value</i> object corresponding
1733  *  to the <i>key</i> object. If not found, returns the default value (see
1734  *  <code>Hash::new</code> for details).
1735  *
1736  *     h = { "a" => 100, "b" => 200 }
1737  *     h["a"]   #=> 100
1738  *     h["c"]   #=> nil
1739  *
1740  */
1741 
1742 VALUE
rb_hash_aref(VALUE hash,VALUE key)1743 rb_hash_aref(VALUE hash, VALUE key)
1744 {
1745     st_data_t val;
1746 
1747     if (RHASH_AR_TABLE_P(hash) && ar_lookup(hash, key, &val)) {
1748         return (VALUE)val;
1749     }
1750     else if (RHASH_ST_TABLE_P(hash) && st_lookup(RHASH_ST_TABLE(hash), key, &val)) {
1751         return (VALUE)val;
1752     }
1753     hash_verify(hash);
1754     return rb_hash_default_value(hash, key);
1755 }
1756 
1757 MJIT_FUNC_EXPORTED int
rb_hash_stlike_lookup(VALUE hash,st_data_t key,st_data_t * pval)1758 rb_hash_stlike_lookup(VALUE hash, st_data_t key, st_data_t *pval)
1759 {
1760     hash_verify(hash);
1761 
1762     if (RHASH_AR_TABLE_P(hash)) {
1763         return ar_lookup(hash, key, pval);
1764     }
1765     else {
1766         return st_lookup(RHASH_ST_TABLE(hash), key, pval);
1767     }
1768 }
1769 
1770 VALUE
rb_hash_lookup2(VALUE hash,VALUE key,VALUE def)1771 rb_hash_lookup2(VALUE hash, VALUE key, VALUE def)
1772 {
1773     st_data_t val;
1774 
1775     if (rb_hash_stlike_lookup(hash, key, &val)) {
1776         return (VALUE)val;
1777     }
1778     else {
1779         return def; /* without Hash#default */
1780     }
1781 }
1782 
1783 VALUE
rb_hash_lookup(VALUE hash,VALUE key)1784 rb_hash_lookup(VALUE hash, VALUE key)
1785 {
1786     return rb_hash_lookup2(hash, key, Qnil);
1787 }
1788 
1789 /*
1790  *  call-seq:
1791  *     hsh.fetch(key [, default] )       -> obj
1792  *     hsh.fetch(key) {| key | block }   -> obj
1793  *
1794  *  Returns a value from the hash for the given key. If the key can't be
1795  *  found, there are several options: With no other arguments, it will
1796  *  raise a <code>KeyError</code> exception; if <i>default</i> is given,
1797  *  then that will be returned; if the optional code block is specified,
1798  *  then that will be run and its result returned.
1799  *
1800  *     h = { "a" => 100, "b" => 200 }
1801  *     h.fetch("a")                            #=> 100
1802  *     h.fetch("z", "go fish")                 #=> "go fish"
1803  *     h.fetch("z") { |el| "go fish, #{el}"}   #=> "go fish, z"
1804  *
1805  *  The following example shows that an exception is raised if the key
1806  *  is not found and a default value is not supplied.
1807  *
1808  *     h = { "a" => 100, "b" => 200 }
1809  *     h.fetch("z")
1810  *
1811  *  <em>produces:</em>
1812  *
1813  *     prog.rb:2:in `fetch': key not found (KeyError)
1814  *      from prog.rb:2
1815  *
1816  */
1817 
1818 static VALUE
rb_hash_fetch_m(int argc,VALUE * argv,VALUE hash)1819 rb_hash_fetch_m(int argc, VALUE *argv, VALUE hash)
1820 {
1821     VALUE key;
1822     st_data_t val;
1823     long block_given;
1824 
1825     rb_check_arity(argc, 1, 2);
1826     key = argv[0];
1827 
1828     block_given = rb_block_given_p();
1829     if (block_given && argc == 2) {
1830 	rb_warn("block supersedes default value argument");
1831     }
1832     if (RHASH_AR_TABLE_P(hash) && ar_lookup(hash, key, &val)) {
1833         return (VALUE)val;
1834     }
1835     else if (RHASH_ST_TABLE_P(hash) && st_lookup(RHASH_ST_TABLE(hash), key, &val)) {
1836         return (VALUE)val;
1837     }
1838     if (block_given) return rb_yield(key);
1839     if (argc == 1) {
1840         VALUE desc = rb_protect(rb_inspect, key, 0);
1841         if (NIL_P(desc)) {
1842             desc = rb_any_to_s(key);
1843 	}
1844         desc = rb_str_ellipsize(desc, 65);
1845         rb_key_err_raise(rb_sprintf("key not found: %"PRIsVALUE, desc), hash, key);
1846     }
1847     hash_verify(hash);
1848     return argv[1];
1849 }
1850 
1851 VALUE
rb_hash_fetch(VALUE hash,VALUE key)1852 rb_hash_fetch(VALUE hash, VALUE key)
1853 {
1854     return rb_hash_fetch_m(1, &key, hash);
1855 }
1856 
1857 /*
1858  *  call-seq:
1859  *     hsh.default(key=nil)   -> obj
1860  *
1861  *  Returns the default value, the value that would be returned by
1862  *  <i>hsh</i>[<i>key</i>] if <i>key</i> did not exist in <i>hsh</i>.
1863  *  See also <code>Hash::new</code> and <code>Hash#default=</code>.
1864  *
1865  *     h = Hash.new                            #=> {}
1866  *     h.default                               #=> nil
1867  *     h.default(2)                            #=> nil
1868  *
1869  *     h = Hash.new("cat")                     #=> {}
1870  *     h.default                               #=> "cat"
1871  *     h.default(2)                            #=> "cat"
1872  *
1873  *     h = Hash.new {|h,k| h[k] = k.to_i*10}   #=> {}
1874  *     h.default                               #=> nil
1875  *     h.default(2)                            #=> 20
1876  */
1877 
1878 static VALUE
rb_hash_default(int argc,VALUE * argv,VALUE hash)1879 rb_hash_default(int argc, VALUE *argv, VALUE hash)
1880 {
1881     VALUE args[2], ifnone;
1882 
1883     rb_check_arity(argc, 0, 1);
1884     ifnone = RHASH_IFNONE(hash);
1885     if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
1886 	if (argc == 0) return Qnil;
1887 	args[0] = hash;
1888 	args[1] = argv[0];
1889 	return rb_funcallv(ifnone, id_yield, 2, args);
1890     }
1891     return ifnone;
1892 }
1893 
1894 /*
1895  *  call-seq:
1896  *     hsh.default = obj     -> obj
1897  *
1898  *  Sets the default value, the value returned for a key that does not
1899  *  exist in the hash. It is not possible to set the default to a
1900  *  <code>Proc</code> that will be executed on each key lookup.
1901  *
1902  *     h = { "a" => 100, "b" => 200 }
1903  *     h.default = "Go fish"
1904  *     h["a"]     #=> 100
1905  *     h["z"]     #=> "Go fish"
1906  *     # This doesn't do what you might hope...
1907  *     h.default = proc do |hash, key|
1908  *       hash[key] = key + key
1909  *     end
1910  *     h[2]       #=> #<Proc:0x401b3948@-:6>
1911  *     h["cat"]   #=> #<Proc:0x401b3948@-:6>
1912  */
1913 
1914 static VALUE
rb_hash_set_default(VALUE hash,VALUE ifnone)1915 rb_hash_set_default(VALUE hash, VALUE ifnone)
1916 {
1917     rb_hash_modify_check(hash);
1918     SET_DEFAULT(hash, ifnone);
1919     return ifnone;
1920 }
1921 
1922 /*
1923  *  call-seq:
1924  *     hsh.default_proc -> anObject
1925  *
1926  *  If <code>Hash::new</code> was invoked with a block, return that
1927  *  block, otherwise return <code>nil</code>.
1928  *
1929  *     h = Hash.new {|h,k| h[k] = k*k }   #=> {}
1930  *     p = h.default_proc                 #=> #<Proc:0x401b3d08@-:1>
1931  *     a = []                             #=> []
1932  *     p.call(a, 2)
1933  *     a                                  #=> [nil, nil, 4]
1934  */
1935 
1936 
1937 static VALUE
rb_hash_default_proc(VALUE hash)1938 rb_hash_default_proc(VALUE hash)
1939 {
1940     if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
1941 	return RHASH_IFNONE(hash);
1942     }
1943     return Qnil;
1944 }
1945 
1946 /*
1947  *  call-seq:
1948  *     hsh.default_proc = proc_obj or nil
1949  *
1950  *  Sets the default proc to be executed on each failed key lookup.
1951  *
1952  *     h.default_proc = proc do |hash, key|
1953  *       hash[key] = key + key
1954  *     end
1955  *     h[2]       #=> 4
1956  *     h["cat"]   #=> "catcat"
1957  */
1958 
1959 VALUE
rb_hash_set_default_proc(VALUE hash,VALUE proc)1960 rb_hash_set_default_proc(VALUE hash, VALUE proc)
1961 {
1962     VALUE b;
1963 
1964     rb_hash_modify_check(hash);
1965     if (NIL_P(proc)) {
1966 	SET_DEFAULT(hash, proc);
1967 	return proc;
1968     }
1969     b = rb_check_convert_type_with_id(proc, T_DATA, "Proc", idTo_proc);
1970     if (NIL_P(b) || !rb_obj_is_proc(b)) {
1971 	rb_raise(rb_eTypeError,
1972 		 "wrong default_proc type %s (expected Proc)",
1973 		 rb_obj_classname(proc));
1974     }
1975     proc = b;
1976     SET_PROC_DEFAULT(hash, proc);
1977     return proc;
1978 }
1979 
1980 static int
key_i(VALUE key,VALUE value,VALUE arg)1981 key_i(VALUE key, VALUE value, VALUE arg)
1982 {
1983     VALUE *args = (VALUE *)arg;
1984 
1985     if (rb_equal(value, args[0])) {
1986 	args[1] = key;
1987 	return ST_STOP;
1988     }
1989     return ST_CONTINUE;
1990 }
1991 
1992 /*
1993  *  call-seq:
1994  *     hsh.key(value)    -> key
1995  *
1996  *  Returns the key of an occurrence of a given value. If the value is
1997  *  not found, returns <code>nil</code>.
1998  *
1999  *     h = { "a" => 100, "b" => 200, "c" => 300, "d" => 300 }
2000  *     h.key(200)   #=> "b"
2001  *     h.key(300)   #=> "c"
2002  *     h.key(999)   #=> nil
2003  *
2004  */
2005 
2006 static VALUE
rb_hash_key(VALUE hash,VALUE value)2007 rb_hash_key(VALUE hash, VALUE value)
2008 {
2009     VALUE args[2];
2010 
2011     args[0] = value;
2012     args[1] = Qnil;
2013 
2014     rb_hash_foreach(hash, key_i, (VALUE)args);
2015 
2016     return args[1];
2017 }
2018 
2019 /* :nodoc: */
2020 static VALUE
rb_hash_index(VALUE hash,VALUE value)2021 rb_hash_index(VALUE hash, VALUE value)
2022 {
2023     rb_warn("Hash#index is deprecated; use Hash#key");
2024     return rb_hash_key(hash, value);
2025 }
2026 
2027 int
rb_hash_stlike_delete(VALUE hash,st_data_t * pkey,st_data_t * pval)2028 rb_hash_stlike_delete(VALUE hash, st_data_t *pkey, st_data_t *pval)
2029 {
2030     if (RHASH_AR_TABLE_P(hash)) {
2031         return ar_delete(hash, pkey, pval);
2032     }
2033     else {
2034         return st_delete(RHASH_ST_TABLE(hash), pkey, pval);
2035     }
2036 }
2037 
2038 /*
2039  * delete a specified entry a given key.
2040  * if there is the corresponding entry, return a value of the entry.
2041  * if there is no corresponding entry, return Qundef.
2042  */
2043 VALUE
rb_hash_delete_entry(VALUE hash,VALUE key)2044 rb_hash_delete_entry(VALUE hash, VALUE key)
2045 {
2046     st_data_t ktmp = (st_data_t)key, val;
2047 
2048     if (rb_hash_stlike_delete(hash, &ktmp, &val)) {
2049         return (VALUE)val;
2050     }
2051     else {
2052         return Qundef;
2053     }
2054 }
2055 
2056 /*
2057  * delete a specified entry by a given key.
2058  * if there is the corresponding entry, return a value of the entry.
2059  * if there is no corresponding entry, return Qnil.
2060  */
2061 VALUE
rb_hash_delete(VALUE hash,VALUE key)2062 rb_hash_delete(VALUE hash, VALUE key)
2063 {
2064     VALUE deleted_value = rb_hash_delete_entry(hash, key);
2065 
2066     if (deleted_value != Qundef) { /* likely pass */
2067 	return deleted_value;
2068     }
2069     else {
2070 	return Qnil;
2071     }
2072 }
2073 
2074 /*
2075  *  call-seq:
2076  *     hsh.delete(key)                   -> value
2077  *     hsh.delete(key) {| key | block }  -> value
2078  *
2079  *  Deletes the key-value pair and returns the value from <i>hsh</i> whose
2080  *  key is equal to <i>key</i>. If the key is not found, it returns
2081  *  <em>nil</em>. If the optional code block is given and the
2082  *  key is not found, pass in the key and return the result of
2083  *  <i>block</i>.
2084  *
2085  *     h = { "a" => 100, "b" => 200 }
2086  *     h.delete("a")                              #=> 100
2087  *     h.delete("z")                              #=> nil
2088  *     h.delete("z") { |el| "#{el} not found" }   #=> "z not found"
2089  *
2090  */
2091 
2092 static VALUE
rb_hash_delete_m(VALUE hash,VALUE key)2093 rb_hash_delete_m(VALUE hash, VALUE key)
2094 {
2095     VALUE val;
2096 
2097     rb_hash_modify_check(hash);
2098     val = rb_hash_delete_entry(hash, key);
2099 
2100     if (val != Qundef) {
2101 	return val;
2102     }
2103     else {
2104 	if (rb_block_given_p()) {
2105 	    return rb_yield(key);
2106 	}
2107 	else {
2108 	    return Qnil;
2109 	}
2110     }
2111 }
2112 
2113 struct shift_var {
2114     VALUE key;
2115     VALUE val;
2116 };
2117 
2118 static int
shift_i_safe(VALUE key,VALUE value,VALUE arg)2119 shift_i_safe(VALUE key, VALUE value, VALUE arg)
2120 {
2121     struct shift_var *var = (struct shift_var *)arg;
2122 
2123     var->key = key;
2124     var->val = value;
2125     return ST_STOP;
2126 }
2127 
2128 /*
2129  *  call-seq:
2130  *     hsh.shift -> anArray or obj
2131  *
2132  *  Removes a key-value pair from <i>hsh</i> and returns it as the
2133  *  two-item array <code>[</code> <i>key, value</i> <code>]</code>, or
2134  *  the hash's default value if the hash is empty.
2135  *
2136  *     h = { 1 => "a", 2 => "b", 3 => "c" }
2137  *     h.shift   #=> [1, "a"]
2138  *     h         #=> {2=>"b", 3=>"c"}
2139  */
2140 
2141 static VALUE
rb_hash_shift(VALUE hash)2142 rb_hash_shift(VALUE hash)
2143 {
2144     struct shift_var var;
2145 
2146     rb_hash_modify_check(hash);
2147     if (RHASH_AR_TABLE_P(hash)) {
2148 	var.key = Qundef;
2149 	if (RHASH_ITER_LEV(hash) == 0) {
2150             if (ar_shift(hash, &var.key, &var.val)) {
2151 		return rb_assoc_new(var.key, var.val);
2152 	    }
2153 	}
2154 	else {
2155             rb_hash_foreach(hash, shift_i_safe, (VALUE)&var);
2156             if (var.key != Qundef) {
2157                 rb_hash_delete_entry(hash, var.key);
2158                 return rb_assoc_new(var.key, var.val);
2159             }
2160         }
2161     }
2162     if (RHASH_ST_TABLE_P(hash)) {
2163         var.key = Qundef;
2164         if (RHASH_ITER_LEV(hash) == 0) {
2165             if (st_shift(RHASH_ST_TABLE(hash), &var.key, &var.val)) {
2166                 return rb_assoc_new(var.key, var.val);
2167             }
2168         }
2169         else {
2170 	    rb_hash_foreach(hash, shift_i_safe, (VALUE)&var);
2171 	    if (var.key != Qundef) {
2172 		rb_hash_delete_entry(hash, var.key);
2173 		return rb_assoc_new(var.key, var.val);
2174 	    }
2175 	}
2176     }
2177     return rb_hash_default_value(hash, Qnil);
2178 }
2179 
2180 static int
delete_if_i(VALUE key,VALUE value,VALUE hash)2181 delete_if_i(VALUE key, VALUE value, VALUE hash)
2182 {
2183     if (RTEST(rb_yield_values(2, key, value))) {
2184 	return ST_DELETE;
2185     }
2186     return ST_CONTINUE;
2187 }
2188 
2189 static VALUE
hash_enum_size(VALUE hash,VALUE args,VALUE eobj)2190 hash_enum_size(VALUE hash, VALUE args, VALUE eobj)
2191 {
2192     return rb_hash_size(hash);
2193 }
2194 
2195 /*
2196  *  call-seq:
2197  *     hsh.delete_if {| key, value | block }  -> hsh
2198  *     hsh.delete_if                          -> an_enumerator
2199  *
2200  *  Deletes every key-value pair from <i>hsh</i> for which <i>block</i>
2201  *  evaluates to <code>true</code>.
2202  *
2203  *  If no block is given, an enumerator is returned instead.
2204  *
2205  *     h = { "a" => 100, "b" => 200, "c" => 300 }
2206  *     h.delete_if {|key, value| key >= "b" }   #=> {"a"=>100}
2207  *
2208  */
2209 
2210 VALUE
rb_hash_delete_if(VALUE hash)2211 rb_hash_delete_if(VALUE hash)
2212 {
2213     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2214     rb_hash_modify_check(hash);
2215     if (!RHASH_TABLE_EMPTY_P(hash)) {
2216         rb_hash_foreach(hash, delete_if_i, hash);
2217     }
2218     return hash;
2219 }
2220 
2221 /*
2222  *  call-seq:
2223  *     hsh.reject! {| key, value | block }  -> hsh or nil
2224  *     hsh.reject!                          -> an_enumerator
2225  *
2226  *  Equivalent to <code>Hash#delete_if</code>, but returns
2227  *  <code>nil</code> if no changes were made.
2228  */
2229 
2230 VALUE
rb_hash_reject_bang(VALUE hash)2231 rb_hash_reject_bang(VALUE hash)
2232 {
2233     st_index_t n;
2234 
2235     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2236     rb_hash_modify(hash);
2237     n = RHASH_SIZE(hash);
2238     if (!n) return Qnil;
2239     rb_hash_foreach(hash, delete_if_i, hash);
2240     if (n == RHASH_SIZE(hash)) return Qnil;
2241     return hash;
2242 }
2243 
2244 static int
reject_i(VALUE key,VALUE value,VALUE result)2245 reject_i(VALUE key, VALUE value, VALUE result)
2246 {
2247     if (!RTEST(rb_yield_values(2, key, value))) {
2248 	rb_hash_aset(result, key, value);
2249     }
2250     return ST_CONTINUE;
2251 }
2252 
2253 /*
2254  *  call-seq:
2255  *     hsh.reject {|key, value| block}   -> a_hash
2256  *     hsh.reject                        -> an_enumerator
2257  *
2258  *  Returns a new hash consisting of entries for which the block returns false.
2259  *
2260  *  If no block is given, an enumerator is returned instead.
2261  *
2262  *     h = { "a" => 100, "b" => 200, "c" => 300 }
2263  *     h.reject {|k,v| k < "b"}  #=> {"b" => 200, "c" => 300}
2264  *     h.reject {|k,v| v > 100}  #=> {"a" => 100}
2265  */
2266 
2267 VALUE
rb_hash_reject(VALUE hash)2268 rb_hash_reject(VALUE hash)
2269 {
2270     VALUE result;
2271 
2272     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2273     if (RTEST(ruby_verbose)) {
2274 	VALUE klass;
2275 	if (HAS_EXTRA_STATES(hash, klass)) {
2276 	    rb_warn("extra states are no longer copied: %+"PRIsVALUE, hash);
2277 	}
2278     }
2279     result = rb_hash_new();
2280     if (!RHASH_EMPTY_P(hash)) {
2281 	rb_hash_foreach(hash, reject_i, result);
2282     }
2283     return result;
2284 }
2285 
2286 /*
2287  *  call-seq:
2288  *     hsh.slice(*keys) -> a_hash
2289  *
2290  *  Returns a hash containing only the given keys and their values.
2291  *
2292  *     h = { a: 100, b: 200, c: 300 }
2293  *     h.slice(:a)           #=> {:a=>100}
2294  *     h.slice(:b, :c, :d)   #=> {:b=>200, :c=>300}
2295  */
2296 
2297 static VALUE
rb_hash_slice(int argc,VALUE * argv,VALUE hash)2298 rb_hash_slice(int argc, VALUE *argv, VALUE hash)
2299 {
2300     int i;
2301     VALUE key, value, result;
2302 
2303     if (argc == 0 || RHASH_EMPTY_P(hash)) {
2304 	return rb_hash_new();
2305     }
2306     result = rb_hash_new_with_size(argc);
2307 
2308     for (i = 0; i < argc; i++) {
2309 	key = argv[i];
2310 	value = rb_hash_lookup2(hash, key, Qundef);
2311 	if (value != Qundef)
2312 	    rb_hash_aset(result, key, value);
2313     }
2314 
2315     return result;
2316 }
2317 
2318 /*
2319  * call-seq:
2320  *   hsh.values_at(key, ...)   -> array
2321  *
2322  * Return an array containing the values associated with the given keys.
2323  * Also see <code>Hash.select</code>.
2324  *
2325  *   h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" }
2326  *   h.values_at("cow", "cat")  #=> ["bovine", "feline"]
2327  */
2328 
2329 VALUE
rb_hash_values_at(int argc,VALUE * argv,VALUE hash)2330 rb_hash_values_at(int argc, VALUE *argv, VALUE hash)
2331 {
2332     VALUE result = rb_ary_new2(argc);
2333     long i;
2334 
2335     for (i=0; i<argc; i++) {
2336 	rb_ary_push(result, rb_hash_aref(hash, argv[i]));
2337     }
2338     return result;
2339 }
2340 
2341 /*
2342  * call-seq:
2343  *   hsh.fetch_values(key, ...)                 -> array
2344  *   hsh.fetch_values(key, ...) { |key| block } -> array
2345  *
2346  * Returns an array containing the values associated with the given keys
2347  * but also raises <code>KeyError</code> when one of keys can't be found.
2348  * Also see <code>Hash#values_at</code> and <code>Hash#fetch</code>.
2349  *
2350  *   h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" }
2351  *
2352  *   h.fetch_values("cow", "cat")                   #=> ["bovine", "feline"]
2353  *   h.fetch_values("cow", "bird")                  # raises KeyError
2354  *   h.fetch_values("cow", "bird") { |k| k.upcase } #=> ["bovine", "BIRD"]
2355  */
2356 
2357 VALUE
rb_hash_fetch_values(int argc,VALUE * argv,VALUE hash)2358 rb_hash_fetch_values(int argc, VALUE *argv, VALUE hash)
2359 {
2360     VALUE result = rb_ary_new2(argc);
2361     long i;
2362 
2363     for (i=0; i<argc; i++) {
2364 	rb_ary_push(result, rb_hash_fetch(hash, argv[i]));
2365     }
2366     return result;
2367 }
2368 
2369 static int
select_i(VALUE key,VALUE value,VALUE result)2370 select_i(VALUE key, VALUE value, VALUE result)
2371 {
2372     if (RTEST(rb_yield_values(2, key, value))) {
2373 	rb_hash_aset(result, key, value);
2374     }
2375     return ST_CONTINUE;
2376 }
2377 
2378 /*
2379  *  call-seq:
2380  *     hsh.select {|key, value| block}   -> a_hash
2381  *     hsh.select                        -> an_enumerator
2382  *     hsh.filter {|key, value| block}   -> a_hash
2383  *     hsh.filter                        -> an_enumerator
2384  *
2385  *  Returns a new hash consisting of entries for which the block returns true.
2386  *
2387  *  If no block is given, an enumerator is returned instead.
2388  *
2389  *     h = { "a" => 100, "b" => 200, "c" => 300 }
2390  *     h.select {|k,v| k > "a"}  #=> {"b" => 200, "c" => 300}
2391  *     h.select {|k,v| v < 200}  #=> {"a" => 100}
2392  *
2393  *  Hash#filter is an alias for Hash#select.
2394  */
2395 
2396 VALUE
rb_hash_select(VALUE hash)2397 rb_hash_select(VALUE hash)
2398 {
2399     VALUE result;
2400 
2401     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2402     result = rb_hash_new();
2403     if (!RHASH_EMPTY_P(hash)) {
2404 	rb_hash_foreach(hash, select_i, result);
2405     }
2406     return result;
2407 }
2408 
2409 static int
keep_if_i(VALUE key,VALUE value,VALUE hash)2410 keep_if_i(VALUE key, VALUE value, VALUE hash)
2411 {
2412     if (!RTEST(rb_yield_values(2, key, value))) {
2413 	return ST_DELETE;
2414     }
2415     return ST_CONTINUE;
2416 }
2417 
2418 /*
2419  *  call-seq:
2420  *     hsh.select! {| key, value | block }  -> hsh or nil
2421  *     hsh.select!                          -> an_enumerator
2422  *     hsh.filter! {| key, value | block }  -> hsh or nil
2423  *     hsh.filter!                          -> an_enumerator
2424  *
2425  *  Equivalent to Hash#keep_if, but returns
2426  *  +nil+ if no changes were made.
2427  *
2428  *  Hash#filter! is an alias for Hash#select!.
2429  */
2430 
2431 VALUE
rb_hash_select_bang(VALUE hash)2432 rb_hash_select_bang(VALUE hash)
2433 {
2434     st_index_t n;
2435 
2436     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2437     rb_hash_modify_check(hash);
2438     n = RHASH_SIZE(hash);
2439     if (!n) return Qnil;
2440     rb_hash_foreach(hash, keep_if_i, hash);
2441     if (n == RHASH_SIZE(hash)) return Qnil;
2442     return hash;
2443 }
2444 
2445 /*
2446  *  call-seq:
2447  *     hsh.keep_if {| key, value | block }  -> hsh
2448  *     hsh.keep_if                          -> an_enumerator
2449  *
2450  *  Deletes every key-value pair from <i>hsh</i> for which <i>block</i>
2451  *  evaluates to +false+.
2452  *
2453  *  If no block is given, an enumerator is returned instead.
2454  *
2455  *  See also Hash#select!.
2456  */
2457 
2458 VALUE
rb_hash_keep_if(VALUE hash)2459 rb_hash_keep_if(VALUE hash)
2460 {
2461     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2462     rb_hash_modify_check(hash);
2463     if (!RHASH_TABLE_EMPTY_P(hash)) {
2464         rb_hash_foreach(hash, keep_if_i, hash);
2465     }
2466     return hash;
2467 }
2468 
2469 static int
clear_i(VALUE key,VALUE value,VALUE dummy)2470 clear_i(VALUE key, VALUE value, VALUE dummy)
2471 {
2472     return ST_DELETE;
2473 }
2474 
2475 /*
2476  *  call-seq:
2477  *     hsh.clear -> hsh
2478  *
2479  *  Removes all key-value pairs from <i>hsh</i>.
2480  *
2481  *     h = { "a" => 100, "b" => 200 }   #=> {"a"=>100, "b"=>200}
2482  *     h.clear                          #=> {}
2483  *
2484  */
2485 
2486 VALUE
rb_hash_clear(VALUE hash)2487 rb_hash_clear(VALUE hash)
2488 {
2489     rb_hash_modify_check(hash);
2490 
2491     if (RHASH_ITER_LEV(hash) > 0) {
2492         rb_hash_foreach(hash, clear_i, 0);
2493     }
2494     else if (RHASH_AR_TABLE_P(hash)) {
2495         ar_clear(hash);
2496     }
2497     else {
2498         st_clear(RHASH_ST_TABLE(hash));
2499     }
2500 
2501     return hash;
2502 }
2503 
2504 static int
hash_aset(st_data_t * key,st_data_t * val,struct update_arg * arg,int existing)2505 hash_aset(st_data_t *key, st_data_t *val, struct update_arg *arg, int existing)
2506 {
2507     if (existing) {
2508 	arg->new_value = arg->arg;
2509 	arg->old_value = *val;
2510     }
2511     else {
2512 	arg->new_key = *key;
2513 	arg->new_value = arg->arg;
2514     }
2515     *val = arg->arg;
2516     return ST_CONTINUE;
2517 }
2518 
2519 VALUE
rb_hash_key_str(VALUE key)2520 rb_hash_key_str(VALUE key)
2521 {
2522     if (!RB_FL_ANY_RAW(key, FL_TAINT|FL_EXIVAR) && RBASIC_CLASS(key) == rb_cString) {
2523         return rb_fstring(key);
2524     }
2525     else {
2526 	return rb_str_new_frozen(key);
2527     }
2528 }
2529 
2530 static int
hash_aset_str(st_data_t * key,st_data_t * val,struct update_arg * arg,int existing)2531 hash_aset_str(st_data_t *key, st_data_t *val, struct update_arg *arg, int existing)
2532 {
2533     if (!existing && !RB_OBJ_FROZEN(*key)) {
2534 	*key = rb_hash_key_str(*key);
2535     }
2536     return hash_aset(key, val, arg, existing);
2537 }
2538 
2539 NOINSERT_UPDATE_CALLBACK(hash_aset)
NOINSERT_UPDATE_CALLBACK(hash_aset_str)2540 NOINSERT_UPDATE_CALLBACK(hash_aset_str)
2541 
2542 /*
2543  *  call-seq:
2544  *     hsh[key] = value        -> value
2545  *     hsh.store(key, value)   -> value
2546  *
2547  *  == Element Assignment
2548  *
2549  *  Associates the value given by +value+ with the key given by +key+.
2550  *
2551  *     h = { "a" => 100, "b" => 200 }
2552  *     h["a"] = 9
2553  *     h["c"] = 4
2554  *     h   #=> {"a"=>9, "b"=>200, "c"=>4}
2555  *     h.store("d", 42) #=> 42
2556  *     h   #=> {"a"=>9, "b"=>200, "c"=>4, "d"=>42}
2557  *
2558  *  +key+ should not have its value changed while it is in use as a key (an
2559  *  <tt>unfrozen String</tt> passed as a key will be duplicated and frozen).
2560  *
2561  *     a = "a"
2562  *     b = "b".freeze
2563  *     h = { a => 100, b => 200 }
2564  *     h.key(100).equal? a #=> false
2565  *     h.key(200).equal? b #=> true
2566  *
2567  */
2568 
2569 VALUE
2570 rb_hash_aset(VALUE hash, VALUE key, VALUE val)
2571 {
2572     int iter_lev = RHASH_ITER_LEV(hash);
2573 
2574     rb_hash_modify(hash);
2575 
2576     if (RHASH_TABLE_NULL_P(hash)) {
2577 	if (iter_lev > 0) no_new_key();
2578         ar_alloc_table(hash);
2579     }
2580 
2581     if (RHASH_TYPE(hash) == &identhash || rb_obj_class(key) != rb_cString) {
2582 	RHASH_UPDATE_ITER(hash, iter_lev, key, hash_aset, val);
2583     }
2584     else {
2585 	RHASH_UPDATE_ITER(hash, iter_lev, key, hash_aset_str, val);
2586     }
2587     return val;
2588 }
2589 
2590 static int
replace_i(VALUE key,VALUE val,VALUE hash)2591 replace_i(VALUE key, VALUE val, VALUE hash)
2592 {
2593     rb_hash_aset(hash, key, val);
2594 
2595     return ST_CONTINUE;
2596 }
2597 
2598 /* :nodoc: */
2599 static VALUE
rb_hash_initialize_copy(VALUE hash,VALUE hash2)2600 rb_hash_initialize_copy(VALUE hash, VALUE hash2)
2601 {
2602     rb_hash_modify_check(hash);
2603     hash2 = to_hash(hash2);
2604 
2605     Check_Type(hash2, T_HASH);
2606 
2607     if (hash == hash2) return hash;
2608 
2609     if (RHASH_AR_TABLE_P(hash2)) {
2610         if (RHASH_AR_TABLE_P(hash)) ar_free_and_clear_table(hash);
2611         ar_copy(hash, hash2);
2612         if (RHASH_AR_TABLE_SIZE(hash))
2613 	    rb_hash_rehash(hash);
2614     }
2615     else if (RHASH_ST_TABLE_P(hash2)) {
2616         if (RHASH_ST_TABLE_P(hash)) st_free_table(RHASH_ST_TABLE(hash));
2617         RHASH_ST_TABLE_SET(hash, st_copy(RHASH_ST_TABLE(hash2)));
2618         if (RHASH_ST_TABLE(hash)->num_entries)
2619             rb_hash_rehash(hash);
2620     }
2621     else if (RHASH_AR_TABLE_P(hash)) {
2622         ar_clear(hash);
2623     }
2624     else if (RHASH_ST_TABLE_P(hash)) {
2625         st_clear(RHASH_ST_TABLE(hash));
2626     }
2627 
2628     COPY_DEFAULT(hash, hash2);
2629 
2630     return hash;
2631 }
2632 
2633 /*
2634  *  call-seq:
2635  *     hsh.replace(other_hash) -> hsh
2636  *
2637  *  Replaces the contents of <i>hsh</i> with the contents of
2638  *  <i>other_hash</i>.
2639  *
2640  *     h = { "a" => 100, "b" => 200 }
2641  *     h.replace({ "c" => 300, "d" => 400 })   #=> {"c"=>300, "d"=>400}
2642  *
2643  */
2644 
2645 static VALUE
rb_hash_replace(VALUE hash,VALUE hash2)2646 rb_hash_replace(VALUE hash, VALUE hash2)
2647 {
2648     rb_hash_modify_check(hash);
2649     if (hash == hash2) return hash;
2650     hash2 = to_hash(hash2);
2651 
2652     COPY_DEFAULT(hash, hash2);
2653 
2654     rb_hash_clear(hash);
2655 
2656     if (RHASH_AR_TABLE_P(hash)) {
2657         if (RHASH_AR_TABLE_P(hash2)) {
2658             ar_copy(hash, hash2);
2659         }
2660         else {
2661             goto st_to_st;
2662         }
2663     }
2664     else {
2665         if (RHASH_AR_TABLE_P(hash2)) ar_force_convert_table(hash2, __FILE__, __LINE__);
2666       st_to_st:
2667         RHASH_TBL_RAW(hash)->type = RHASH_ST_TABLE(hash2)->type;
2668         rb_hash_foreach(hash2, replace_i, hash);
2669     }
2670 
2671     return hash;
2672 }
2673 
2674 /*
2675  *  call-seq:
2676  *     hsh.length    ->  integer
2677  *     hsh.size      ->  integer
2678  *
2679  *  Returns the number of key-value pairs in the hash.
2680  *
2681  *     h = { "d" => 100, "a" => 200, "v" => 300, "e" => 400 }
2682  *     h.size          #=> 4
2683  *     h.delete("a")   #=> 200
2684  *     h.size          #=> 3
2685  *     h.length        #=> 3
2686  *
2687  *  Hash#length is an alias for Hash#size.
2688  */
2689 
2690 VALUE
rb_hash_size(VALUE hash)2691 rb_hash_size(VALUE hash)
2692 {
2693     return INT2FIX(RHASH_SIZE(hash));
2694 }
2695 
2696 size_t
rb_hash_size_num(VALUE hash)2697 rb_hash_size_num(VALUE hash)
2698 {
2699     return (long)RHASH_SIZE(hash);
2700 }
2701 
2702 /*
2703  *  call-seq:
2704  *     hsh.empty?    -> true or false
2705  *
2706  *  Returns <code>true</code> if <i>hsh</i> contains no key-value pairs.
2707  *
2708  *     {}.empty?   #=> true
2709  *
2710  */
2711 
2712 static VALUE
rb_hash_empty_p(VALUE hash)2713 rb_hash_empty_p(VALUE hash)
2714 {
2715     return RHASH_EMPTY_P(hash) ? Qtrue : Qfalse;
2716 }
2717 
2718 static int
each_value_i(VALUE key,VALUE value)2719 each_value_i(VALUE key, VALUE value)
2720 {
2721     rb_yield(value);
2722     return ST_CONTINUE;
2723 }
2724 
2725 /*
2726  *  call-seq:
2727  *     hsh.each_value {| value | block } -> hsh
2728  *     hsh.each_value                    -> an_enumerator
2729  *
2730  *  Calls <i>block</i> once for each key in <i>hsh</i>, passing the
2731  *  value as a parameter.
2732  *
2733  *  If no block is given, an enumerator is returned instead.
2734  *
2735  *     h = { "a" => 100, "b" => 200 }
2736  *     h.each_value {|value| puts value }
2737  *
2738  *  <em>produces:</em>
2739  *
2740  *     100
2741  *     200
2742  */
2743 
2744 static VALUE
rb_hash_each_value(VALUE hash)2745 rb_hash_each_value(VALUE hash)
2746 {
2747     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2748     rb_hash_foreach(hash, each_value_i, 0);
2749     return hash;
2750 }
2751 
2752 static int
each_key_i(VALUE key,VALUE value)2753 each_key_i(VALUE key, VALUE value)
2754 {
2755     rb_yield(key);
2756     return ST_CONTINUE;
2757 }
2758 
2759 /*
2760  *  call-seq:
2761  *     hsh.each_key {| key | block } -> hsh
2762  *     hsh.each_key                  -> an_enumerator
2763  *
2764  *  Calls <i>block</i> once for each key in <i>hsh</i>, passing the key
2765  *  as a parameter.
2766  *
2767  *  If no block is given, an enumerator is returned instead.
2768  *
2769  *     h = { "a" => 100, "b" => 200 }
2770  *     h.each_key {|key| puts key }
2771  *
2772  *  <em>produces:</em>
2773  *
2774  *     a
2775  *     b
2776  */
2777 static VALUE
rb_hash_each_key(VALUE hash)2778 rb_hash_each_key(VALUE hash)
2779 {
2780     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2781     rb_hash_foreach(hash, each_key_i, 0);
2782     return hash;
2783 }
2784 
2785 static int
each_pair_i(VALUE key,VALUE value)2786 each_pair_i(VALUE key, VALUE value)
2787 {
2788     rb_yield(rb_assoc_new(key, value));
2789     return ST_CONTINUE;
2790 }
2791 
2792 static int
each_pair_i_fast(VALUE key,VALUE value)2793 each_pair_i_fast(VALUE key, VALUE value)
2794 {
2795     VALUE argv[2];
2796     argv[0] = key;
2797     argv[1] = value;
2798     rb_yield_values2(2, argv);
2799     return ST_CONTINUE;
2800 }
2801 
2802 /*
2803  *  call-seq:
2804  *     hsh.each      {| key, value | block } -> hsh
2805  *     hsh.each_pair {| key, value | block } -> hsh
2806  *     hsh.each                              -> an_enumerator
2807  *     hsh.each_pair                         -> an_enumerator
2808  *
2809  *  Calls <i>block</i> once for each key in <i>hsh</i>, passing the key-value
2810  *  pair as parameters.
2811  *
2812  *  If no block is given, an enumerator is returned instead.
2813  *
2814  *     h = { "a" => 100, "b" => 200 }
2815  *     h.each {|key, value| puts "#{key} is #{value}" }
2816  *
2817  *  <em>produces:</em>
2818  *
2819  *     a is 100
2820  *     b is 200
2821  *
2822  */
2823 
2824 static VALUE
rb_hash_each_pair(VALUE hash)2825 rb_hash_each_pair(VALUE hash)
2826 {
2827     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2828     if (rb_block_arity() > 1)
2829 	rb_hash_foreach(hash, each_pair_i_fast, 0);
2830     else
2831 	rb_hash_foreach(hash, each_pair_i, 0);
2832     return hash;
2833 }
2834 
2835 static int
transform_keys_i(VALUE key,VALUE value,VALUE result)2836 transform_keys_i(VALUE key, VALUE value, VALUE result)
2837 {
2838     VALUE new_key = rb_yield(key);
2839     rb_hash_aset(result, new_key, value);
2840     return ST_CONTINUE;
2841 }
2842 
2843 /*
2844  *  call-seq:
2845  *     hsh.transform_keys {|key| block } -> new_hash
2846  *     hsh.transform_keys                -> an_enumerator
2847  *
2848  *  Returns a new hash with the results of running the block once for
2849  *  every key.
2850  *  This method does not change the values.
2851  *
2852  *     h = { a: 1, b: 2, c: 3 }
2853  *     h.transform_keys {|k| k.to_s }  #=> { "a" => 1, "b" => 2, "c" => 3 }
2854  *     h.transform_keys(&:to_s)        #=> { "a" => 1, "b" => 2, "c" => 3 }
2855  *     h.transform_keys.with_index {|k, i| "#{k}.#{i}" }
2856  *                                     #=> { "a.0" => 1, "b.1" => 2, "c.2" => 3 }
2857  *
2858  *  If no block is given, an enumerator is returned instead.
2859  */
2860 static VALUE
rb_hash_transform_keys(VALUE hash)2861 rb_hash_transform_keys(VALUE hash)
2862 {
2863     VALUE result;
2864 
2865     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2866     result = rb_hash_new();
2867     if (!RHASH_EMPTY_P(hash)) {
2868         rb_hash_foreach(hash, transform_keys_i, result);
2869     }
2870 
2871     return result;
2872 }
2873 
2874 static VALUE rb_hash_flatten(int argc, VALUE *argv, VALUE hash);
2875 
2876 /*
2877  *  call-seq:
2878  *     hsh.transform_keys! {|key| block } -> hsh
2879  *     hsh.transform_keys!                -> an_enumerator
2880  *
2881  *  Invokes the given block once for each key in <i>hsh</i>, replacing it
2882  *  with the new key returned by the block, and then returns <i>hsh</i>.
2883  *  This method does not change the values.
2884  *
2885  *     h = { a: 1, b: 2, c: 3 }
2886  *     h.transform_keys! {|k| k.to_s }  #=> { "a" => 1, "b" => 2, "c" => 3 }
2887  *     h.transform_keys!(&:to_sym)      #=> { a: 1, b: 2, c: 3 }
2888  *     h.transform_keys!.with_index {|k, i| "#{k}.#{i}" }
2889  *                                      #=> { "a.0" => 1, "b.1" => 2, "c.2" => 3 }
2890  *
2891  *  If no block is given, an enumerator is returned instead.
2892  */
2893 static VALUE
rb_hash_transform_keys_bang(VALUE hash)2894 rb_hash_transform_keys_bang(VALUE hash)
2895 {
2896     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2897     rb_hash_modify_check(hash);
2898     if (!RHASH_TABLE_EMPTY_P(hash)) {
2899         long i;
2900         VALUE pairs = rb_hash_flatten(0, NULL, hash);
2901         rb_hash_clear(hash);
2902         for (i = 0; i < RARRAY_LEN(pairs); i += 2) {
2903             VALUE key = RARRAY_AREF(pairs, i), new_key = rb_yield(key),
2904                   val = RARRAY_AREF(pairs, i+1);
2905             rb_hash_aset(hash, new_key, val);
2906         }
2907     }
2908     return hash;
2909 }
2910 
2911 static int
transform_values_i(VALUE key,VALUE value,VALUE result)2912 transform_values_i(VALUE key, VALUE value, VALUE result)
2913 {
2914     VALUE new_value = rb_yield(value);
2915     rb_hash_aset(result, key, new_value);
2916     return ST_CONTINUE;
2917 }
2918 
2919 /*
2920  *  call-seq:
2921  *     hsh.transform_values {|value| block } -> new_hash
2922  *     hsh.transform_values                  -> an_enumerator
2923  *
2924  *  Returns a new hash with the results of running the block once for
2925  *  every value.
2926  *  This method does not change the keys.
2927  *
2928  *     h = { a: 1, b: 2, c: 3 }
2929  *     h.transform_values {|v| v * v + 1 }  #=> { a: 2, b: 5, c: 10 }
2930  *     h.transform_values(&:to_s)           #=> { a: "1", b: "2", c: "3" }
2931  *     h.transform_values.with_index {|v, i| "#{v}.#{i}" }
2932  *                                          #=> { a: "1.0", b: "2.1", c: "3.2" }
2933  *
2934  *  If no block is given, an enumerator is returned instead.
2935  */
2936 static VALUE
rb_hash_transform_values(VALUE hash)2937 rb_hash_transform_values(VALUE hash)
2938 {
2939     VALUE result;
2940 
2941     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2942     result = rb_hash_new_with_size(RHASH_SIZE(hash));
2943     if (!RHASH_EMPTY_P(hash)) {
2944         rb_hash_foreach(hash, transform_values_i, result);
2945     }
2946 
2947     return result;
2948 }
2949 
2950 /*
2951  *  call-seq:
2952  *     hsh.transform_values! {|value| block } -> hsh
2953  *     hsh.transform_values!                  -> an_enumerator
2954  *
2955  *  Invokes the given block once for each value in <i>hsh</i>, replacing it
2956  *  with the new value returned by the block, and then returns <i>hsh</i>.
2957  *  This method does not change the keys.
2958  *
2959  *     h = { a: 1, b: 2, c: 3 }
2960  *     h.transform_values! {|v| v * v + 1 }  #=> { a: 2, b: 5, c: 10 }
2961  *     h.transform_values!(&:to_s)           #=> { a: "2", b: "5", c: "10" }
2962  *     h.transform_values!.with_index {|v, i| "#{v}.#{i}" }
2963  *                                           #=> { a: "2.0", b: "5.1", c: "10.2" }
2964  *
2965  *  If no block is given, an enumerator is returned instead.
2966  */
2967 static VALUE
rb_hash_transform_values_bang(VALUE hash)2968 rb_hash_transform_values_bang(VALUE hash)
2969 {
2970     RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size);
2971     rb_hash_modify_check(hash);
2972     if (!RHASH_TABLE_EMPTY_P(hash))
2973         rb_hash_foreach(hash, transform_values_i, hash);
2974     return hash;
2975 }
2976 
2977 static int
to_a_i(VALUE key,VALUE value,VALUE ary)2978 to_a_i(VALUE key, VALUE value, VALUE ary)
2979 {
2980     rb_ary_push(ary, rb_assoc_new(key, value));
2981     return ST_CONTINUE;
2982 }
2983 
2984 /*
2985  *  call-seq:
2986  *     hsh.to_a -> array
2987  *
2988  *  Converts <i>hsh</i> to a nested array of <code>[</code> <i>key,
2989  *  value</i> <code>]</code> arrays.
2990  *
2991  *     h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300  }
2992  *     h.to_a   #=> [["c", 300], ["a", 100], ["d", 400]]
2993  */
2994 
2995 static VALUE
rb_hash_to_a(VALUE hash)2996 rb_hash_to_a(VALUE hash)
2997 {
2998     VALUE ary;
2999 
3000     ary = rb_ary_new_capa(RHASH_SIZE(hash));
3001     rb_hash_foreach(hash, to_a_i, ary);
3002     OBJ_INFECT(ary, hash);
3003 
3004     return ary;
3005 }
3006 
3007 static int
inspect_i(VALUE key,VALUE value,VALUE str)3008 inspect_i(VALUE key, VALUE value, VALUE str)
3009 {
3010     VALUE str2;
3011 
3012     str2 = rb_inspect(key);
3013     if (RSTRING_LEN(str) > 1) {
3014 	rb_str_buf_cat_ascii(str, ", ");
3015     }
3016     else {
3017 	rb_enc_copy(str, str2);
3018     }
3019     rb_str_buf_append(str, str2);
3020     OBJ_INFECT(str, str2);
3021     rb_str_buf_cat_ascii(str, "=>");
3022     str2 = rb_inspect(value);
3023     rb_str_buf_append(str, str2);
3024     OBJ_INFECT(str, str2);
3025 
3026     return ST_CONTINUE;
3027 }
3028 
3029 static VALUE
inspect_hash(VALUE hash,VALUE dummy,int recur)3030 inspect_hash(VALUE hash, VALUE dummy, int recur)
3031 {
3032     VALUE str;
3033 
3034     if (recur) return rb_usascii_str_new2("{...}");
3035     str = rb_str_buf_new2("{");
3036     rb_hash_foreach(hash, inspect_i, str);
3037     rb_str_buf_cat2(str, "}");
3038     OBJ_INFECT(str, hash);
3039 
3040     return str;
3041 }
3042 
3043 /*
3044  * call-seq:
3045  *   hsh.to_s     -> string
3046  *   hsh.inspect  -> string
3047  *
3048  * Return the contents of this hash as a string.
3049  *
3050  *     h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300  }
3051  *     h.to_s   #=> "{\"c\"=>300, \"a\"=>100, \"d\"=>400}"
3052  */
3053 
3054 static VALUE
rb_hash_inspect(VALUE hash)3055 rb_hash_inspect(VALUE hash)
3056 {
3057     if (RHASH_EMPTY_P(hash))
3058 	return rb_usascii_str_new2("{}");
3059     return rb_exec_recursive(inspect_hash, hash, 0);
3060 }
3061 
3062 /*
3063  * call-seq:
3064  *    hsh.to_hash   => hsh
3065  *
3066  * Returns +self+.
3067  */
3068 
3069 static VALUE
rb_hash_to_hash(VALUE hash)3070 rb_hash_to_hash(VALUE hash)
3071 {
3072     return hash;
3073 }
3074 
3075 VALUE
rb_hash_set_pair(VALUE hash,VALUE arg)3076 rb_hash_set_pair(VALUE hash, VALUE arg)
3077 {
3078     VALUE pair;
3079 
3080     pair = rb_check_array_type(arg);
3081     if (NIL_P(pair)) {
3082         rb_raise(rb_eTypeError, "wrong element type %s (expected array)",
3083                  rb_builtin_class_name(arg));
3084     }
3085     if (RARRAY_LEN(pair) != 2) {
3086         rb_raise(rb_eArgError, "element has wrong array length (expected 2, was %ld)",
3087                  RARRAY_LEN(pair));
3088     }
3089     rb_hash_aset(hash, RARRAY_AREF(pair, 0), RARRAY_AREF(pair, 1));
3090     return hash;
3091 }
3092 
3093 static int
to_h_i(VALUE key,VALUE value,VALUE hash)3094 to_h_i(VALUE key, VALUE value, VALUE hash)
3095 {
3096     rb_hash_set_pair(hash, rb_yield_values(2, key, value));
3097     return ST_CONTINUE;
3098 }
3099 
3100 static VALUE
rb_hash_to_h_block(VALUE hash)3101 rb_hash_to_h_block(VALUE hash)
3102 {
3103     VALUE h = rb_hash_new_with_size(RHASH_SIZE(hash));
3104     rb_hash_foreach(hash, to_h_i, h);
3105     OBJ_INFECT(h, hash);
3106     return h;
3107 }
3108 
3109 /*
3110  *  call-seq:
3111  *     hsh.to_h                         -> hsh or new_hash
3112  *     hsh.to_h {|key, value| block }   -> new_hash
3113  *
3114  *  Returns +self+. If called on a subclass of Hash, converts
3115  *  the receiver to a Hash object.
3116  *
3117  *  If a block is given, the results of the block on each pair of
3118  *  the receiver will be used as pairs.
3119  */
3120 
3121 static VALUE
rb_hash_to_h(VALUE hash)3122 rb_hash_to_h(VALUE hash)
3123 {
3124     if (rb_block_given_p()) {
3125         return rb_hash_to_h_block(hash);
3126     }
3127     if (rb_obj_class(hash) != rb_cHash) {
3128 	const VALUE flags = RBASIC(hash)->flags;
3129 	hash = hash_dup(hash, rb_cHash, flags & HASH_PROC_DEFAULT);
3130     }
3131     return hash;
3132 }
3133 
3134 static int
keys_i(VALUE key,VALUE value,VALUE ary)3135 keys_i(VALUE key, VALUE value, VALUE ary)
3136 {
3137     rb_ary_push(ary, key);
3138     return ST_CONTINUE;
3139 }
3140 
3141 /*
3142  *  call-seq:
3143  *     hsh.keys    -> array
3144  *
3145  *  Returns a new array populated with the keys from this hash. See also
3146  *  <code>Hash#values</code>.
3147  *
3148  *     h = { "a" => 100, "b" => 200, "c" => 300, "d" => 400 }
3149  *     h.keys   #=> ["a", "b", "c", "d"]
3150  *
3151  */
3152 
3153 MJIT_FUNC_EXPORTED VALUE
rb_hash_keys(VALUE hash)3154 rb_hash_keys(VALUE hash)
3155 {
3156     st_index_t size = RHASH_SIZE(hash);
3157     VALUE keys =  rb_ary_new_capa(size);
3158 
3159     if (size == 0) return keys;
3160 
3161     if (ST_DATA_COMPATIBLE_P(VALUE)) {
3162         RARRAY_PTR_USE_TRANSIENT(keys, ptr, {
3163             if (RHASH_AR_TABLE_P(hash)) {
3164                 size = ar_keys(hash, ptr, size);
3165             }
3166             else {
3167                 st_table *table = RHASH_ST_TABLE(hash);
3168                 size = st_keys(table, ptr, size);
3169             }
3170         });
3171         rb_gc_writebarrier_remember(keys);
3172 	rb_ary_set_len(keys, size);
3173     }
3174     else {
3175 	rb_hash_foreach(hash, keys_i, keys);
3176     }
3177 
3178     return keys;
3179 }
3180 
3181 static int
values_i(VALUE key,VALUE value,VALUE ary)3182 values_i(VALUE key, VALUE value, VALUE ary)
3183 {
3184     rb_ary_push(ary, value);
3185     return ST_CONTINUE;
3186 }
3187 
3188 /*
3189  *  call-seq:
3190  *     hsh.values    -> array
3191  *
3192  *  Returns a new array populated with the values from <i>hsh</i>. See
3193  *  also <code>Hash#keys</code>.
3194  *
3195  *     h = { "a" => 100, "b" => 200, "c" => 300 }
3196  *     h.values   #=> [100, 200, 300]
3197  *
3198  */
3199 
3200 VALUE
rb_hash_values(VALUE hash)3201 rb_hash_values(VALUE hash)
3202 {
3203     VALUE values;
3204     st_index_t size = RHASH_SIZE(hash);
3205 
3206     values = rb_ary_new_capa(size);
3207     if (size == 0) return values;
3208 
3209     if (ST_DATA_COMPATIBLE_P(VALUE)) {
3210         if (RHASH_AR_TABLE_P(hash)) {
3211             rb_gc_writebarrier_remember(values);
3212             RARRAY_PTR_USE_TRANSIENT(values, ptr, {
3213                 size = ar_values(hash, ptr, size);
3214             });
3215         }
3216         else if (RHASH_ST_TABLE_P(hash)) {
3217             st_table *table = RHASH_ST_TABLE(hash);
3218             rb_gc_writebarrier_remember(values);
3219             RARRAY_PTR_USE_TRANSIENT(values, ptr, {
3220                 size = st_values(table, ptr, size);
3221             });
3222         }
3223 	rb_ary_set_len(values, size);
3224     }
3225     else {
3226 	rb_hash_foreach(hash, values_i, values);
3227     }
3228 
3229     return values;
3230 }
3231 
3232 /*
3233  *  call-seq:
3234  *     hsh.has_key?(key)    -> true or false
3235  *     hsh.include?(key)    -> true or false
3236  *     hsh.key?(key)        -> true or false
3237  *     hsh.member?(key)     -> true or false
3238  *
3239  *  Returns <code>true</code> if the given key is present in <i>hsh</i>.
3240  *
3241  *     h = { "a" => 100, "b" => 200 }
3242  *     h.has_key?("a")   #=> true
3243  *     h.has_key?("z")   #=> false
3244  *
3245  *  Note that <code>include?</code> and <code>member?</code> do not test member
3246  *  equality using <code>==</code> as do other Enumerables.
3247  *
3248  *  See also Enumerable#include?
3249  */
3250 
3251 MJIT_FUNC_EXPORTED VALUE
rb_hash_has_key(VALUE hash,VALUE key)3252 rb_hash_has_key(VALUE hash, VALUE key)
3253 {
3254     if (RHASH_AR_TABLE_P(hash) && ar_lookup(hash, key, 0)) {
3255         return Qtrue;
3256     }
3257     else if (RHASH_ST_TABLE_P(hash) && st_lookup(RHASH_ST_TABLE(hash), key, 0)) {
3258 	return Qtrue;
3259     }
3260     return Qfalse;
3261 }
3262 
3263 static int
rb_hash_search_value(VALUE key,VALUE value,VALUE arg)3264 rb_hash_search_value(VALUE key, VALUE value, VALUE arg)
3265 {
3266     VALUE *data = (VALUE *)arg;
3267 
3268     if (rb_equal(value, data[1])) {
3269 	data[0] = Qtrue;
3270 	return ST_STOP;
3271     }
3272     return ST_CONTINUE;
3273 }
3274 
3275 /*
3276  *  call-seq:
3277  *     hsh.has_value?(value)    -> true or false
3278  *     hsh.value?(value)        -> true or false
3279  *
3280  *  Returns <code>true</code> if the given value is present for some key
3281  *  in <i>hsh</i>.
3282  *
3283  *     h = { "a" => 100, "b" => 200 }
3284  *     h.value?(100)   #=> true
3285  *     h.value?(999)   #=> false
3286  */
3287 
3288 static VALUE
rb_hash_has_value(VALUE hash,VALUE val)3289 rb_hash_has_value(VALUE hash, VALUE val)
3290 {
3291     VALUE data[2];
3292 
3293     data[0] = Qfalse;
3294     data[1] = val;
3295     rb_hash_foreach(hash, rb_hash_search_value, (VALUE)data);
3296     return data[0];
3297 }
3298 
3299 struct equal_data {
3300     VALUE result;
3301     VALUE hash;
3302     int eql;
3303 };
3304 
3305 static int
eql_i(VALUE key,VALUE val1,VALUE arg)3306 eql_i(VALUE key, VALUE val1, VALUE arg)
3307 {
3308     struct equal_data *data = (struct equal_data *)arg;
3309     st_data_t val2;
3310 
3311     if (RHASH_AR_TABLE_P(data->hash) && !ar_lookup(data->hash, key, &val2)) {
3312 	data->result = Qfalse;
3313 	return ST_STOP;
3314     }
3315     else if (RHASH_ST_TABLE_P(data->hash) && !st_lookup(RHASH_ST_TABLE(data->hash), key, &val2)) {
3316         data->result = Qfalse;
3317         return ST_STOP;
3318     }
3319 
3320     if (!(data->eql ? rb_eql(val1, (VALUE)val2) : (int)rb_equal(val1, (VALUE)val2))) {
3321 	data->result = Qfalse;
3322 	return ST_STOP;
3323     }
3324     return ST_CONTINUE;
3325 }
3326 
3327 static VALUE
recursive_eql(VALUE hash,VALUE dt,int recur)3328 recursive_eql(VALUE hash, VALUE dt, int recur)
3329 {
3330     struct equal_data *data;
3331 
3332     if (recur) return Qtrue;	/* Subtle! */
3333     data = (struct equal_data*)dt;
3334     data->result = Qtrue;
3335     rb_hash_foreach(hash, eql_i, dt);
3336 
3337     return data->result;
3338 }
3339 
3340 static VALUE
hash_equal(VALUE hash1,VALUE hash2,int eql)3341 hash_equal(VALUE hash1, VALUE hash2, int eql)
3342 {
3343     struct equal_data data;
3344 
3345     if (hash1 == hash2) return Qtrue;
3346     if (!RB_TYPE_P(hash2, T_HASH)) {
3347 	if (!rb_respond_to(hash2, idTo_hash)) {
3348 	    return Qfalse;
3349 	}
3350 	if (eql) {
3351 	    if (rb_eql(hash2, hash1)) {
3352 		return Qtrue;
3353 	    }
3354 	    else {
3355 		return Qfalse;
3356 	    }
3357 	}
3358 	else {
3359 	    return rb_equal(hash2, hash1);
3360 	}
3361     }
3362     if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2))
3363 	return Qfalse;
3364     if (!RHASH_TABLE_EMPTY_P(hash1) && !RHASH_TABLE_EMPTY_P(hash2)) {
3365         if (RHASH_TYPE(hash1) != RHASH_TYPE(hash2)) {
3366             return Qfalse;
3367         }
3368         else {
3369             data.hash = hash2;
3370             data.eql = eql;
3371             return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data);
3372         }
3373     }
3374 
3375 #if 0
3376     if (!(rb_equal(RHASH_IFNONE(hash1), RHASH_IFNONE(hash2)) &&
3377 	  FL_TEST(hash1, HASH_PROC_DEFAULT) == FL_TEST(hash2, HASH_PROC_DEFAULT)))
3378 	return Qfalse;
3379 #endif
3380     return Qtrue;
3381 }
3382 
3383 /*
3384  *  call-seq:
3385  *     hsh == other_hash    -> true or false
3386  *
3387  *  Equality---Two hashes are equal if they each contain the same number
3388  *  of keys and if each key-value pair is equal to (according to
3389  *  <code>Object#==</code>) the corresponding elements in the other
3390  *  hash.
3391  *
3392  *     h1 = { "a" => 1, "c" => 2 }
3393  *     h2 = { 7 => 35, "c" => 2, "a" => 1 }
3394  *     h3 = { "a" => 1, "c" => 2, 7 => 35 }
3395  *     h4 = { "a" => 1, "d" => 2, "f" => 35 }
3396  *     h1 == h2   #=> false
3397  *     h2 == h3   #=> true
3398  *     h3 == h4   #=> false
3399  *
3400  *  The orders of each hashes are not compared.
3401  *
3402  *     h1 = { "a" => 1, "c" => 2 }
3403  *     h2 = { "c" => 2, "a" => 1 }
3404  *     h1 == h2   #=> true
3405  *
3406  */
3407 
3408 static VALUE
rb_hash_equal(VALUE hash1,VALUE hash2)3409 rb_hash_equal(VALUE hash1, VALUE hash2)
3410 {
3411     return hash_equal(hash1, hash2, FALSE);
3412 }
3413 
3414 /*
3415  *  call-seq:
3416  *     hash.eql?(other)  -> true or false
3417  *
3418  *  Returns <code>true</code> if <i>hash</i> and <i>other</i> are
3419  *  both hashes with the same content.
3420  *  The orders of each hashes are not compared.
3421  */
3422 
3423 static VALUE
rb_hash_eql(VALUE hash1,VALUE hash2)3424 rb_hash_eql(VALUE hash1, VALUE hash2)
3425 {
3426     return hash_equal(hash1, hash2, TRUE);
3427 }
3428 
3429 static int
hash_i(VALUE key,VALUE val,VALUE arg)3430 hash_i(VALUE key, VALUE val, VALUE arg)
3431 {
3432     st_index_t *hval = (st_index_t *)arg;
3433     st_index_t hdata[2];
3434 
3435     hdata[0] = rb_hash(key);
3436     hdata[1] = rb_hash(val);
3437     *hval ^= st_hash(hdata, sizeof(hdata), 0);
3438     return ST_CONTINUE;
3439 }
3440 
3441 /*
3442  *  call-seq:
3443  *     hsh.hash   -> integer
3444  *
3445  *  Compute a hash-code for this hash. Two hashes with the same content
3446  *  will have the same hash code (and will compare using <code>eql?</code>).
3447  *
3448  *  See also Object#hash.
3449  */
3450 
3451 static VALUE
rb_hash_hash(VALUE hash)3452 rb_hash_hash(VALUE hash)
3453 {
3454     st_index_t size = RHASH_SIZE(hash);
3455     st_index_t hval = rb_hash_start(size);
3456     hval = rb_hash_uint(hval, (st_index_t)rb_hash_hash);
3457     if (size) {
3458 	rb_hash_foreach(hash, hash_i, (VALUE)&hval);
3459     }
3460     hval = rb_hash_end(hval);
3461     return ST2FIX(hval);
3462 }
3463 
3464 static int
rb_hash_invert_i(VALUE key,VALUE value,VALUE hash)3465 rb_hash_invert_i(VALUE key, VALUE value, VALUE hash)
3466 {
3467     rb_hash_aset(hash, value, key);
3468     return ST_CONTINUE;
3469 }
3470 
3471 /*
3472  *  call-seq:
3473  *     hsh.invert -> new_hash
3474  *
3475  *  Returns a new hash created by using <i>hsh</i>'s values as keys, and
3476  *  the keys as values.
3477  *  If a key with the same value already exists in the <i>hsh</i>, then
3478  *  the last one defined will be used, the earlier value(s) will be discarded.
3479  *
3480  *     h = { "n" => 100, "m" => 100, "y" => 300, "d" => 200, "a" => 0 }
3481  *     h.invert   #=> {0=>"a", 100=>"m", 200=>"d", 300=>"y"}
3482  *
3483  *  If there is no key with the same value, Hash#invert is involutive.
3484  *
3485  *    h = { a: 1, b: 3, c: 4 }
3486  *    h.invert.invert == h #=> true
3487  *
3488  *  The condition, no key with the same value, can be tested by comparing
3489  *  the size of inverted hash.
3490  *
3491  *    # no key with the same value
3492  *    h = { a: 1, b: 3, c: 4 }
3493  *    h.size == h.invert.size #=> true
3494  *
3495  *    # two (or more) keys has the same value
3496  *    h = { a: 1, b: 3, c: 1 }
3497  *    h.size == h.invert.size #=> false
3498  *
3499  */
3500 
3501 static VALUE
rb_hash_invert(VALUE hash)3502 rb_hash_invert(VALUE hash)
3503 {
3504     VALUE h = rb_hash_new_with_size(RHASH_SIZE(hash));
3505 
3506     rb_hash_foreach(hash, rb_hash_invert_i, h);
3507     return h;
3508 }
3509 
3510 static int
rb_hash_update_callback(st_data_t * key,st_data_t * value,struct update_arg * arg,int existing)3511 rb_hash_update_callback(st_data_t *key, st_data_t *value, struct update_arg *arg, int existing)
3512 {
3513     if (existing) {
3514 	arg->old_value = *value;
3515 	arg->new_value = arg->arg;
3516     }
3517     else {
3518 	arg->new_key = *key;
3519 	arg->new_value = arg->arg;
3520     }
3521     *value = arg->arg;
3522     return ST_CONTINUE;
3523 }
3524 
NOINSERT_UPDATE_CALLBACK(rb_hash_update_callback)3525 NOINSERT_UPDATE_CALLBACK(rb_hash_update_callback)
3526 
3527 static int
3528 rb_hash_update_i(VALUE key, VALUE value, VALUE hash)
3529 {
3530     RHASH_UPDATE(hash, key, rb_hash_update_callback, value);
3531     return ST_CONTINUE;
3532 }
3533 
3534 static int
rb_hash_update_block_callback(st_data_t * key,st_data_t * value,struct update_arg * arg,int existing)3535 rb_hash_update_block_callback(st_data_t *key, st_data_t *value, struct update_arg *arg, int existing)
3536 {
3537     VALUE newvalue = (VALUE)arg->arg;
3538 
3539     if (existing) {
3540 	newvalue = rb_yield_values(3, (VALUE)*key, (VALUE)*value, newvalue);
3541 	arg->old_value = *value;
3542     }
3543     else {
3544 	arg->new_key = *key;
3545     }
3546     arg->new_value = newvalue;
3547     *value = newvalue;
3548     return ST_CONTINUE;
3549 }
3550 
NOINSERT_UPDATE_CALLBACK(rb_hash_update_block_callback)3551 NOINSERT_UPDATE_CALLBACK(rb_hash_update_block_callback)
3552 
3553 static int
3554 rb_hash_update_block_i(VALUE key, VALUE value, VALUE hash)
3555 {
3556     RHASH_UPDATE(hash, key, rb_hash_update_block_callback, value);
3557     return ST_CONTINUE;
3558 }
3559 
3560 /*
3561  *  call-seq:
3562  *     hsh.merge!(other_hash1, other_hash2, ...)              -> hsh
3563  *     hsh.update(other_hash1, other_hash2, ...)              -> hsh
3564  *     hsh.merge!(other_hash1, other_hash2, ...) {|key, oldval, newval| block}
3565  *                                                            -> hsh
3566  *     hsh.update(other_hash1, other_hash2, ...) {|key, oldval, newval| block}
3567  *                                                            -> hsh
3568  *
3569  *  Adds the contents of the given hashes to the receiver.
3570  *
3571  *  If no block is given, entries with duplicate keys are overwritten
3572  *  with the values from each +other_hash+ successively,
3573  *  otherwise the value for each duplicate key is determined by
3574  *  calling the block with the key, its value in the receiver and
3575  *  its value in each +other_hash+.
3576  *
3577  *     h1 = { "a" => 100, "b" => 200 }
3578  *     h1.merge!          #=> {"a"=>100, "b"=>200}
3579  *     h1                 #=> {"a"=>100, "b"=>200}
3580  *
3581  *     h1 = { "a" => 100, "b" => 200 }
3582  *     h2 = { "b" => 246, "c" => 300 }
3583  *     h1.merge!(h2)      #=> {"a"=>100, "b"=>246, "c"=>300}
3584  *     h1                 #=> {"a"=>100, "b"=>246, "c"=>300}
3585  *
3586  *     h1 = { "a" => 100, "b" => 200 }
3587  *     h2 = { "b" => 246, "c" => 300 }
3588  *     h3 = { "b" => 357, "d" => 400 }
3589  *     h1.merge!(h2, h3)
3590  *                        #=> {"a"=>100, "b"=>357, "c"=>300, "d"=>400}
3591  *     h1                 #=> {"a"=>100, "b"=>357, "c"=>300, "d"=>400}
3592  *
3593  *     h1 = { "a" => 100, "b" => 200 }
3594  *     h2 = { "b" => 246, "c" => 300 }
3595  *     h3 = { "b" => 357, "d" => 400 }
3596  *     h1.merge!(h2, h3) {|key, v1, v2| v1 }
3597  *                        #=> {"a"=>100, "b"=>200, "c"=>300, "d"=>400}
3598  *     h1                 #=> {"a"=>100, "b"=>200, "c"=>300, "d"=>400}
3599  *
3600  *  Hash#update is an alias for Hash#merge!.
3601  */
3602 
3603 static VALUE
rb_hash_update(int argc,VALUE * argv,VALUE self)3604 rb_hash_update(int argc, VALUE *argv, VALUE self)
3605 {
3606     int i;
3607     bool block_given = rb_block_given_p();
3608 
3609     rb_hash_modify(self);
3610     for (i = 0; i < argc; i++){
3611        VALUE hash = to_hash(argv[i]);
3612        if (block_given) {
3613            rb_hash_foreach(hash, rb_hash_update_block_i, self);
3614        }
3615        else {
3616            rb_hash_foreach(hash, rb_hash_update_i, self);
3617        }
3618     }
3619     return self;
3620 }
3621 
3622 struct update_func_arg {
3623     VALUE hash;
3624     VALUE value;
3625     rb_hash_update_func *func;
3626 };
3627 
3628 static int
rb_hash_update_func_callback(st_data_t * key,st_data_t * value,struct update_arg * arg,int existing)3629 rb_hash_update_func_callback(st_data_t *key, st_data_t *value, struct update_arg *arg, int existing)
3630 {
3631     struct update_func_arg *uf_arg = (struct update_func_arg *)arg->arg;
3632     VALUE newvalue = uf_arg->value;
3633 
3634     if (existing) {
3635 	newvalue = (*uf_arg->func)((VALUE)*key, (VALUE)*value, newvalue);
3636 	arg->old_value = *value;
3637     }
3638     else {
3639 	arg->new_key = *key;
3640     }
3641     arg->new_value = newvalue;
3642     *value = newvalue;
3643     return ST_CONTINUE;
3644 }
3645 
NOINSERT_UPDATE_CALLBACK(rb_hash_update_func_callback)3646 NOINSERT_UPDATE_CALLBACK(rb_hash_update_func_callback)
3647 
3648 static int
3649 rb_hash_update_func_i(VALUE key, VALUE value, VALUE arg0)
3650 {
3651     struct update_func_arg *arg = (struct update_func_arg *)arg0;
3652     VALUE hash = arg->hash;
3653 
3654     arg->value = value;
3655     RHASH_UPDATE(hash, key, rb_hash_update_func_callback, (VALUE)arg);
3656     return ST_CONTINUE;
3657 }
3658 
3659 VALUE
rb_hash_update_by(VALUE hash1,VALUE hash2,rb_hash_update_func * func)3660 rb_hash_update_by(VALUE hash1, VALUE hash2, rb_hash_update_func *func)
3661 {
3662     rb_hash_modify(hash1);
3663     hash2 = to_hash(hash2);
3664     if (func) {
3665 	struct update_func_arg arg;
3666 	arg.hash = hash1;
3667 	arg.func = func;
3668 	rb_hash_foreach(hash2, rb_hash_update_func_i, (VALUE)&arg);
3669     }
3670     else {
3671 	rb_hash_foreach(hash2, rb_hash_update_i, hash1);
3672     }
3673     return hash1;
3674 }
3675 
3676 /*
3677  *  call-seq:
3678  *     hsh.merge(other_hash1, other_hash2, ...)           -> new_hash
3679  *     hsh.merge(other_hash1, other_hash2, ...) {|key, oldval, newval| block}
3680  *                                                        -> new_hash
3681  *
3682  *  Returns a new hash that combines the contents of the receiver and
3683  *  the contents of the given hashes.
3684  *
3685  *  If no block is given, entries with duplicate keys are overwritten
3686  *  with the values from each +other_hash+ successively,
3687  *  otherwise the value for each duplicate key is determined by
3688  *  calling the block with the key, its value in the receiver and
3689  *  its value in each +other_hash+.
3690  *
3691  *  When called without any argument, returns a copy of the receiver.
3692  *
3693  *     h1 = { "a" => 100, "b" => 200 }
3694  *     h2 = { "b" => 246, "c" => 300 }
3695  *     h3 = { "b" => 357, "d" => 400 }
3696  *     h1.merge          #=> {"a"=>100, "b"=>200}
3697  *     h1.merge(h2)      #=> {"a"=>100, "b"=>246, "c"=>300}
3698  *     h1.merge(h2, h3)  #=> {"a"=>100, "b"=>357, "c"=>300, "d"=>400}
3699  *     h1.merge(h2) {|key, oldval, newval| newval - oldval}
3700  *                       #=> {"a"=>100, "b"=>46,  "c"=>300}
3701  *     h1.merge(h2, h3) {|key, oldval, newval| newval - oldval}
3702  *                       #=> {"a"=>100, "b"=>311, "c"=>300, "d"=>400}
3703  *     h1                #=> {"a"=>100, "b"=>200}
3704  *
3705  */
3706 
3707 static VALUE
rb_hash_merge(int argc,VALUE * argv,VALUE self)3708 rb_hash_merge(int argc, VALUE *argv, VALUE self)
3709 {
3710     return rb_hash_update(argc, argv, rb_hash_dup(self));
3711 }
3712 
3713 static int
assoc_cmp(VALUE a,VALUE b)3714 assoc_cmp(VALUE a, VALUE b)
3715 {
3716     return !RTEST(rb_equal(a, b));
3717 }
3718 
3719 static VALUE
lookup2_call(VALUE arg)3720 lookup2_call(VALUE arg)
3721 {
3722     VALUE *args = (VALUE *)arg;
3723     return rb_hash_lookup2(args[0], args[1], Qundef);
3724 }
3725 
3726 struct reset_hash_type_arg {
3727     VALUE hash;
3728     const struct st_hash_type *orighash;
3729 };
3730 
3731 static VALUE
reset_hash_type(VALUE arg)3732 reset_hash_type(VALUE arg)
3733 {
3734     struct reset_hash_type_arg *p = (struct reset_hash_type_arg *)arg;
3735     HASH_ASSERT(RHASH_ST_TABLE_P(p->hash));
3736     RHASH_ST_TABLE(p->hash)->type = p->orighash;
3737     return Qundef;
3738 }
3739 
3740 static int
assoc_i(VALUE key,VALUE val,VALUE arg)3741 assoc_i(VALUE key, VALUE val, VALUE arg)
3742 {
3743     VALUE *args = (VALUE *)arg;
3744 
3745     if (RTEST(rb_equal(args[0], key))) {
3746 	args[1] = rb_assoc_new(key, val);
3747 	return ST_STOP;
3748     }
3749     return ST_CONTINUE;
3750 }
3751 
3752 /*
3753  *  call-seq:
3754  *     hash.assoc(obj)   ->  an_array  or  nil
3755  *
3756  *  Searches through the hash comparing _obj_ with the key using <code>==</code>.
3757  *  Returns the key-value pair (two elements array) or +nil+
3758  *  if no match is found.  See <code>Array#assoc</code>.
3759  *
3760  *     h = {"colors"  => ["red", "blue", "green"],
3761  *          "letters" => ["a", "b", "c" ]}
3762  *     h.assoc("letters")  #=> ["letters", ["a", "b", "c"]]
3763  *     h.assoc("foo")      #=> nil
3764  */
3765 
3766 VALUE
rb_hash_assoc(VALUE hash,VALUE key)3767 rb_hash_assoc(VALUE hash, VALUE key)
3768 {
3769     st_table *table;
3770     const struct st_hash_type *orighash;
3771     VALUE args[2];
3772 
3773     if (RHASH_EMPTY_P(hash)) return Qnil;
3774 
3775     ar_force_convert_table(hash, __FILE__, __LINE__);
3776     HASH_ASSERT(RHASH_ST_TABLE_P(hash));
3777     table = RHASH_ST_TABLE(hash);
3778     orighash = table->type;
3779 
3780     if (orighash != &identhash) {
3781 	VALUE value;
3782 	struct reset_hash_type_arg ensure_arg;
3783 	struct st_hash_type assochash;
3784 
3785 	assochash.compare = assoc_cmp;
3786 	assochash.hash = orighash->hash;
3787         table->type = &assochash;
3788 	args[0] = hash;
3789 	args[1] = key;
3790 	ensure_arg.hash = hash;
3791 	ensure_arg.orighash = orighash;
3792 	value = rb_ensure(lookup2_call, (VALUE)&args, reset_hash_type, (VALUE)&ensure_arg);
3793 	if (value != Qundef) return rb_assoc_new(key, value);
3794     }
3795 
3796     args[0] = key;
3797     args[1] = Qnil;
3798     rb_hash_foreach(hash, assoc_i, (VALUE)args);
3799     return args[1];
3800 }
3801 
3802 static int
rassoc_i(VALUE key,VALUE val,VALUE arg)3803 rassoc_i(VALUE key, VALUE val, VALUE arg)
3804 {
3805     VALUE *args = (VALUE *)arg;
3806 
3807     if (RTEST(rb_equal(args[0], val))) {
3808 	args[1] = rb_assoc_new(key, val);
3809 	return ST_STOP;
3810     }
3811     return ST_CONTINUE;
3812 }
3813 
3814 /*
3815  *  call-seq:
3816  *     hash.rassoc(obj) -> an_array or nil
3817  *
3818  *  Searches through the hash comparing _obj_ with the value using <code>==</code>.
3819  *  Returns the first key-value pair (two-element array) that matches. See
3820  *  also <code>Array#rassoc</code>.
3821  *
3822  *     a = {1=> "one", 2 => "two", 3 => "three", "ii" => "two"}
3823  *     a.rassoc("two")    #=> [2, "two"]
3824  *     a.rassoc("four")   #=> nil
3825  */
3826 
3827 VALUE
rb_hash_rassoc(VALUE hash,VALUE obj)3828 rb_hash_rassoc(VALUE hash, VALUE obj)
3829 {
3830     VALUE args[2];
3831 
3832     args[0] = obj;
3833     args[1] = Qnil;
3834     rb_hash_foreach(hash, rassoc_i, (VALUE)args);
3835     return args[1];
3836 }
3837 
3838 static int
flatten_i(VALUE key,VALUE val,VALUE ary)3839 flatten_i(VALUE key, VALUE val, VALUE ary)
3840 {
3841     VALUE pair[2];
3842 
3843     pair[0] = key;
3844     pair[1] = val;
3845     rb_ary_cat(ary, pair, 2);
3846 
3847     return ST_CONTINUE;
3848 }
3849 
3850 /*
3851  *  call-seq:
3852  *     hash.flatten -> an_array
3853  *     hash.flatten(level) -> an_array
3854  *
3855  *  Returns a new array that is a one-dimensional flattening of this
3856  *  hash. That is, for every key or value that is an array, extract
3857  *  its elements into the new array.  Unlike Array#flatten, this
3858  *  method does not flatten recursively by default.  The optional
3859  *  <i>level</i> argument determines the level of recursion to flatten.
3860  *
3861  *     a =  {1=> "one", 2 => [2,"two"], 3 => "three"}
3862  *     a.flatten    # => [1, "one", 2, [2, "two"], 3, "three"]
3863  *     a.flatten(2) # => [1, "one", 2, 2, "two", 3, "three"]
3864  */
3865 
3866 static VALUE
rb_hash_flatten(int argc,VALUE * argv,VALUE hash)3867 rb_hash_flatten(int argc, VALUE *argv, VALUE hash)
3868 {
3869     VALUE ary;
3870 
3871     rb_check_arity(argc, 0, 1);
3872 
3873     if (argc) {
3874 	int level = NUM2INT(argv[0]);
3875 
3876 	if (level == 0) return rb_hash_to_a(hash);
3877 
3878 	ary = rb_ary_new_capa(RHASH_SIZE(hash) * 2);
3879 	rb_hash_foreach(hash, flatten_i, ary);
3880 	level--;
3881 
3882 	if (level > 0) {
3883 	    VALUE ary_flatten_level = INT2FIX(level);
3884 	    rb_funcallv(ary, id_flatten_bang, 1, &ary_flatten_level);
3885 	}
3886 	else if (level < 0) {
3887 	    /* flatten recursively */
3888 	    rb_funcallv(ary, id_flatten_bang, 0, 0);
3889 	}
3890     }
3891     else {
3892 	ary = rb_ary_new_capa(RHASH_SIZE(hash) * 2);
3893 	rb_hash_foreach(hash, flatten_i, ary);
3894     }
3895 
3896     return ary;
3897 }
3898 
3899 static int
delete_if_nil(VALUE key,VALUE value,VALUE hash)3900 delete_if_nil(VALUE key, VALUE value, VALUE hash)
3901 {
3902     if (NIL_P(value)) {
3903 	return ST_DELETE;
3904     }
3905     return ST_CONTINUE;
3906 }
3907 
3908 static int
set_if_not_nil(VALUE key,VALUE value,VALUE hash)3909 set_if_not_nil(VALUE key, VALUE value, VALUE hash)
3910 {
3911     if (!NIL_P(value)) {
3912 	rb_hash_aset(hash, key, value);
3913     }
3914     return ST_CONTINUE;
3915 }
3916 
3917 /*
3918  *  call-seq:
3919  *     hsh.compact -> new_hash
3920  *
3921  *  Returns a new hash with the nil values/key pairs removed
3922  *
3923  *     h = { a: 1, b: false, c: nil }
3924  *     h.compact     #=> { a: 1, b: false }
3925  *     h             #=> { a: 1, b: false, c: nil }
3926  *
3927  */
3928 
3929 static VALUE
rb_hash_compact(VALUE hash)3930 rb_hash_compact(VALUE hash)
3931 {
3932     VALUE result = rb_hash_new();
3933     if (!RHASH_EMPTY_P(hash)) {
3934 	rb_hash_foreach(hash, set_if_not_nil, result);
3935     }
3936     return result;
3937 }
3938 
3939 /*
3940  *  call-seq:
3941  *     hsh.compact! -> hsh or nil
3942  *
3943  *  Removes all nil values from the hash.
3944  *  Returns nil if no changes were made, otherwise returns the hash.
3945  *
3946  *     h = { a: 1, b: false, c: nil }
3947  *     h.compact!     #=> { a: 1, b: false }
3948  *
3949  */
3950 
3951 static VALUE
rb_hash_compact_bang(VALUE hash)3952 rb_hash_compact_bang(VALUE hash)
3953 {
3954     st_index_t n;
3955     rb_hash_modify_check(hash);
3956     n = RHASH_SIZE(hash);
3957     if (n) {
3958 	rb_hash_foreach(hash, delete_if_nil, hash);
3959         if (n != RHASH_SIZE(hash))
3960 	    return hash;
3961     }
3962     return Qnil;
3963 }
3964 
3965 /*
3966  *  call-seq:
3967  *     hsh.compare_by_identity -> hsh
3968  *
3969  *  Makes <i>hsh</i> compare its keys by their identity, i.e. it
3970  *  will consider exact same objects as same keys.
3971  *
3972  *     h1 = { "a" => 100, "b" => 200, :c => "c" }
3973  *     h1["a"]        #=> 100
3974  *     h1.compare_by_identity
3975  *     h1.compare_by_identity? #=> true
3976  *     h1["a".dup]    #=> nil  # different objects.
3977  *     h1[:c]         #=> "c"  # same symbols are all same.
3978  *
3979  */
3980 
3981 static VALUE
rb_hash_compare_by_id(VALUE hash)3982 rb_hash_compare_by_id(VALUE hash)
3983 {
3984     VALUE tmp;
3985     st_table *identtable;
3986 
3987     if (rb_hash_compare_by_id_p(hash)) return hash;
3988 
3989     rb_hash_modify_check(hash);
3990     ar_force_convert_table(hash, __FILE__, __LINE__);
3991     HASH_ASSERT(RHASH_ST_TABLE_P(hash));
3992 
3993     tmp = hash_alloc(0);
3994     identtable = rb_init_identtable_with_size(RHASH_SIZE(hash));
3995     RHASH_ST_TABLE_SET(tmp, identtable);
3996     rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
3997     st_free_table(RHASH_ST_TABLE(hash));
3998     RHASH_ST_TABLE_SET(hash, identtable);
3999     RHASH_ST_CLEAR(tmp);
4000     rb_gc_force_recycle(tmp);
4001 
4002     return hash;
4003 }
4004 
4005 /*
4006  *  call-seq:
4007  *     hsh.compare_by_identity? -> true or false
4008  *
4009  *  Returns <code>true</code> if <i>hsh</i> will compare its keys by
4010  *  their identity.  Also see <code>Hash#compare_by_identity</code>.
4011  *
4012  */
4013 
4014 MJIT_FUNC_EXPORTED VALUE
rb_hash_compare_by_id_p(VALUE hash)4015 rb_hash_compare_by_id_p(VALUE hash)
4016 {
4017     if (RHASH_ST_TABLE_P(hash) && RHASH_ST_TABLE(hash)->type == &identhash) {
4018 	return Qtrue;
4019     }
4020     else {
4021         return Qfalse;
4022     }
4023 }
4024 
4025 VALUE
rb_ident_hash_new(void)4026 rb_ident_hash_new(void)
4027 {
4028     VALUE hash = rb_hash_new();
4029     RHASH_ST_TABLE_SET(hash, st_init_table(&identhash));
4030     return hash;
4031 }
4032 
4033 st_table *
rb_init_identtable(void)4034 rb_init_identtable(void)
4035 {
4036     return st_init_table(&identhash);
4037 }
4038 
4039 st_table *
rb_init_identtable_with_size(st_index_t size)4040 rb_init_identtable_with_size(st_index_t size)
4041 {
4042     return st_init_table_with_size(&identhash, size);
4043 }
4044 
4045 static int
any_p_i(VALUE key,VALUE value,VALUE arg)4046 any_p_i(VALUE key, VALUE value, VALUE arg)
4047 {
4048     VALUE ret = rb_yield(rb_assoc_new(key, value));
4049     if (RTEST(ret)) {
4050 	*(VALUE *)arg = Qtrue;
4051 	return ST_STOP;
4052     }
4053     return ST_CONTINUE;
4054 }
4055 
4056 static int
any_p_i_fast(VALUE key,VALUE value,VALUE arg)4057 any_p_i_fast(VALUE key, VALUE value, VALUE arg)
4058 {
4059     VALUE ret = rb_yield_values(2, key, value);
4060     if (RTEST(ret)) {
4061 	*(VALUE *)arg = Qtrue;
4062 	return ST_STOP;
4063     }
4064     return ST_CONTINUE;
4065 }
4066 
4067 static int
any_p_i_pattern(VALUE key,VALUE value,VALUE arg)4068 any_p_i_pattern(VALUE key, VALUE value, VALUE arg)
4069 {
4070     VALUE ret = rb_funcall(((VALUE *)arg)[1], idEqq, 1, rb_assoc_new(key, value));
4071     if (RTEST(ret)) {
4072 	*(VALUE *)arg = Qtrue;
4073 	return ST_STOP;
4074     }
4075     return ST_CONTINUE;
4076 }
4077 
4078 /*
4079  *  call-seq:
4080  *     hsh.any? [{ |(key, value)| block }]   -> true or false
4081  *     hsh.any?(pattern)                     -> true or false
4082  *
4083  *  See also Enumerable#any?
4084  */
4085 
4086 static VALUE
rb_hash_any_p(int argc,VALUE * argv,VALUE hash)4087 rb_hash_any_p(int argc, VALUE *argv, VALUE hash)
4088 {
4089     VALUE args[2];
4090     args[0] = Qfalse;
4091 
4092     rb_check_arity(argc, 0, 1);
4093     if (RHASH_EMPTY_P(hash)) return Qfalse;
4094     if (argc) {
4095         if (rb_block_given_p()) {
4096             rb_warn("given block not used");
4097         }
4098 	args[1] = argv[0];
4099 
4100 	rb_hash_foreach(hash, any_p_i_pattern, (VALUE)args);
4101     }
4102     else {
4103 	if (!rb_block_given_p()) {
4104 	    /* yields pairs, never false */
4105 	    return Qtrue;
4106 	}
4107 	if (rb_block_arity() > 1)
4108 	    rb_hash_foreach(hash, any_p_i_fast, (VALUE)args);
4109 	else
4110 	    rb_hash_foreach(hash, any_p_i, (VALUE)args);
4111     }
4112     return args[0];
4113 }
4114 
4115 /*
4116  * call-seq:
4117  *   hsh.dig(key, ...)                 -> object
4118  *
4119  * Extracts the nested value specified by the sequence of <i>key</i>
4120  * objects by calling +dig+ at each step, returning +nil+ if any
4121  * intermediate step is +nil+.
4122  *
4123  *   h = { foo: {bar: {baz: 1}}}
4124  *
4125  *   h.dig(:foo, :bar, :baz)     #=> 1
4126  *   h.dig(:foo, :zot, :xyz)     #=> nil
4127  *
4128  *   g = { foo: [10, 11, 12] }
4129  *   g.dig(:foo, 1)              #=> 11
4130  *   g.dig(:foo, 1, 0)           #=> TypeError: Integer does not have #dig method
4131  *   g.dig(:foo, :bar)           #=> TypeError: no implicit conversion of Symbol into Integer
4132  */
4133 
4134 static VALUE
rb_hash_dig(int argc,VALUE * argv,VALUE self)4135 rb_hash_dig(int argc, VALUE *argv, VALUE self)
4136 {
4137     rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
4138     self = rb_hash_aref(self, *argv);
4139     if (!--argc) return self;
4140     ++argv;
4141     return rb_obj_dig(argc, argv, self, Qnil);
4142 }
4143 
4144 static int
hash_le_i(VALUE key,VALUE value,VALUE arg)4145 hash_le_i(VALUE key, VALUE value, VALUE arg)
4146 {
4147     VALUE *args = (VALUE *)arg;
4148     VALUE v = rb_hash_lookup2(args[0], key, Qundef);
4149     if (v != Qundef && rb_equal(value, v)) return ST_CONTINUE;
4150     args[1] = Qfalse;
4151     return ST_STOP;
4152 }
4153 
4154 static VALUE
hash_le(VALUE hash1,VALUE hash2)4155 hash_le(VALUE hash1, VALUE hash2)
4156 {
4157     VALUE args[2];
4158     args[0] = hash2;
4159     args[1] = Qtrue;
4160     rb_hash_foreach(hash1, hash_le_i, (VALUE)args);
4161     return args[1];
4162 }
4163 
4164 /*
4165  * call-seq:
4166  *   hash <= other -> true or false
4167  *
4168  * Returns <code>true</code> if <i>hash</i> is subset of
4169  * <i>other</i> or equals to <i>other</i>.
4170  *
4171  *    h1 = {a:1, b:2}
4172  *    h2 = {a:1, b:2, c:3}
4173  *    h1 <= h2   #=> true
4174  *    h2 <= h1   #=> false
4175  *    h1 <= h1   #=> true
4176  */
4177 static VALUE
rb_hash_le(VALUE hash,VALUE other)4178 rb_hash_le(VALUE hash, VALUE other)
4179 {
4180     other = to_hash(other);
4181     if (RHASH_SIZE(hash) > RHASH_SIZE(other)) return Qfalse;
4182     return hash_le(hash, other);
4183 }
4184 
4185 /*
4186  * call-seq:
4187  *   hash < other -> true or false
4188  *
4189  * Returns <code>true</code> if <i>hash</i> is subset of
4190  * <i>other</i>.
4191  *
4192  *    h1 = {a:1, b:2}
4193  *    h2 = {a:1, b:2, c:3}
4194  *    h1 < h2    #=> true
4195  *    h2 < h1    #=> false
4196  *    h1 < h1    #=> false
4197  */
4198 static VALUE
rb_hash_lt(VALUE hash,VALUE other)4199 rb_hash_lt(VALUE hash, VALUE other)
4200 {
4201     other = to_hash(other);
4202     if (RHASH_SIZE(hash) >= RHASH_SIZE(other)) return Qfalse;
4203     return hash_le(hash, other);
4204 }
4205 
4206 /*
4207  * call-seq:
4208  *   hash >= other -> true or false
4209  *
4210  * Returns <code>true</code> if <i>other</i> is subset of
4211  * <i>hash</i> or equals to <i>hash</i>.
4212  *
4213  *    h1 = {a:1, b:2}
4214  *    h2 = {a:1, b:2, c:3}
4215  *    h1 >= h2   #=> false
4216  *    h2 >= h1   #=> true
4217  *    h1 >= h1   #=> true
4218  */
4219 static VALUE
rb_hash_ge(VALUE hash,VALUE other)4220 rb_hash_ge(VALUE hash, VALUE other)
4221 {
4222     other = to_hash(other);
4223     if (RHASH_SIZE(hash) < RHASH_SIZE(other)) return Qfalse;
4224     return hash_le(other, hash);
4225 }
4226 
4227 /*
4228  * call-seq:
4229  *   hash > other -> true or false
4230  *
4231  * Returns <code>true</code> if <i>other</i> is subset of
4232  * <i>hash</i>.
4233  *
4234  *    h1 = {a:1, b:2}
4235  *    h2 = {a:1, b:2, c:3}
4236  *    h1 > h2    #=> false
4237  *    h2 > h1    #=> true
4238  *    h1 > h1    #=> false
4239  */
4240 static VALUE
rb_hash_gt(VALUE hash,VALUE other)4241 rb_hash_gt(VALUE hash, VALUE other)
4242 {
4243     other = to_hash(other);
4244     if (RHASH_SIZE(hash) <= RHASH_SIZE(other)) return Qfalse;
4245     return hash_le(other, hash);
4246 }
4247 
4248 static VALUE
hash_proc_call(VALUE key,VALUE hash,int argc,const VALUE * argv,VALUE passed_proc)4249 hash_proc_call(VALUE key, VALUE hash, int argc, const VALUE *argv, VALUE passed_proc)
4250 {
4251     rb_check_arity(argc, 1, 1);
4252     return rb_hash_aref(hash, *argv);
4253 }
4254 
4255 /*
4256  * call-seq:
4257  *   hash.to_proc -> proc
4258  *
4259  * Returns a Proc which maps keys to values.
4260  *
4261  *   h = {a:1, b:2}
4262  *   hp = h.to_proc
4263  *   hp.call(:a)          #=> 1
4264  *   hp.call(:b)          #=> 2
4265  *   hp.call(:c)          #=> nil
4266  *   [:a, :b, :c].map(&h) #=> [1, 2, nil]
4267  */
4268 static VALUE
rb_hash_to_proc(VALUE hash)4269 rb_hash_to_proc(VALUE hash)
4270 {
4271     return rb_func_proc_new(hash_proc_call, hash);
4272 }
4273 
4274 static int
add_new_i(st_data_t * key,st_data_t * val,st_data_t arg,int existing)4275 add_new_i(st_data_t *key, st_data_t *val, st_data_t arg, int existing)
4276 {
4277     VALUE *args = (VALUE *)arg;
4278     if (existing) return ST_STOP;
4279     RB_OBJ_WRITTEN(args[0], Qundef, (VALUE)*key);
4280     RB_OBJ_WRITE(args[0], (VALUE *)val, args[1]);
4281     return ST_CONTINUE;
4282 }
4283 
4284 /*
4285  * add +key+ to +val+ pair if +hash+ does not contain +key+.
4286  * returns non-zero if +key+ was contained.
4287  */
4288 int
rb_hash_add_new_element(VALUE hash,VALUE key,VALUE val)4289 rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val)
4290 {
4291     st_table *tbl;
4292     int ret = 0;
4293     VALUE args[2];
4294     args[0] = hash;
4295     args[1] = val;
4296 
4297     if (RHASH_AR_TABLE_P(hash)) {
4298         hash_ar_table(hash);
4299 
4300         ret = ar_update(hash, (st_data_t)key, add_new_i, (st_data_t)args);
4301         if (ret != -1) {
4302             return ret;
4303         }
4304         ar_try_convert_table(hash);
4305     }
4306     tbl = RHASH_TBL_RAW(hash);
4307     return st_update(tbl, (st_data_t)key, add_new_i, (st_data_t)args);
4308 
4309 }
4310 
4311 static st_data_t
key_stringify(VALUE key)4312 key_stringify(VALUE key)
4313 {
4314     return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
4315         rb_hash_key_str(key) : key;
4316 }
4317 
4318 static void
ar_bulk_insert(VALUE hash,long argc,const VALUE * argv)4319 ar_bulk_insert(VALUE hash, long argc, const VALUE *argv)
4320 {
4321     long i;
4322     for (i = 0; i < argc; ) {
4323         st_data_t k = key_stringify(argv[i++]);
4324         st_data_t v = argv[i++];
4325         ar_insert(hash, k, v);
4326         RB_OBJ_WRITTEN(hash, Qundef, k);
4327         RB_OBJ_WRITTEN(hash, Qundef, v);
4328     }
4329 }
4330 
4331 MJIT_FUNC_EXPORTED void
rb_hash_bulk_insert(long argc,const VALUE * argv,VALUE hash)4332 rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash)
4333 {
4334     HASH_ASSERT(argc % 2 == 0);
4335     if (argc > 0) {
4336         st_index_t size = argc / 2;
4337 
4338         if (RHASH_TABLE_NULL_P(hash)) {
4339             if (size <= RHASH_AR_TABLE_MAX_SIZE) {
4340                 hash_ar_table(hash);
4341             }
4342             else {
4343                 RHASH_TBL_RAW(hash);
4344             }
4345         }
4346 
4347         if (RHASH_AR_TABLE_P(hash) &&
4348             (RHASH_AR_TABLE_SIZE(hash) + size <= RHASH_AR_TABLE_MAX_SIZE)) {
4349             ar_bulk_insert(hash, argc, argv);
4350         }
4351         else {
4352             rb_hash_bulk_insert_into_st_table(argc, argv, hash);
4353         }
4354     }
4355 }
4356 
4357 static int path_tainted = -1;
4358 
4359 static char **origenviron;
4360 #ifdef _WIN32
4361 #define GET_ENVIRON(e) ((e) = rb_w32_get_environ())
4362 #define FREE_ENVIRON(e) rb_w32_free_environ(e)
4363 static char **my_environ;
4364 #undef environ
4365 #define environ my_environ
4366 #undef getenv
4367 static char *(*w32_getenv)(const char*);
4368 static char *
w32_getenv_unknown(const char * name)4369 w32_getenv_unknown(const char *name)
4370 {
4371     char *(*func)(const char*);
4372     if (rb_locale_encindex() == rb_ascii8bit_encindex()) {
4373 	func = rb_w32_getenv;
4374     }
4375     else {
4376 	func = rb_w32_ugetenv;
4377     }
4378     /* atomic assignment in flat memory model */
4379     return (w32_getenv = func)(name);
4380 }
4381 static char *(*w32_getenv)(const char*) = w32_getenv_unknown;
4382 #define getenv(n) w32_getenv(n)
4383 #elif defined(__APPLE__)
4384 #undef environ
4385 #define environ (*_NSGetEnviron())
4386 #define GET_ENVIRON(e) (e)
4387 #define FREE_ENVIRON(e)
4388 #else
4389 extern char **environ;
4390 #define GET_ENVIRON(e) (e)
4391 #define FREE_ENVIRON(e)
4392 #endif
4393 #ifdef ENV_IGNORECASE
4394 #define ENVMATCH(s1, s2) (STRCASECMP((s1), (s2)) == 0)
4395 #define ENVNMATCH(s1, s2, n) (STRNCASECMP((s1), (s2), (n)) == 0)
4396 #else
4397 #define ENVMATCH(n1, n2) (strcmp((n1), (n2)) == 0)
4398 #define ENVNMATCH(s1, s2, n) (memcmp((s1), (s2), (n)) == 0)
4399 #endif
4400 
4401 static VALUE
env_enc_str_new(const char * ptr,long len,rb_encoding * enc)4402 env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
4403 {
4404 #ifdef _WIN32
4405     rb_encoding *internal = rb_default_internal_encoding();
4406     const int ecflags = ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE;
4407     rb_encoding *utf8 = rb_utf8_encoding();
4408     VALUE str = rb_enc_str_new(NULL, 0, (internal ? internal : enc));
4409     if (NIL_P(rb_str_cat_conv_enc_opts(str, 0, ptr, len, utf8, ecflags, Qnil))) {
4410         rb_str_initialize(str, ptr, len, NULL);
4411     }
4412 #else
4413     VALUE str = rb_external_str_new_with_enc(ptr, len, enc);
4414 #endif
4415 
4416     OBJ_TAINT(str);
4417     rb_obj_freeze(str);
4418     return str;
4419 }
4420 
4421 static VALUE
env_enc_str_new_cstr(const char * ptr,rb_encoding * enc)4422 env_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
4423 {
4424     return env_enc_str_new(ptr, strlen(ptr), enc);
4425 }
4426 
4427 static VALUE
env_str_new(const char * ptr,long len)4428 env_str_new(const char *ptr, long len)
4429 {
4430     return env_enc_str_new(ptr, len, rb_locale_encoding());
4431 }
4432 
4433 static VALUE
env_str_new2(const char * ptr)4434 env_str_new2(const char *ptr)
4435 {
4436     if (!ptr) return Qnil;
4437     return env_str_new(ptr, strlen(ptr));
4438 }
4439 
4440 static int env_path_tainted(const char *);
4441 
4442 static const char TZ_ENV[] = "TZ";
4443 extern bool ruby_tz_uptodate_p;
4444 
4445 static rb_encoding *
env_encoding_for(const char * name,const char * ptr)4446 env_encoding_for(const char *name, const char *ptr)
4447 {
4448     if (ENVMATCH(name, PATH_ENV) && !env_path_tainted(ptr)) {
4449 	return rb_filesystem_encoding();
4450     }
4451     else {
4452 	return rb_locale_encoding();
4453     }
4454 }
4455 
4456 static VALUE
env_name_new(const char * name,const char * ptr)4457 env_name_new(const char *name, const char *ptr)
4458 {
4459     return env_enc_str_new_cstr(ptr, env_encoding_for(name, ptr));
4460 }
4461 
4462 static void *
get_env_cstr(volatile VALUE * pstr,const char * name)4463 get_env_cstr(
4464 #ifdef _WIN32
4465     volatile VALUE *pstr,
4466 #else
4467     VALUE str,
4468 #endif
4469     const char *name)
4470 {
4471 #ifdef _WIN32
4472     VALUE str = *pstr;
4473 #endif
4474     char *var;
4475     rb_encoding *enc = rb_enc_get(str);
4476     if (!rb_enc_asciicompat(enc)) {
4477 	rb_raise(rb_eArgError, "bad environment variable %s: ASCII incompatible encoding: %s",
4478 		 name, rb_enc_name(enc));
4479     }
4480 #ifdef _WIN32
4481     if (!rb_enc_str_asciionly_p(str)) {
4482 	*pstr = str = rb_str_conv_enc(str, NULL, rb_utf8_encoding());
4483     }
4484 #endif
4485     var = RSTRING_PTR(str);
4486     if (memchr(var, '\0', RSTRING_LEN(str))) {
4487 	rb_raise(rb_eArgError, "bad environment variable %s: contains null byte", name);
4488     }
4489     return rb_str_fill_terminator(str, 1); /* ASCII compatible */
4490 }
4491 
4492 #ifdef _WIN32
4493 #define get_env_ptr(var, val) \
4494     (var = get_env_cstr(&(val), #var))
4495 #else
4496 #define get_env_ptr(var, val) \
4497     (var = get_env_cstr(val, #var))
4498 #endif
4499 
4500 static inline const char *
env_name(volatile VALUE * s)4501 env_name(volatile VALUE *s)
4502 {
4503     const char *name;
4504     SafeStringValue(*s);
4505     get_env_ptr(name, *s);
4506     return name;
4507 }
4508 
4509 #define env_name(s) env_name(&(s))
4510 
4511 static VALUE
env_delete(VALUE obj,VALUE name)4512 env_delete(VALUE obj, VALUE name)
4513 {
4514     const char *nam, *val;
4515 
4516     nam = env_name(name);
4517     val = getenv(nam);
4518     if (val) {
4519 	VALUE value = env_str_new2(val);
4520 
4521 	ruby_setenv(nam, 0);
4522 	if (ENVMATCH(nam, PATH_ENV)) {
4523 	    RB_GC_GUARD(name);
4524 	    path_tainted = 0;
4525 	}
4526 	else if (ENVMATCH(nam, TZ_ENV)) {
4527 	    ruby_tz_uptodate_p = FALSE;
4528 	}
4529 	return value;
4530     }
4531     return Qnil;
4532 }
4533 
4534 /*
4535  * call-seq:
4536  *   ENV.delete(name)                  -> value
4537  *   ENV.delete(name) { |name| block } -> value
4538  *
4539  * Deletes the environment variable with +name+ and returns the value of the
4540  * variable.  If a block is given it will be called when the named environment
4541  * does not exist.
4542  */
4543 static VALUE
env_delete_m(VALUE obj,VALUE name)4544 env_delete_m(VALUE obj, VALUE name)
4545 {
4546     VALUE val;
4547 
4548     val = env_delete(obj, name);
4549     if (NIL_P(val) && rb_block_given_p()) rb_yield(name);
4550     return val;
4551 }
4552 
4553 /*
4554  * call-seq:
4555  *   ENV[name] -> value
4556  *
4557  * Retrieves the +value+ for environment variable +name+ as a String.  Returns
4558  * +nil+ if the named variable does not exist.
4559  */
4560 static VALUE
rb_f_getenv(VALUE obj,VALUE name)4561 rb_f_getenv(VALUE obj, VALUE name)
4562 {
4563     const char *nam, *env;
4564 
4565     nam = env_name(name);
4566     env = getenv(nam);
4567     if (env) {
4568 	return env_name_new(nam, env);
4569     }
4570     return Qnil;
4571 }
4572 
4573 /*
4574  * :yield: missing_name
4575  * call-seq:
4576  *   ENV.fetch(name)                          -> value
4577  *   ENV.fetch(name, default)                 -> value
4578  *   ENV.fetch(name) { |missing_name| block } -> value
4579  *
4580  * Retrieves the environment variable +name+.
4581  *
4582  * If the given name does not exist and neither +default+ nor a block a
4583  * provided an KeyError is raised.  If a block is given it is called with
4584  * the missing name to provide a value.  If a default value is given it will
4585  * be returned when no block is given.
4586  */
4587 static VALUE
env_fetch(int argc,VALUE * argv)4588 env_fetch(int argc, VALUE *argv)
4589 {
4590     VALUE key;
4591     long block_given;
4592     const char *nam, *env;
4593 
4594     rb_check_arity(argc, 1, 2);
4595     key = argv[0];
4596     block_given = rb_block_given_p();
4597     if (block_given && argc == 2) {
4598 	rb_warn("block supersedes default value argument");
4599     }
4600     nam = env_name(key);
4601     env = getenv(nam);
4602     if (!env) {
4603 	if (block_given) return rb_yield(key);
4604 	if (argc == 1) {
4605 	    rb_key_err_raise(rb_sprintf("key not found: \"%"PRIsVALUE"\"", key), envtbl, key);
4606 	}
4607 	return argv[1];
4608     }
4609     return env_name_new(nam, env);
4610 }
4611 
4612 static void
path_tainted_p(const char * path)4613 path_tainted_p(const char *path)
4614 {
4615     path_tainted = rb_path_check(path)?0:1;
4616 }
4617 
4618 static int
env_path_tainted(const char * path)4619 env_path_tainted(const char *path)
4620 {
4621     if (path_tainted < 0) {
4622 	path_tainted_p(path);
4623     }
4624     return path_tainted;
4625 }
4626 
4627 int
rb_env_path_tainted(void)4628 rb_env_path_tainted(void)
4629 {
4630     if (path_tainted < 0) {
4631 	path_tainted_p(getenv(PATH_ENV));
4632     }
4633     return path_tainted;
4634 }
4635 
4636 #if defined(_WIN32) || (defined(HAVE_SETENV) && defined(HAVE_UNSETENV))
4637 #elif defined __sun
4638 static int
in_origenv(const char * str)4639 in_origenv(const char *str)
4640 {
4641     char **env;
4642     for (env = origenviron; *env; ++env) {
4643 	if (*env == str) return 1;
4644     }
4645     return 0;
4646 }
4647 #else
4648 static int
envix(const char * nam)4649 envix(const char *nam)
4650 {
4651     register int i, len = strlen(nam);
4652     char **env;
4653 
4654     env = GET_ENVIRON(environ);
4655     for (i = 0; env[i]; i++) {
4656 	if (ENVNMATCH(env[i],nam,len) && env[i][len] == '=')
4657 	    break;			/* memcmp must come first to avoid */
4658     }					/* potential SEGV's */
4659     FREE_ENVIRON(environ);
4660     return i;
4661 }
4662 #endif
4663 
4664 #if defined(_WIN32)
4665 static size_t
getenvsize(const WCHAR * p)4666 getenvsize(const WCHAR* p)
4667 {
4668     const WCHAR* porg = p;
4669     while (*p++) p += lstrlenW(p) + 1;
4670     return p - porg + 1;
4671 }
4672 
4673 static size_t
getenvblocksize(void)4674 getenvblocksize(void)
4675 {
4676 #ifdef _MAX_ENV
4677     return _MAX_ENV;
4678 #else
4679     return 32767;
4680 #endif
4681 }
4682 
4683 static int
check_envsize(size_t n)4684 check_envsize(size_t n)
4685 {
4686     if (_WIN32_WINNT < 0x0600 && rb_w32_osver() < 6) {
4687 	/* https://msdn.microsoft.com/en-us/library/windows/desktop/ms682653(v=vs.85).aspx */
4688 	/* Windows Server 2003 and Windows XP: The maximum size of the
4689 	 * environment block for the process is 32,767 characters. */
4690 	WCHAR* p = GetEnvironmentStringsW();
4691 	if (!p) return -1; /* never happen */
4692 	n += getenvsize(p);
4693 	FreeEnvironmentStringsW(p);
4694 	if (n >= getenvblocksize()) {
4695 	    return -1;
4696 	}
4697     }
4698     return 0;
4699 }
4700 #endif
4701 
4702 #if defined(_WIN32) || \
4703   (defined(__sun) && !(defined(HAVE_SETENV) && defined(HAVE_UNSETENV)))
4704 
4705 NORETURN(static void invalid_envname(const char *name));
4706 
4707 static void
invalid_envname(const char * name)4708 invalid_envname(const char *name)
4709 {
4710     rb_syserr_fail_str(EINVAL, rb_sprintf("ruby_setenv(%s)", name));
4711 }
4712 
4713 static const char *
check_envname(const char * name)4714 check_envname(const char *name)
4715 {
4716     if (strchr(name, '=')) {
4717 	invalid_envname(name);
4718     }
4719     return name;
4720 }
4721 #endif
4722 
4723 void
ruby_setenv(const char * name,const char * value)4724 ruby_setenv(const char *name, const char *value)
4725 {
4726 #if defined(_WIN32)
4727 # if defined(MINGW_HAS_SECURE_API) || RUBY_MSVCRT_VERSION >= 80
4728 #   define HAVE__WPUTENV_S 1
4729 # endif
4730     VALUE buf;
4731     WCHAR *wname;
4732     WCHAR *wvalue = 0;
4733     int failed = 0;
4734     int len;
4735     check_envname(name);
4736     len = MultiByteToWideChar(CP_UTF8, 0, name, -1, NULL, 0);
4737     if (value) {
4738 	int len2;
4739 	len2 = MultiByteToWideChar(CP_UTF8, 0, value, -1, NULL, 0);
4740 	if (check_envsize((size_t)len + len2)) { /* len and len2 include '\0' */
4741 	    goto fail;  /* 2 for '=' & '\0' */
4742 	}
4743 	wname = ALLOCV_N(WCHAR, buf, len + len2);
4744 	wvalue = wname + len;
4745 	MultiByteToWideChar(CP_UTF8, 0, name, -1, wname, len);
4746 	MultiByteToWideChar(CP_UTF8, 0, value, -1, wvalue, len2);
4747 #ifndef HAVE__WPUTENV_S
4748 	wname[len-1] = L'=';
4749 #endif
4750     }
4751     else {
4752 	wname = ALLOCV_N(WCHAR, buf, len + 1);
4753 	MultiByteToWideChar(CP_UTF8, 0, name, -1, wname, len);
4754 	wvalue = wname + len;
4755 	*wvalue = L'\0';
4756 #ifndef HAVE__WPUTENV_S
4757 	wname[len-1] = L'=';
4758 #endif
4759     }
4760 #ifndef HAVE__WPUTENV_S
4761     failed = _wputenv(wname);
4762 #else
4763     failed = _wputenv_s(wname, wvalue);
4764 #endif
4765     ALLOCV_END(buf);
4766     /* even if putenv() failed, clean up and try to delete the
4767      * variable from the system area. */
4768     if (!value || !*value) {
4769 	/* putenv() doesn't handle empty value */
4770 	if (!SetEnvironmentVariable(name, value) &&
4771 	    GetLastError() != ERROR_ENVVAR_NOT_FOUND) goto fail;
4772     }
4773     if (failed) {
4774       fail:
4775 	invalid_envname(name);
4776     }
4777 #elif defined(HAVE_SETENV) && defined(HAVE_UNSETENV)
4778     if (value) {
4779 	if (setenv(name, value, 1))
4780 	    rb_sys_fail_str(rb_sprintf("setenv(%s)", name));
4781     }
4782     else {
4783 #ifdef VOID_UNSETENV
4784 	unsetenv(name);
4785 #else
4786 	if (unsetenv(name))
4787 	    rb_sys_fail_str(rb_sprintf("unsetenv(%s)", name));
4788 #endif
4789     }
4790 #elif defined __sun
4791     /* Solaris 9 (or earlier) does not have setenv(3C) and unsetenv(3C). */
4792     /* The below code was tested on Solaris 10 by:
4793          % ./configure ac_cv_func_setenv=no ac_cv_func_unsetenv=no
4794     */
4795     size_t len, mem_size;
4796     char **env_ptr, *str, *mem_ptr;
4797 
4798     check_envname(name);
4799     len = strlen(name);
4800     if (value) {
4801 	mem_size = len + strlen(value) + 2;
4802 	mem_ptr = malloc(mem_size);
4803 	if (mem_ptr == NULL)
4804 	    rb_sys_fail_str(rb_sprintf("malloc("PRIuSIZE")", mem_size));
4805 	snprintf(mem_ptr, mem_size, "%s=%s", name, value);
4806     }
4807     for (env_ptr = GET_ENVIRON(environ); (str = *env_ptr) != 0; ++env_ptr) {
4808 	if (!strncmp(str, name, len) && str[len] == '=') {
4809 	    if (!in_origenv(str)) free(str);
4810 	    while ((env_ptr[0] = env_ptr[1]) != 0) env_ptr++;
4811 	    break;
4812 	}
4813     }
4814     if (value) {
4815 	if (putenv(mem_ptr)) {
4816 	    free(mem_ptr);
4817 	    rb_sys_fail_str(rb_sprintf("putenv(%s)", name));
4818 	}
4819     }
4820 #else  /* WIN32 */
4821     size_t len;
4822     int i;
4823 
4824     i=envix(name);		        /* where does it go? */
4825 
4826     if (environ == origenviron) {	/* need we copy environment? */
4827 	int j;
4828 	int max;
4829 	char **tmpenv;
4830 
4831 	for (max = i; environ[max]; max++) ;
4832 	tmpenv = ALLOC_N(char*, max+2);
4833 	for (j=0; j<max; j++)		/* copy environment */
4834 	    tmpenv[j] = ruby_strdup(environ[j]);
4835 	tmpenv[max] = 0;
4836 	environ = tmpenv;		/* tell exec where it is now */
4837     }
4838     if (environ[i]) {
4839 	char **envp = origenviron;
4840 	while (*envp && *envp != environ[i]) envp++;
4841 	if (!*envp)
4842 	    xfree(environ[i]);
4843 	if (!value) {
4844 	    while (environ[i]) {
4845 		environ[i] = environ[i+1];
4846 		i++;
4847 	    }
4848 	    return;
4849 	}
4850     }
4851     else {			/* does not exist yet */
4852 	if (!value) return;
4853 	REALLOC_N(environ, char*, i+2);	/* just expand it a bit */
4854 	environ[i+1] = 0;	/* make sure it's null terminated */
4855     }
4856     len = strlen(name) + strlen(value) + 2;
4857     environ[i] = ALLOC_N(char, len);
4858     snprintf(environ[i],len,"%s=%s",name,value); /* all that work just for this */
4859 #endif /* WIN32 */
4860 }
4861 
4862 void
ruby_unsetenv(const char * name)4863 ruby_unsetenv(const char *name)
4864 {
4865     ruby_setenv(name, 0);
4866 }
4867 
4868 /*
4869  * call-seq:
4870  *   ENV[name] = value
4871  *   ENV.store(name, value) -> value
4872  *
4873  * Sets the environment variable +name+ to +value+.  If the value given is
4874  * +nil+ the environment variable is deleted.
4875  * +name+ must be a string.
4876  *
4877  */
4878 static VALUE
env_aset(VALUE obj,VALUE nm,VALUE val)4879 env_aset(VALUE obj, VALUE nm, VALUE val)
4880 {
4881     char *name, *value;
4882 
4883     if (NIL_P(val)) {
4884 	env_delete(obj, nm);
4885 	return Qnil;
4886     }
4887     SafeStringValue(nm);
4888     SafeStringValue(val);
4889     /* nm can be modified in `val.to_str`, don't get `name` before
4890      * check for `val` */
4891     get_env_ptr(name, nm);
4892     get_env_ptr(value, val);
4893 
4894     ruby_setenv(name, value);
4895     if (ENVMATCH(name, PATH_ENV)) {
4896 	RB_GC_GUARD(nm);
4897 	if (OBJ_TAINTED(val)) {
4898 	    /* already tainted, no check */
4899 	    path_tainted = 1;
4900 	    return val;
4901 	}
4902 	else {
4903 	    path_tainted_p(value);
4904 	}
4905     }
4906     else if (ENVMATCH(name, TZ_ENV)) {
4907 	ruby_tz_uptodate_p = FALSE;
4908     }
4909     return val;
4910 }
4911 
4912 /*
4913  * call-seq:
4914  *   ENV.keys -> Array
4915  *
4916  * Returns every environment variable name in an Array
4917  */
4918 static VALUE
env_keys(void)4919 env_keys(void)
4920 {
4921     char **env;
4922     VALUE ary;
4923 
4924     ary = rb_ary_new();
4925     env = GET_ENVIRON(environ);
4926     while (*env) {
4927 	char *s = strchr(*env, '=');
4928 	if (s) {
4929 	    rb_ary_push(ary, env_str_new(*env, s-*env));
4930 	}
4931 	env++;
4932     }
4933     FREE_ENVIRON(environ);
4934     return ary;
4935 }
4936 
4937 static VALUE
rb_env_size(VALUE ehash,VALUE args,VALUE eobj)4938 rb_env_size(VALUE ehash, VALUE args, VALUE eobj)
4939 {
4940     char **env;
4941     long cnt = 0;
4942 
4943     env = GET_ENVIRON(environ);
4944     for (; *env ; ++env) {
4945 	if (strchr(*env, '=')) {
4946 	    cnt++;
4947 	}
4948     }
4949     FREE_ENVIRON(environ);
4950     return LONG2FIX(cnt);
4951 }
4952 
4953 /*
4954  * call-seq:
4955  *   ENV.each_key { |name| block } -> Hash
4956  *   ENV.each_key                  -> Enumerator
4957  *
4958  * Yields each environment variable name.
4959  *
4960  * An Enumerator is returned if no block is given.
4961  */
4962 static VALUE
env_each_key(VALUE ehash)4963 env_each_key(VALUE ehash)
4964 {
4965     VALUE keys;
4966     long i;
4967 
4968     RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size);
4969     keys = env_keys();
4970     for (i=0; i<RARRAY_LEN(keys); i++) {
4971 	rb_yield(RARRAY_AREF(keys, i));
4972     }
4973     return ehash;
4974 }
4975 
4976 /*
4977  * call-seq:
4978  *   ENV.values -> Array
4979  *
4980  * Returns every environment variable value as an Array
4981  */
4982 static VALUE
env_values(void)4983 env_values(void)
4984 {
4985     VALUE ary;
4986     char **env;
4987 
4988     ary = rb_ary_new();
4989     env = GET_ENVIRON(environ);
4990     while (*env) {
4991 	char *s = strchr(*env, '=');
4992 	if (s) {
4993 	    rb_ary_push(ary, env_str_new2(s+1));
4994 	}
4995 	env++;
4996     }
4997     FREE_ENVIRON(environ);
4998     return ary;
4999 }
5000 
5001 /*
5002  * call-seq:
5003  *   ENV.each_value { |value| block } -> Hash
5004  *   ENV.each_value                   -> Enumerator
5005  *
5006  * Yields each environment variable +value+.
5007  *
5008  * An Enumerator is returned if no block was given.
5009  */
5010 static VALUE
env_each_value(VALUE ehash)5011 env_each_value(VALUE ehash)
5012 {
5013     VALUE values;
5014     long i;
5015 
5016     RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size);
5017     values = env_values();
5018     for (i=0; i<RARRAY_LEN(values); i++) {
5019 	rb_yield(RARRAY_AREF(values, i));
5020     }
5021     return ehash;
5022 }
5023 
5024 /*
5025  * call-seq:
5026  *   ENV.each      { |name, value| block } -> Hash
5027  *   ENV.each                              -> Enumerator
5028  *   ENV.each_pair { |name, value| block } -> Hash
5029  *   ENV.each_pair                         -> Enumerator
5030  *
5031  * Yields each environment variable +name+ and +value+.
5032  *
5033  * If no block is given an Enumerator is returned.
5034  */
5035 static VALUE
env_each_pair(VALUE ehash)5036 env_each_pair(VALUE ehash)
5037 {
5038     char **env;
5039     VALUE ary;
5040     long i;
5041 
5042     RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size);
5043 
5044     ary = rb_ary_new();
5045     env = GET_ENVIRON(environ);
5046     while (*env) {
5047 	char *s = strchr(*env, '=');
5048 	if (s) {
5049 	    rb_ary_push(ary, env_str_new(*env, s-*env));
5050 	    rb_ary_push(ary, env_str_new2(s+1));
5051 	}
5052 	env++;
5053     }
5054     FREE_ENVIRON(environ);
5055 
5056     if (rb_block_arity() > 1) {
5057 	for (i=0; i<RARRAY_LEN(ary); i+=2) {
5058 	    rb_yield_values(2, RARRAY_AREF(ary, i), RARRAY_AREF(ary, i+1));
5059 	}
5060     }
5061     else {
5062 	for (i=0; i<RARRAY_LEN(ary); i+=2) {
5063 	    rb_yield(rb_assoc_new(RARRAY_AREF(ary, i), RARRAY_AREF(ary, i+1)));
5064 	}
5065     }
5066     return ehash;
5067 }
5068 
5069 /*
5070  * call-seq:
5071  *   ENV.reject! { |name, value| block } -> ENV or nil
5072  *   ENV.reject!                         -> Enumerator
5073  *
5074  * Equivalent to ENV.delete_if but returns +nil+ if no changes were made.
5075  *
5076  * Returns an Enumerator if no block was given.
5077  */
5078 static VALUE
env_reject_bang(VALUE ehash)5079 env_reject_bang(VALUE ehash)
5080 {
5081     VALUE keys;
5082     long i;
5083     int del = 0;
5084 
5085     RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size);
5086     keys = env_keys();
5087     RBASIC_CLEAR_CLASS(keys);
5088     for (i=0; i<RARRAY_LEN(keys); i++) {
5089 	VALUE val = rb_f_getenv(Qnil, RARRAY_AREF(keys, i));
5090 	if (!NIL_P(val)) {
5091 	    if (RTEST(rb_yield_values(2, RARRAY_AREF(keys, i), val))) {
5092 		FL_UNSET(RARRAY_AREF(keys, i), FL_TAINT);
5093 		env_delete(Qnil, RARRAY_AREF(keys, i));
5094 		del++;
5095 	    }
5096 	}
5097     }
5098     RB_GC_GUARD(keys);
5099     if (del == 0) return Qnil;
5100     return envtbl;
5101 }
5102 
5103 /*
5104  * call-seq:
5105  *   ENV.delete_if { |name, value| block } -> Hash
5106  *   ENV.delete_if                         -> Enumerator
5107  *
5108  * Deletes every environment variable for which the block evaluates to +true+.
5109  *
5110  * If no block is given an enumerator is returned instead.
5111  */
5112 static VALUE
env_delete_if(VALUE ehash)5113 env_delete_if(VALUE ehash)
5114 {
5115     RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size);
5116     env_reject_bang(ehash);
5117     return envtbl;
5118 }
5119 
5120 /*
5121  * call-seq:
5122  *   ENV.values_at(name, ...) -> Array
5123  *
5124  * Returns an array containing the environment variable values associated with
5125  * the given names.  See also ENV.select.
5126  */
5127 static VALUE
env_values_at(int argc,VALUE * argv)5128 env_values_at(int argc, VALUE *argv)
5129 {
5130     VALUE result;
5131     long i;
5132 
5133     result = rb_ary_new();
5134     for (i=0; i<argc; i++) {
5135 	rb_ary_push(result, rb_f_getenv(Qnil, argv[i]));
5136     }
5137     return result;
5138 }
5139 
5140 /*
5141  * call-seq:
5142  *   ENV.select { |name, value| block } -> Hash
5143  *   ENV.select                         -> Enumerator
5144  *   ENV.filter { |name, value| block } -> Hash
5145  *   ENV.filter                         -> Enumerator
5146  *
5147  * Returns a copy of the environment for entries where the block returns true.
5148  *
5149  * Returns an Enumerator if no block was given.
5150  *
5151  * ENV.filter is an alias for ENV.select.
5152  */
5153 static VALUE
env_select(VALUE ehash)5154 env_select(VALUE ehash)
5155 {
5156     VALUE result;
5157     VALUE keys;
5158     long i;
5159 
5160     RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size);
5161     result = rb_hash_new();
5162     keys = env_keys();
5163     for (i = 0; i < RARRAY_LEN(keys); ++i) {
5164 	VALUE key = RARRAY_AREF(keys, i);
5165 	VALUE val = rb_f_getenv(Qnil, key);
5166 	if (!NIL_P(val)) {
5167 	    if (RTEST(rb_yield_values(2, key, val))) {
5168 		rb_hash_aset(result, key, val);
5169 	    }
5170 	}
5171     }
5172     RB_GC_GUARD(keys);
5173 
5174     return result;
5175 }
5176 
5177 /*
5178  * call-seq:
5179  *   ENV.select! { |name, value| block } -> ENV or nil
5180  *   ENV.select!                         -> Enumerator
5181  *   ENV.filter! { |name, value| block } -> ENV or nil
5182  *   ENV.filter!                         -> Enumerator
5183  *
5184  * Equivalent to ENV.keep_if but returns +nil+ if no changes were made.
5185  *
5186  * ENV.filter! is an alias for ENV.select!.
5187  */
5188 static VALUE
env_select_bang(VALUE ehash)5189 env_select_bang(VALUE ehash)
5190 {
5191     VALUE keys;
5192     long i;
5193     int del = 0;
5194 
5195     RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size);
5196     keys = env_keys();
5197     RBASIC_CLEAR_CLASS(keys);
5198     for (i=0; i<RARRAY_LEN(keys); i++) {
5199 	VALUE val = rb_f_getenv(Qnil, RARRAY_AREF(keys, i));
5200 	if (!NIL_P(val)) {
5201 	    if (!RTEST(rb_yield_values(2, RARRAY_AREF(keys, i), val))) {
5202 		FL_UNSET(RARRAY_AREF(keys, i), FL_TAINT);
5203 		env_delete(Qnil, RARRAY_AREF(keys, i));
5204 		del++;
5205 	    }
5206 	}
5207     }
5208     RB_GC_GUARD(keys);
5209     if (del == 0) return Qnil;
5210     return envtbl;
5211 }
5212 
5213 /*
5214  * call-seq:
5215  *   ENV.keep_if { |name, value| block } -> Hash
5216  *   ENV.keep_if                         -> Enumerator
5217  *
5218  * Deletes every environment variable where the block evaluates to +false+.
5219  *
5220  * Returns an enumerator if no block was given.
5221  */
5222 static VALUE
env_keep_if(VALUE ehash)5223 env_keep_if(VALUE ehash)
5224 {
5225     RETURN_SIZED_ENUMERATOR(ehash, 0, 0, rb_env_size);
5226     env_select_bang(ehash);
5227     return envtbl;
5228 }
5229 
5230 /*
5231  *  call-seq:
5232  *     ENV.slice(*keys) -> a_hash
5233  *
5234  *  Returns a hash containing only the given keys from ENV and their values.
5235  *
5236  *     ENV.slice("TERM","HOME")  #=> {"TERM"=>"xterm-256color", "HOME"=>"/Users/rhc"}
5237  */
5238 static VALUE
env_slice(int argc,VALUE * argv)5239 env_slice(int argc, VALUE *argv)
5240 {
5241     int i;
5242     VALUE key, value, result;
5243 
5244     if (argc == 0) {
5245         return rb_hash_new();
5246     }
5247     result = rb_hash_new_with_size(argc);
5248 
5249     for (i = 0; i < argc; i++) {
5250         key = argv[i];
5251         value = rb_f_getenv(Qnil, key);
5252         if (value != Qnil)
5253             rb_hash_aset(result, key, value);
5254     }
5255 
5256     return result;
5257 }
5258 
5259 /*
5260  * call-seq:
5261  *   ENV.clear
5262  *
5263  * Removes every environment variable.
5264  */
5265 VALUE
rb_env_clear(void)5266 rb_env_clear(void)
5267 {
5268     VALUE keys;
5269     long i;
5270 
5271     keys = env_keys();
5272     for (i=0; i<RARRAY_LEN(keys); i++) {
5273 	VALUE val = rb_f_getenv(Qnil, RARRAY_AREF(keys, i));
5274 	if (!NIL_P(val)) {
5275 	    env_delete(Qnil, RARRAY_AREF(keys, i));
5276 	}
5277     }
5278     RB_GC_GUARD(keys);
5279     return envtbl;
5280 }
5281 
5282 /*
5283  * call-seq:
5284  *   ENV.to_s -> "ENV"
5285  *
5286  * Returns "ENV"
5287  */
5288 static VALUE
env_to_s(void)5289 env_to_s(void)
5290 {
5291     return rb_usascii_str_new2("ENV");
5292 }
5293 
5294 /*
5295  * call-seq:
5296  *   ENV.inspect -> string
5297  *
5298  * Returns the contents of the environment as a String.
5299  */
5300 static VALUE
env_inspect(void)5301 env_inspect(void)
5302 {
5303     char **env;
5304     VALUE str, i;
5305 
5306     str = rb_str_buf_new2("{");
5307     env = GET_ENVIRON(environ);
5308     while (*env) {
5309 	char *s = strchr(*env, '=');
5310 
5311 	if (env != environ) {
5312 	    rb_str_buf_cat2(str, ", ");
5313 	}
5314 	if (s) {
5315 	    rb_str_buf_cat2(str, "\"");
5316 	    rb_str_buf_cat(str, *env, s-*env);
5317 	    rb_str_buf_cat2(str, "\"=>");
5318 	    i = rb_inspect(rb_str_new2(s+1));
5319 	    rb_str_buf_append(str, i);
5320 	}
5321 	env++;
5322     }
5323     FREE_ENVIRON(environ);
5324     rb_str_buf_cat2(str, "}");
5325     OBJ_TAINT(str);
5326 
5327     return str;
5328 }
5329 
5330 /*
5331  * call-seq:
5332  *   ENV.to_a -> Array
5333  *
5334  * Converts the environment variables into an array of names and value arrays.
5335  *
5336  *   ENV.to_a # => [["TERM", "xterm-color"], ["SHELL", "/bin/bash"], ...]
5337  *
5338  */
5339 static VALUE
env_to_a(void)5340 env_to_a(void)
5341 {
5342     char **env;
5343     VALUE ary;
5344 
5345     ary = rb_ary_new();
5346     env = GET_ENVIRON(environ);
5347     while (*env) {
5348 	char *s = strchr(*env, '=');
5349 	if (s) {
5350 	    rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env),
5351 					  env_str_new2(s+1)));
5352 	}
5353 	env++;
5354     }
5355     FREE_ENVIRON(environ);
5356     return ary;
5357 }
5358 
5359 /*
5360  * call-seq:
5361  *   ENV.rehash
5362  *
5363  * Re-hashing the environment variables does nothing.  It is provided for
5364  * compatibility with Hash.
5365  */
5366 static VALUE
env_none(void)5367 env_none(void)
5368 {
5369     return Qnil;
5370 }
5371 
5372 /*
5373  * call-seq:
5374  *   ENV.length
5375  *   ENV.size
5376  *
5377  * Returns the number of environment variables.
5378  */
5379 static VALUE
env_size(void)5380 env_size(void)
5381 {
5382     int i;
5383     char **env;
5384 
5385     env = GET_ENVIRON(environ);
5386     for (i=0; env[i]; i++)
5387 	;
5388     FREE_ENVIRON(environ);
5389     return INT2FIX(i);
5390 }
5391 
5392 /*
5393  * call-seq:
5394  *   ENV.empty? -> true or false
5395  *
5396  * Returns true when there are no environment variables
5397  */
5398 static VALUE
env_empty_p(void)5399 env_empty_p(void)
5400 {
5401     char **env;
5402 
5403     env = GET_ENVIRON(environ);
5404     if (env[0] == 0) {
5405 	FREE_ENVIRON(environ);
5406 	return Qtrue;
5407     }
5408     FREE_ENVIRON(environ);
5409     return Qfalse;
5410 }
5411 
5412 /*
5413  * call-seq:
5414  *   ENV.key?(name)     -> true or false
5415  *   ENV.include?(name) -> true or false
5416  *   ENV.has_key?(name) -> true or false
5417  *   ENV.member?(name)  -> true or false
5418  *
5419  * Returns +true+ if there is an environment variable with the given +name+.
5420  */
5421 static VALUE
env_has_key(VALUE env,VALUE key)5422 env_has_key(VALUE env, VALUE key)
5423 {
5424     const char *s;
5425 
5426     s = env_name(key);
5427     if (getenv(s)) return Qtrue;
5428     return Qfalse;
5429 }
5430 
5431 /*
5432  * call-seq:
5433  *   ENV.assoc(name) -> Array or nil
5434  *
5435  * Returns an Array of the name and value of the environment variable with
5436  * +name+ or +nil+ if the name cannot be found.
5437  */
5438 static VALUE
env_assoc(VALUE env,VALUE key)5439 env_assoc(VALUE env, VALUE key)
5440 {
5441     const char *s, *e;
5442 
5443     s = env_name(key);
5444     e = getenv(s);
5445     if (e) return rb_assoc_new(key, env_str_new2(e));
5446     return Qnil;
5447 }
5448 
5449 /*
5450  * call-seq:
5451  *   ENV.value?(value) -> true or false
5452  *   ENV.has_value?(value) -> true or false
5453  *
5454  * Returns +true+ if there is an environment variable with the given +value+.
5455  */
5456 static VALUE
env_has_value(VALUE dmy,VALUE obj)5457 env_has_value(VALUE dmy, VALUE obj)
5458 {
5459     char **env;
5460 
5461     obj = rb_check_string_type(obj);
5462     if (NIL_P(obj)) return Qnil;
5463     rb_check_safe_obj(obj);
5464     env = GET_ENVIRON(environ);
5465     while (*env) {
5466 	char *s = strchr(*env, '=');
5467 	if (s++) {
5468 	    long len = strlen(s);
5469 	    if (RSTRING_LEN(obj) == len && strncmp(s, RSTRING_PTR(obj), len) == 0) {
5470 		FREE_ENVIRON(environ);
5471 		return Qtrue;
5472 	    }
5473 	}
5474 	env++;
5475     }
5476     FREE_ENVIRON(environ);
5477     return Qfalse;
5478 }
5479 
5480 /*
5481  * call-seq:
5482  *   ENV.rassoc(value)
5483  *
5484  * Returns an Array of the name and value of the environment variable with
5485  * +value+ or +nil+ if the value cannot be found.
5486  */
5487 static VALUE
env_rassoc(VALUE dmy,VALUE obj)5488 env_rassoc(VALUE dmy, VALUE obj)
5489 {
5490     char **env;
5491 
5492     obj = rb_check_string_type(obj);
5493     if (NIL_P(obj)) return Qnil;
5494     rb_check_safe_obj(obj);
5495     env = GET_ENVIRON(environ);
5496     while (*env) {
5497 	char *s = strchr(*env, '=');
5498 	if (s++) {
5499 	    long len = strlen(s);
5500 	    if (RSTRING_LEN(obj) == len && strncmp(s, RSTRING_PTR(obj), len) == 0) {
5501 		VALUE result = rb_assoc_new(rb_tainted_str_new(*env, s-*env-1), obj);
5502 		FREE_ENVIRON(environ);
5503 		return result;
5504 	    }
5505 	}
5506 	env++;
5507     }
5508     FREE_ENVIRON(environ);
5509     return Qnil;
5510 }
5511 
5512 /*
5513  * call-seq:
5514  *   ENV.key(value) -> name
5515  *
5516  * Returns the name of the environment variable with +value+.  If the value is
5517  * not found +nil+ is returned.
5518  */
5519 static VALUE
env_key(VALUE dmy,VALUE value)5520 env_key(VALUE dmy, VALUE value)
5521 {
5522     char **env;
5523     VALUE str;
5524 
5525     SafeStringValue(value);
5526     env = GET_ENVIRON(environ);
5527     while (*env) {
5528 	char *s = strchr(*env, '=');
5529 	if (s++) {
5530 	    long len = strlen(s);
5531 	    if (RSTRING_LEN(value) == len && strncmp(s, RSTRING_PTR(value), len) == 0) {
5532 		str = env_str_new(*env, s-*env-1);
5533 		FREE_ENVIRON(environ);
5534 		return str;
5535 	    }
5536 	}
5537 	env++;
5538     }
5539     FREE_ENVIRON(environ);
5540     return Qnil;
5541 }
5542 
5543 /*
5544  * call-seq:
5545  *   ENV.index(value) -> key
5546  *
5547  * Deprecated method that is equivalent to ENV.key
5548  */
5549 static VALUE
env_index(VALUE dmy,VALUE value)5550 env_index(VALUE dmy, VALUE value)
5551 {
5552     rb_warn("ENV.index is deprecated; use ENV.key");
5553     return env_key(dmy, value);
5554 }
5555 
5556 /*
5557  * call-seq:
5558  *   ENV.to_hash -> hash
5559  *
5560  * Creates a hash with a copy of the environment variables.
5561  *
5562  */
5563 static VALUE
env_to_hash(void)5564 env_to_hash(void)
5565 {
5566     char **env;
5567     VALUE hash;
5568 
5569     hash = rb_hash_new();
5570     env = GET_ENVIRON(environ);
5571     while (*env) {
5572 	char *s = strchr(*env, '=');
5573 	if (s) {
5574 	    rb_hash_aset(hash, env_str_new(*env, s-*env),
5575 			       env_str_new2(s+1));
5576 	}
5577 	env++;
5578     }
5579     FREE_ENVIRON(environ);
5580     return hash;
5581 }
5582 
5583 /*
5584  * call-seq:
5585  *   ENV.to_h                        -> hash
5586  *   ENV.to_h {|name, value| block } -> hash
5587  *
5588  * Creates a hash with a copy of the environment variables.
5589  *
5590  */
5591 static VALUE
env_to_h(void)5592 env_to_h(void)
5593 {
5594     VALUE hash = env_to_hash();
5595     if (rb_block_given_p()) {
5596         hash = rb_hash_to_h_block(hash);
5597     }
5598     return hash;
5599 }
5600 
5601 /*
5602  * call-seq:
5603  *   ENV.reject { |name, value| block } -> Hash
5604  *   ENV.reject                         -> Enumerator
5605  *
5606  * Same as ENV.delete_if, but works on (and returns) a copy of the
5607  * environment.
5608  */
5609 static VALUE
env_reject(void)5610 env_reject(void)
5611 {
5612     return rb_hash_delete_if(env_to_hash());
5613 }
5614 
5615 /*
5616  * call-seq:
5617  *   ENV.shift -> Array or nil
5618  *
5619  * Removes an environment variable name-value pair from ENV and returns it as
5620  * an Array.  Returns +nil+ if when the environment is empty.
5621  */
5622 static VALUE
env_shift(void)5623 env_shift(void)
5624 {
5625     char **env;
5626     VALUE result = Qnil;
5627 
5628     env = GET_ENVIRON(environ);
5629     if (*env) {
5630 	char *s = strchr(*env, '=');
5631 	if (s) {
5632 	    VALUE key = env_str_new(*env, s-*env);
5633 	    VALUE val = env_str_new2(getenv(RSTRING_PTR(key)));
5634 	    env_delete(Qnil, key);
5635 	    result = rb_assoc_new(key, val);
5636 	}
5637     }
5638     FREE_ENVIRON(environ);
5639     return result;
5640 }
5641 
5642 /*
5643  * call-seq:
5644  *   ENV.invert -> Hash
5645  *
5646  * Returns a new hash created by using environment variable names as values
5647  * and values as names.
5648  */
5649 static VALUE
env_invert(void)5650 env_invert(void)
5651 {
5652     return rb_hash_invert(env_to_hash());
5653 }
5654 
5655 static void
keylist_delete(VALUE keys,VALUE key)5656 keylist_delete(VALUE keys, VALUE key)
5657 {
5658     long keylen, elen;
5659     const char *keyptr, *eptr;
5660     RSTRING_GETMEM(key, keyptr, keylen);
5661     for (long i=0; i<RARRAY_LEN(keys); i++) {
5662         VALUE e = RARRAY_AREF(keys, i);
5663         RSTRING_GETMEM(e, eptr, elen);
5664         if (elen != keylen) continue;
5665         if (!ENVNMATCH(keyptr, eptr, elen)) continue;
5666         rb_ary_delete_at(keys, i);
5667         return;
5668     }
5669 }
5670 
5671 static int
env_replace_i(VALUE key,VALUE val,VALUE keys)5672 env_replace_i(VALUE key, VALUE val, VALUE keys)
5673 {
5674     env_name(key);
5675     env_aset(Qnil, key, val);
5676 
5677     keylist_delete(keys, key);
5678     return ST_CONTINUE;
5679 }
5680 
5681 /*
5682  * call-seq:
5683  *   ENV.replace(hash) -> env
5684  *
5685  * Replaces the contents of the environment variables with the contents of
5686  * +hash+.
5687  */
5688 static VALUE
env_replace(VALUE env,VALUE hash)5689 env_replace(VALUE env, VALUE hash)
5690 {
5691     VALUE keys;
5692     long i;
5693 
5694     keys = env_keys();
5695     if (env == hash) return env;
5696     hash = to_hash(hash);
5697     rb_hash_foreach(hash, env_replace_i, keys);
5698 
5699     for (i=0; i<RARRAY_LEN(keys); i++) {
5700 	env_delete(env, RARRAY_AREF(keys, i));
5701     }
5702     RB_GC_GUARD(keys);
5703     return env;
5704 }
5705 
5706 static int
env_update_i(VALUE key,VALUE val)5707 env_update_i(VALUE key, VALUE val)
5708 {
5709     if (rb_block_given_p()) {
5710 	val = rb_yield_values(3, key, rb_f_getenv(Qnil, key), val);
5711     }
5712     env_aset(Qnil, key, val);
5713     return ST_CONTINUE;
5714 }
5715 
5716 /*
5717  * call-seq:
5718  *   ENV.update(hash)                                        -> Hash
5719  *   ENV.update(hash) { |name, old_value, new_value| block } -> Hash
5720  *
5721  * Adds the contents of +hash+ to the environment variables.  If no block is
5722  * specified entries with duplicate keys are overwritten, otherwise the value
5723  * of each duplicate name is determined by calling the block with the key, its
5724  * value from the environment and its value from the hash.
5725  */
5726 static VALUE
env_update(VALUE env,VALUE hash)5727 env_update(VALUE env, VALUE hash)
5728 {
5729     if (env == hash) return env;
5730     hash = to_hash(hash);
5731     rb_hash_foreach(hash, env_update_i, 0);
5732     return env;
5733 }
5734 
5735 /*
5736  *  A Hash is a dictionary-like collection of unique keys and their values.
5737  *  Also called associative arrays, they are similar to Arrays, but where an
5738  *  Array uses integers as its index, a Hash allows you to use any object
5739  *  type.
5740  *
5741  *  Hashes enumerate their values in the order that the corresponding keys
5742  *  were inserted.
5743  *
5744  *  A Hash can be easily created by using its implicit form:
5745  *
5746  *    grades = { "Jane Doe" => 10, "Jim Doe" => 6 }
5747  *
5748  *  Hashes allow an alternate syntax for keys that are symbols.
5749  *  Instead of
5750  *
5751  *    options = { :font_size => 10, :font_family => "Arial" }
5752  *
5753  *  You could write it as:
5754  *
5755  *    options = { font_size: 10, font_family: "Arial" }
5756  *
5757  *  Each named key is a symbol you can access in hash:
5758  *
5759  *    options[:font_size]  # => 10
5760  *
5761  *  A Hash can also be created through its ::new method:
5762  *
5763  *    grades = Hash.new
5764  *    grades["Dorothy Doe"] = 9
5765  *
5766  *  Hashes have a <em>default value</em> that is returned when accessing
5767  *  keys that do not exist in the hash. If no default is set +nil+ is used.
5768  *  You can set the default value by sending it as an argument to Hash.new:
5769  *
5770  *    grades = Hash.new(0)
5771  *
5772  *  Or by using the #default= method:
5773  *
5774  *    grades = {"Timmy Doe" => 8}
5775  *    grades.default = 0
5776  *
5777  *  Accessing a value in a Hash requires using its key:
5778  *
5779  *    puts grades["Jane Doe"] # => 0
5780  *
5781  *  === Common Uses
5782  *
5783  *  Hashes are an easy way to represent data structures, such as
5784  *
5785  *    books         = {}
5786  *    books[:matz]  = "The Ruby Programming Language"
5787  *    books[:black] = "The Well-Grounded Rubyist"
5788  *
5789  *  Hashes are also commonly used as a way to have named parameters in
5790  *  functions. Note that no brackets are used below. If a hash is the last
5791  *  argument on a method call, no braces are needed, thus creating a really
5792  *  clean interface:
5793  *
5794  *    Person.create(name: "John Doe", age: 27)
5795  *
5796  *    def self.create(params)
5797  *      @name = params[:name]
5798  *      @age  = params[:age]
5799  *    end
5800  *
5801  *  === Hash Keys
5802  *
5803  *  Two objects refer to the same hash key when their <code>hash</code> value
5804  *  is identical and the two objects are <code>eql?</code> to each other.
5805  *
5806  *  A user-defined class may be used as a hash key if the <code>hash</code>
5807  *  and <code>eql?</code> methods are overridden to provide meaningful
5808  *  behavior.  By default, separate instances refer to separate hash keys.
5809  *
5810  *  A typical implementation of <code>hash</code> is based on the
5811  *  object's data while <code>eql?</code> is usually aliased to the overridden
5812  *  <code>==</code> method:
5813  *
5814  *    class Book
5815  *      attr_reader :author, :title
5816  *
5817  *      def initialize(author, title)
5818  *        @author = author
5819  *        @title = title
5820  *      end
5821  *
5822  *      def ==(other)
5823  *        self.class === other and
5824  *          other.author == @author and
5825  *          other.title == @title
5826  *      end
5827  *
5828  *      alias eql? ==
5829  *
5830  *      def hash
5831  *        @author.hash ^ @title.hash # XOR
5832  *      end
5833  *    end
5834  *
5835  *    book1 = Book.new 'matz', 'Ruby in a Nutshell'
5836  *    book2 = Book.new 'matz', 'Ruby in a Nutshell'
5837  *
5838  *    reviews = {}
5839  *
5840  *    reviews[book1] = 'Great reference!'
5841  *    reviews[book2] = 'Nice and compact!'
5842  *
5843  *    reviews.length #=> 1
5844  *
5845  *  See also Object#hash and Object#eql?
5846  */
5847 
5848 void
Init_Hash(void)5849 Init_Hash(void)
5850 {
5851 #undef rb_intern
5852 #define rb_intern(str) rb_intern_const(str)
5853 
5854     RUBY_ASSERT(RESERVED_HASH_VAL == st_reserved_hash_val);
5855     RUBY_ASSERT(RESERVED_HASH_SUBSTITUTION_VAL == st_reserved_hash_substitution_val);
5856 
5857     id_hash = rb_intern("hash");
5858     id_yield = rb_intern("yield");
5859     id_default = rb_intern("default");
5860     id_flatten_bang = rb_intern("flatten!");
5861 
5862     rb_cHash = rb_define_class("Hash", rb_cObject);
5863 
5864     rb_include_module(rb_cHash, rb_mEnumerable);
5865 
5866     rb_define_alloc_func(rb_cHash, empty_hash_alloc);
5867     rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1);
5868     rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1);
5869     rb_define_method(rb_cHash, "initialize", rb_hash_initialize, -1);
5870     rb_define_method(rb_cHash, "initialize_copy", rb_hash_initialize_copy, 1);
5871     rb_define_method(rb_cHash, "rehash", rb_hash_rehash, 0);
5872 
5873     rb_define_method(rb_cHash, "to_hash", rb_hash_to_hash, 0);
5874     rb_define_method(rb_cHash, "to_h", rb_hash_to_h, 0);
5875     rb_define_method(rb_cHash, "to_a", rb_hash_to_a, 0);
5876     rb_define_method(rb_cHash, "inspect", rb_hash_inspect, 0);
5877     rb_define_alias(rb_cHash, "to_s", "inspect");
5878     rb_define_method(rb_cHash, "to_proc", rb_hash_to_proc, 0);
5879 
5880     rb_define_method(rb_cHash, "==", rb_hash_equal, 1);
5881     rb_define_method(rb_cHash, "[]", rb_hash_aref, 1);
5882     rb_define_method(rb_cHash, "hash", rb_hash_hash, 0);
5883     rb_define_method(rb_cHash, "eql?", rb_hash_eql, 1);
5884     rb_define_method(rb_cHash, "fetch", rb_hash_fetch_m, -1);
5885     rb_define_method(rb_cHash, "[]=", rb_hash_aset, 2);
5886     rb_define_method(rb_cHash, "store", rb_hash_aset, 2);
5887     rb_define_method(rb_cHash, "default", rb_hash_default, -1);
5888     rb_define_method(rb_cHash, "default=", rb_hash_set_default, 1);
5889     rb_define_method(rb_cHash, "default_proc", rb_hash_default_proc, 0);
5890     rb_define_method(rb_cHash, "default_proc=", rb_hash_set_default_proc, 1);
5891     rb_define_method(rb_cHash, "key", rb_hash_key, 1);
5892     rb_define_method(rb_cHash, "index", rb_hash_index, 1);
5893     rb_define_method(rb_cHash, "size", rb_hash_size, 0);
5894     rb_define_method(rb_cHash, "length", rb_hash_size, 0);
5895     rb_define_method(rb_cHash, "empty?", rb_hash_empty_p, 0);
5896 
5897     rb_define_method(rb_cHash, "each_value", rb_hash_each_value, 0);
5898     rb_define_method(rb_cHash, "each_key", rb_hash_each_key, 0);
5899     rb_define_method(rb_cHash, "each_pair", rb_hash_each_pair, 0);
5900     rb_define_method(rb_cHash, "each", rb_hash_each_pair, 0);
5901 
5902     rb_define_method(rb_cHash, "transform_keys", rb_hash_transform_keys, 0);
5903     rb_define_method(rb_cHash, "transform_keys!", rb_hash_transform_keys_bang, 0);
5904     rb_define_method(rb_cHash, "transform_values", rb_hash_transform_values, 0);
5905     rb_define_method(rb_cHash, "transform_values!", rb_hash_transform_values_bang, 0);
5906 
5907     rb_define_method(rb_cHash, "keys", rb_hash_keys, 0);
5908     rb_define_method(rb_cHash, "values", rb_hash_values, 0);
5909     rb_define_method(rb_cHash, "values_at", rb_hash_values_at, -1);
5910     rb_define_method(rb_cHash, "fetch_values", rb_hash_fetch_values, -1);
5911 
5912     rb_define_method(rb_cHash, "shift", rb_hash_shift, 0);
5913     rb_define_method(rb_cHash, "delete", rb_hash_delete_m, 1);
5914     rb_define_method(rb_cHash, "delete_if", rb_hash_delete_if, 0);
5915     rb_define_method(rb_cHash, "keep_if", rb_hash_keep_if, 0);
5916     rb_define_method(rb_cHash, "select", rb_hash_select, 0);
5917     rb_define_method(rb_cHash, "select!", rb_hash_select_bang, 0);
5918     rb_define_method(rb_cHash, "filter", rb_hash_select, 0);
5919     rb_define_method(rb_cHash, "filter!", rb_hash_select_bang, 0);
5920     rb_define_method(rb_cHash, "reject", rb_hash_reject, 0);
5921     rb_define_method(rb_cHash, "reject!", rb_hash_reject_bang, 0);
5922     rb_define_method(rb_cHash, "slice", rb_hash_slice, -1);
5923     rb_define_method(rb_cHash, "clear", rb_hash_clear, 0);
5924     rb_define_method(rb_cHash, "invert", rb_hash_invert, 0);
5925     rb_define_method(rb_cHash, "update", rb_hash_update, -1);
5926     rb_define_method(rb_cHash, "replace", rb_hash_replace, 1);
5927     rb_define_method(rb_cHash, "merge!", rb_hash_update, -1);
5928     rb_define_method(rb_cHash, "merge", rb_hash_merge, -1);
5929     rb_define_method(rb_cHash, "assoc", rb_hash_assoc, 1);
5930     rb_define_method(rb_cHash, "rassoc", rb_hash_rassoc, 1);
5931     rb_define_method(rb_cHash, "flatten", rb_hash_flatten, -1);
5932     rb_define_method(rb_cHash, "compact", rb_hash_compact, 0);
5933     rb_define_method(rb_cHash, "compact!", rb_hash_compact_bang, 0);
5934 
5935     rb_define_method(rb_cHash, "include?", rb_hash_has_key, 1);
5936     rb_define_method(rb_cHash, "member?", rb_hash_has_key, 1);
5937     rb_define_method(rb_cHash, "has_key?", rb_hash_has_key, 1);
5938     rb_define_method(rb_cHash, "has_value?", rb_hash_has_value, 1);
5939     rb_define_method(rb_cHash, "key?", rb_hash_has_key, 1);
5940     rb_define_method(rb_cHash, "value?", rb_hash_has_value, 1);
5941 
5942     rb_define_method(rb_cHash, "compare_by_identity", rb_hash_compare_by_id, 0);
5943     rb_define_method(rb_cHash, "compare_by_identity?", rb_hash_compare_by_id_p, 0);
5944 
5945     rb_define_method(rb_cHash, "any?", rb_hash_any_p, -1);
5946     rb_define_method(rb_cHash, "dig", rb_hash_dig, -1);
5947 
5948     rb_define_method(rb_cHash, "<=", rb_hash_le, 1);
5949     rb_define_method(rb_cHash, "<", rb_hash_lt, 1);
5950     rb_define_method(rb_cHash, ">=", rb_hash_ge, 1);
5951     rb_define_method(rb_cHash, ">", rb_hash_gt, 1);
5952 
5953     /* Document-class: ENV
5954      *
5955      * ENV is a hash-like accessor for environment variables.
5956      */
5957 
5958     /*
5959      * Hack to get RDoc to regard ENV as a class:
5960      * envtbl = rb_define_class("ENV", rb_cObject);
5961      */
5962     origenviron = environ;
5963     envtbl = rb_obj_alloc(rb_cObject);
5964     rb_extend_object(envtbl, rb_mEnumerable);
5965 
5966     rb_define_singleton_method(envtbl, "[]", rb_f_getenv, 1);
5967     rb_define_singleton_method(envtbl, "fetch", env_fetch, -1);
5968     rb_define_singleton_method(envtbl, "[]=", env_aset, 2);
5969     rb_define_singleton_method(envtbl, "store", env_aset, 2);
5970     rb_define_singleton_method(envtbl, "each", env_each_pair, 0);
5971     rb_define_singleton_method(envtbl, "each_pair", env_each_pair, 0);
5972     rb_define_singleton_method(envtbl, "each_key", env_each_key, 0);
5973     rb_define_singleton_method(envtbl, "each_value", env_each_value, 0);
5974     rb_define_singleton_method(envtbl, "delete", env_delete_m, 1);
5975     rb_define_singleton_method(envtbl, "delete_if", env_delete_if, 0);
5976     rb_define_singleton_method(envtbl, "keep_if", env_keep_if, 0);
5977     rb_define_singleton_method(envtbl, "slice", env_slice, -1);
5978     rb_define_singleton_method(envtbl, "clear", rb_env_clear, 0);
5979     rb_define_singleton_method(envtbl, "reject", env_reject, 0);
5980     rb_define_singleton_method(envtbl, "reject!", env_reject_bang, 0);
5981     rb_define_singleton_method(envtbl, "select", env_select, 0);
5982     rb_define_singleton_method(envtbl, "select!", env_select_bang, 0);
5983     rb_define_singleton_method(envtbl, "filter", env_select, 0);
5984     rb_define_singleton_method(envtbl, "filter!", env_select_bang, 0);
5985     rb_define_singleton_method(envtbl, "shift", env_shift, 0);
5986     rb_define_singleton_method(envtbl, "invert", env_invert, 0);
5987     rb_define_singleton_method(envtbl, "replace", env_replace, 1);
5988     rb_define_singleton_method(envtbl, "update", env_update, 1);
5989     rb_define_singleton_method(envtbl, "inspect", env_inspect, 0);
5990     rb_define_singleton_method(envtbl, "rehash", env_none, 0);
5991     rb_define_singleton_method(envtbl, "to_a", env_to_a, 0);
5992     rb_define_singleton_method(envtbl, "to_s", env_to_s, 0);
5993     rb_define_singleton_method(envtbl, "key", env_key, 1);
5994     rb_define_singleton_method(envtbl, "index", env_index, 1);
5995     rb_define_singleton_method(envtbl, "size", env_size, 0);
5996     rb_define_singleton_method(envtbl, "length", env_size, 0);
5997     rb_define_singleton_method(envtbl, "empty?", env_empty_p, 0);
5998     rb_define_singleton_method(envtbl, "keys", env_keys, 0);
5999     rb_define_singleton_method(envtbl, "values", env_values, 0);
6000     rb_define_singleton_method(envtbl, "values_at", env_values_at, -1);
6001     rb_define_singleton_method(envtbl, "include?", env_has_key, 1);
6002     rb_define_singleton_method(envtbl, "member?", env_has_key, 1);
6003     rb_define_singleton_method(envtbl, "has_key?", env_has_key, 1);
6004     rb_define_singleton_method(envtbl, "has_value?", env_has_value, 1);
6005     rb_define_singleton_method(envtbl, "key?", env_has_key, 1);
6006     rb_define_singleton_method(envtbl, "value?", env_has_value, 1);
6007     rb_define_singleton_method(envtbl, "to_hash", env_to_hash, 0);
6008     rb_define_singleton_method(envtbl, "to_h", env_to_h, 0);
6009     rb_define_singleton_method(envtbl, "assoc", env_assoc, 1);
6010     rb_define_singleton_method(envtbl, "rassoc", env_rassoc, 1);
6011 
6012     /*
6013      * ENV is a Hash-like accessor for environment variables.
6014      *
6015      * See ENV (the class) for more details.
6016      */
6017     rb_define_global_const("ENV", envtbl);
6018 
6019     /* for callcc */
6020     ruby_register_rollback_func_for_ensure(hash_foreach_ensure, hash_foreach_ensure_rollback);
6021 }
6022