1 /**
2 * \file
3 * Hashtable implementation
4 *
5 * Author:
6 * Miguel de Icaza (miguel@novell.com)
7 *
8 * (C) 2006 Novell, Inc.
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining
11 * a copy of this software and associated documentation files (the
12 * "Software"), to deal in the Software without restriction, including
13 * without limitation the rights to use, copy, modify, merge, publish,
14 * distribute, sublicense, and/or sell copies of the Software, and to
15 * permit persons to whom the Software is furnished to do so, subject to
16 * the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 */
29 #include <config.h>
30 #include <stdio.h>
31 #include <math.h>
32 #include <glib.h>
33 #include "mono-hash.h"
34 #include "metadata/gc-internals.h"
35 #include <mono/utils/checked-build.h>
36 #include <mono/utils/mono-threads-coop.h>
37 #include <mono/utils/unlocked.h>
38
39 gint32 mono_g_hash_table_max_chain_length;
40
41 struct _MonoGHashTable {
42 GHashFunc hash_func;
43 GEqualFunc key_equal_func;
44
45 MonoObject **keys;
46 MonoObject **values;
47 int table_size;
48 int in_use;
49 GDestroyNotify value_destroy_func, key_destroy_func;
50 MonoGHashGCType gc_type;
51 MonoGCRootSource source;
52 const char *msg;
53 };
54
55 #if UNUSED
56 static gboolean
test_prime(int x)57 test_prime (int x)
58 {
59 if ((x & 1) != 0) {
60 int n;
61 for (n = 3; n< (int)sqrt (x); n += 2) {
62 if ((x % n) == 0)
63 return FALSE;
64 }
65 return TRUE;
66 }
67 // There is only one even prime - 2.
68 return (x == 2);
69 }
70
71 static int
calc_prime(int x)72 calc_prime (int x)
73 {
74 int i;
75
76 for (i = (x & (~1))-1; i< G_MAXINT32; i += 2) {
77 if (test_prime (i))
78 return i;
79 }
80 return x;
81 }
82 #endif
83
84 #define HASH_TABLE_MAX_LOAD_FACTOR 0.7f
85 /* We didn't really do compaction before, keep it lenient for now */
86 #define HASH_TABLE_MIN_LOAD_FACTOR 0.05f
87 /* We triple the table size at rehash time, similar with previous implementation */
88 #define HASH_TABLE_RESIZE_RATIO 3
89
mono_g_hash_table_key_store(MonoGHashTable * hash,int slot,MonoObject * key)90 static inline void mono_g_hash_table_key_store (MonoGHashTable *hash, int slot, MonoObject* key)
91 {
92 MonoObject **key_addr = &hash->keys [slot];
93 if (hash->gc_type & MONO_HASH_KEY_GC)
94 mono_gc_wbarrier_generic_store (key_addr, key);
95 else
96 *key_addr = key;
97 }
98
mono_g_hash_table_value_store(MonoGHashTable * hash,int slot,MonoObject * value)99 static inline void mono_g_hash_table_value_store (MonoGHashTable *hash, int slot, MonoObject* value)
100 {
101 MonoObject **value_addr = &hash->values [slot];
102 if (hash->gc_type & MONO_HASH_VALUE_GC)
103 mono_gc_wbarrier_generic_store (value_addr, value);
104 else
105 *value_addr = value;
106 }
107
108 /* Returns position of key or of an empty slot for it */
mono_g_hash_table_find_slot(MonoGHashTable * hash,const MonoObject * key)109 static inline int mono_g_hash_table_find_slot (MonoGHashTable *hash, const MonoObject *key)
110 {
111 guint start = ((*hash->hash_func) (key)) % hash->table_size;
112 guint i = start;
113
114 if (hash->key_equal_func) {
115 GEqualFunc equal = hash->key_equal_func;
116
117 while (hash->keys [i] && !(*equal) (hash->keys [i], key)) {
118 i++;
119 if (i == hash->table_size)
120 i = 0;
121 }
122 } else {
123 while (hash->keys [i] && hash->keys [i] != key) {
124 i++;
125 if (i == hash->table_size)
126 i = 0;
127 }
128 }
129
130 gint32 max_length = UnlockedRead (&mono_g_hash_table_max_chain_length);
131 if (i > start && (i - start) > max_length)
132 UnlockedWrite (&mono_g_hash_table_max_chain_length, i - start);
133 else if (i < start && (hash->table_size - (start - i)) > max_length)
134 UnlockedWrite (&mono_g_hash_table_max_chain_length, hash->table_size - (start - i));
135
136 return i;
137 }
138
139
140 MonoGHashTable *
mono_g_hash_table_new_type(GHashFunc hash_func,GEqualFunc key_equal_func,MonoGHashGCType type,MonoGCRootSource source,const char * msg)141 mono_g_hash_table_new_type (GHashFunc hash_func, GEqualFunc key_equal_func, MonoGHashGCType type, MonoGCRootSource source, const char *msg)
142 {
143 MonoGHashTable *hash;
144
145 if (!hash_func)
146 hash_func = g_direct_hash;
147
148 hash = g_new0 (MonoGHashTable, 1);
149
150 hash->hash_func = hash_func;
151 hash->key_equal_func = key_equal_func;
152
153 hash->table_size = g_spaced_primes_closest (1);
154 hash->keys = g_new0 (MonoObject*, hash->table_size);
155 hash->values = g_new0 (MonoObject*, hash->table_size);
156
157 hash->gc_type = type;
158 hash->source = source;
159 hash->msg = msg;
160
161 if (type > MONO_HASH_KEY_VALUE_GC)
162 g_error ("wrong type for gc hashtable");
163
164 if (hash->gc_type & MONO_HASH_KEY_GC)
165 mono_gc_register_root_wbarrier ((char*)hash->keys, sizeof (MonoObject*) * hash->table_size, mono_gc_make_vector_descr (), hash->source, hash->msg);
166 if (hash->gc_type & MONO_HASH_VALUE_GC)
167 mono_gc_register_root_wbarrier ((char*)hash->values, sizeof (MonoObject*) * hash->table_size, mono_gc_make_vector_descr (), hash->source, hash->msg);
168
169 return hash;
170 }
171
172 typedef struct {
173 MonoGHashTable *hash;
174 int new_size;
175 MonoObject **keys;
176 MonoObject **values;
177 } RehashData;
178
179 static void*
do_rehash(void * _data)180 do_rehash (void *_data)
181 {
182 RehashData *data = (RehashData *)_data;
183 MonoGHashTable *hash = data->hash;
184 int current_size, i;
185 MonoObject **old_keys;
186 MonoObject **old_values;
187
188 current_size = hash->table_size;
189 hash->table_size = data->new_size;
190 old_keys = hash->keys;
191 old_values = hash->values;
192 hash->keys = data->keys;
193 hash->values = data->values;
194
195 for (i = 0; i < current_size; i++) {
196 if (old_keys [i]) {
197 int slot = mono_g_hash_table_find_slot (hash, old_keys [i]);
198 mono_g_hash_table_key_store (hash, slot, old_keys [i]);
199 mono_g_hash_table_value_store (hash, slot, old_values [i]);
200 }
201 }
202 return NULL;
203 }
204
205 static void
rehash(MonoGHashTable * hash)206 rehash (MonoGHashTable *hash)
207 {
208 MONO_REQ_GC_UNSAFE_MODE; //we must run in unsafe mode to make rehash safe
209
210 RehashData data;
211 void *old_keys = hash->keys;
212 void *old_values = hash->values;
213
214 data.hash = hash;
215 /*
216 * Rehash to a size that can fit the current elements. Rehash relative to in_use
217 * to allow also for compaction.
218 */
219 data.new_size = g_spaced_primes_closest (hash->in_use / HASH_TABLE_MAX_LOAD_FACTOR * HASH_TABLE_RESIZE_RATIO);
220 data.keys = g_new0 (MonoObject*, data.new_size);
221 data.values = g_new0 (MonoObject*, data.new_size);
222
223 if (hash->gc_type & MONO_HASH_KEY_GC)
224 mono_gc_register_root_wbarrier ((char*)data.keys, sizeof (MonoObject*) * data.new_size, mono_gc_make_vector_descr (), hash->source, hash->msg);
225 if (hash->gc_type & MONO_HASH_VALUE_GC)
226 mono_gc_register_root_wbarrier ((char*)data.values, sizeof (MonoObject*) * data.new_size, mono_gc_make_vector_descr (), hash->source, hash->msg);
227
228 if (!mono_threads_is_coop_enabled ()) {
229 mono_gc_invoke_with_gc_lock (do_rehash, &data);
230 } else {
231 /* We cannot be preempted */
232 do_rehash (&data);
233 }
234
235 if (hash->gc_type & MONO_HASH_KEY_GC)
236 mono_gc_deregister_root ((char*)old_keys);
237 if (hash->gc_type & MONO_HASH_VALUE_GC)
238 mono_gc_deregister_root ((char*)old_values);
239
240 g_free (old_keys);
241 g_free (old_values);
242 }
243
244 /**
245 * mono_g_hash_table_size:
246 */
247 guint
mono_g_hash_table_size(MonoGHashTable * hash)248 mono_g_hash_table_size (MonoGHashTable *hash)
249 {
250 g_return_val_if_fail (hash != NULL, 0);
251
252 return hash->in_use;
253 }
254
255 /**
256 * mono_g_hash_table_lookup:
257 */
258 gpointer
mono_g_hash_table_lookup(MonoGHashTable * hash,gconstpointer key)259 mono_g_hash_table_lookup (MonoGHashTable *hash, gconstpointer key)
260 {
261 gpointer orig_key, value;
262
263 if (mono_g_hash_table_lookup_extended (hash, key, &orig_key, &value))
264 return value;
265 else
266 return NULL;
267 }
268
269 /**
270 * mono_g_hash_table_lookup_extended:
271 */
272 gboolean
mono_g_hash_table_lookup_extended(MonoGHashTable * hash,gconstpointer key,gpointer * orig_key,gpointer * value)273 mono_g_hash_table_lookup_extended (MonoGHashTable *hash, gconstpointer key, gpointer *orig_key, gpointer *value)
274 {
275 int slot;
276
277 g_return_val_if_fail (hash != NULL, FALSE);
278
279 slot = mono_g_hash_table_find_slot (hash, key);
280
281 if (hash->keys [slot]) {
282 *orig_key = hash->keys [slot];
283 *value = hash->values [slot];
284 return TRUE;
285 }
286
287 return FALSE;
288 }
289
290 /**
291 * mono_g_hash_table_foreach:
292 */
293 void
mono_g_hash_table_foreach(MonoGHashTable * hash,GHFunc func,gpointer user_data)294 mono_g_hash_table_foreach (MonoGHashTable *hash, GHFunc func, gpointer user_data)
295 {
296 int i;
297
298 g_return_if_fail (hash != NULL);
299 g_return_if_fail (func != NULL);
300
301 for (i = 0; i < hash->table_size; i++) {
302 if (hash->keys [i])
303 (*func)(hash->keys [i], hash->values [i], user_data);
304 }
305 }
306
307 gpointer
mono_g_hash_table_find(MonoGHashTable * hash,GHRFunc predicate,gpointer user_data)308 mono_g_hash_table_find (MonoGHashTable *hash, GHRFunc predicate, gpointer user_data)
309 {
310 int i;
311
312 g_return_val_if_fail (hash != NULL, NULL);
313 g_return_val_if_fail (predicate != NULL, NULL);
314
315 for (i = 0; i < hash->table_size; i++) {
316 if (hash->keys [i] && (*predicate)(hash->keys [i], hash->values [i], user_data))
317 return hash->values [i];
318 }
319 return NULL;
320 }
321
322 /**
323 * mono_g_hash_table_remove:
324 */
325 gboolean
mono_g_hash_table_remove(MonoGHashTable * hash,gconstpointer key)326 mono_g_hash_table_remove (MonoGHashTable *hash, gconstpointer key)
327 {
328 int slot, last_clear_slot;
329
330 g_return_val_if_fail (hash != NULL, FALSE);
331 slot = mono_g_hash_table_find_slot (hash, key);
332
333 if (!hash->keys [slot])
334 return FALSE;
335
336 if (hash->key_destroy_func)
337 (*hash->key_destroy_func)(hash->keys [slot]);
338 hash->keys [slot] = NULL;
339 if (hash->value_destroy_func)
340 (*hash->value_destroy_func)(hash->values [slot]);
341 hash->values [slot] = NULL;
342 hash->in_use--;
343
344 /*
345 * When we insert in the hashtable, if the required position is occupied we
346 * consecutively try out following positions. In order to be able to find
347 * if a key exists or not in the array (without traversing the entire hash)
348 * we maintain the constraint that there can be no free slots between two
349 * entries that are hashed to the same position. This means that, at search
350 * time, when we encounter a free slot we can stop looking for collissions.
351 * Similarly, at remove time, we need to shift all following slots to their
352 * normal slot, until we reach an empty slot.
353 */
354 last_clear_slot = slot;
355 slot = (slot + 1) % hash->table_size;
356 while (hash->keys [slot]) {
357 guint hashcode = ((*hash->hash_func)(hash->keys [slot])) % hash->table_size;
358 /*
359 * We try to move the current element to last_clear_slot, but only if
360 * it brings it closer to its normal position (hashcode)
361 */
362 if ((last_clear_slot < slot && (hashcode > slot || hashcode <= last_clear_slot)) ||
363 (last_clear_slot > slot && (hashcode > slot && hashcode <= last_clear_slot))) {
364 mono_g_hash_table_key_store (hash, last_clear_slot, hash->keys [slot]);
365 mono_g_hash_table_value_store (hash, last_clear_slot, hash->values [slot]);
366 hash->keys [slot] = NULL;
367 hash->values [slot] = NULL;
368 last_clear_slot = slot;
369 }
370 slot++;
371 if (slot == hash->table_size)
372 slot = 0;
373 }
374 return TRUE;
375 }
376
377 /**
378 * mono_g_hash_table_foreach_remove:
379 */
380 guint
mono_g_hash_table_foreach_remove(MonoGHashTable * hash,GHRFunc func,gpointer user_data)381 mono_g_hash_table_foreach_remove (MonoGHashTable *hash, GHRFunc func, gpointer user_data)
382 {
383 int i;
384 int count = 0;
385
386 g_return_val_if_fail (hash != NULL, 0);
387 g_return_val_if_fail (func != NULL, 0);
388
389 for (i = 0; i < hash->table_size; i++) {
390 if (hash->keys [i] && (*func)(hash->keys [i], hash->values [i], user_data)) {
391 mono_g_hash_table_remove (hash, hash->keys [i]);
392 count++;
393 /* Retry current slot in case the removal shifted elements */
394 i--;
395 }
396 }
397 if (hash->in_use < hash->table_size * HASH_TABLE_MIN_LOAD_FACTOR)
398 rehash (hash);
399 return count;
400 }
401
402 /**
403 * mono_g_hash_table_destroy:
404 */
405 void
mono_g_hash_table_destroy(MonoGHashTable * hash)406 mono_g_hash_table_destroy (MonoGHashTable *hash)
407 {
408 int i;
409
410 g_return_if_fail (hash != NULL);
411
412 if (hash->gc_type & MONO_HASH_KEY_GC)
413 mono_gc_deregister_root ((char*)hash->keys);
414 if (hash->gc_type & MONO_HASH_VALUE_GC)
415 mono_gc_deregister_root ((char*)hash->values);
416
417 for (i = 0; i < hash->table_size; i++) {
418 if (hash->keys [i]) {
419 if (hash->key_destroy_func)
420 (*hash->key_destroy_func)(hash->keys [i]);
421 if (hash->value_destroy_func)
422 (*hash->value_destroy_func)(hash->values [i]);
423 }
424 }
425 g_free (hash->keys);
426 g_free (hash->values);
427 g_free (hash);
428 }
429
430 static void
mono_g_hash_table_insert_replace(MonoGHashTable * hash,gpointer key,gpointer value,gboolean replace)431 mono_g_hash_table_insert_replace (MonoGHashTable *hash, gpointer key, gpointer value, gboolean replace)
432 {
433 int slot;
434 g_return_if_fail (hash != NULL);
435
436 if (hash->in_use > (hash->table_size * HASH_TABLE_MAX_LOAD_FACTOR))
437 rehash (hash);
438
439 slot = mono_g_hash_table_find_slot (hash, key);
440
441 if (hash->keys [slot]) {
442 if (replace) {
443 if (hash->key_destroy_func)
444 (*hash->key_destroy_func)(hash->keys [slot]);
445 mono_g_hash_table_key_store (hash, slot, (MonoObject*)key);
446 }
447 if (hash->value_destroy_func)
448 (*hash->value_destroy_func) (hash->values [slot]);
449 mono_g_hash_table_value_store (hash, slot, (MonoObject*)value);
450 } else {
451 mono_g_hash_table_key_store (hash, slot, (MonoObject*)key);
452 mono_g_hash_table_value_store (hash, slot, (MonoObject*)value);
453 hash->in_use++;
454 }
455 }
456
457 /**
458 * mono_g_hash_table_insert:
459 */
460 void
mono_g_hash_table_insert(MonoGHashTable * h,gpointer k,gpointer v)461 mono_g_hash_table_insert (MonoGHashTable *h, gpointer k, gpointer v)
462 {
463 mono_g_hash_table_insert_replace (h, k, v, FALSE);
464 }
465
466 /**
467 * mono_g_hash_table_replace:
468 */
469 void
mono_g_hash_table_replace(MonoGHashTable * h,gpointer k,gpointer v)470 mono_g_hash_table_replace(MonoGHashTable *h, gpointer k, gpointer v)
471 {
472 mono_g_hash_table_insert_replace (h, k, v, TRUE);
473 }
474
475 void
mono_g_hash_table_print_stats(MonoGHashTable * hash)476 mono_g_hash_table_print_stats (MonoGHashTable *hash)
477 {
478 int i = 0, chain_size = 0, max_chain_size = 0;
479 gboolean wrapped_around = FALSE;
480
481 while (TRUE) {
482 if (hash->keys [i]) {
483 chain_size++;
484 } else {
485 max_chain_size = MAX(max_chain_size, chain_size);
486 chain_size = 0;
487 if (wrapped_around)
488 break;
489 }
490
491 if (i == (hash->table_size - 1)) {
492 wrapped_around = TRUE;
493 i = 0;
494 } else {
495 i++;
496 }
497 }
498 /* Rehash to a size that can fit the current elements */
499 printf ("Size: %d Table Size: %d Max Chain Length: %d\n", hash->in_use, hash->table_size, max_chain_size);
500 }
501