1 /**
2  * \file
3  * Hashtable implementation
4  *
5  * Author:
6  *   Miguel de Icaza (miguel@novell.com)
7  *
8  * (C) 2006 Novell, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining
11  * a copy of this software and associated documentation files (the
12  * "Software"), to deal in the Software without restriction, including
13  * without limitation the rights to use, copy, modify, merge, publish,
14  * distribute, sublicense, and/or sell copies of the Software, and to
15  * permit persons to whom the Software is furnished to do so, subject to
16  * the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be
19  * included in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28  */
29 #include <config.h>
30 #include <stdio.h>
31 #include <math.h>
32 #include <glib.h>
33 #include "mono-hash.h"
34 #include "metadata/gc-internals.h"
35 #include <mono/utils/checked-build.h>
36 #include <mono/utils/mono-threads-coop.h>
37 #include <mono/utils/unlocked.h>
38 
39 gint32 mono_g_hash_table_max_chain_length;
40 
41 struct _MonoGHashTable {
42 	GHashFunc      hash_func;
43 	GEqualFunc     key_equal_func;
44 
45 	MonoObject **keys;
46 	MonoObject **values;
47 	int   table_size;
48 	int   in_use;
49 	GDestroyNotify value_destroy_func, key_destroy_func;
50 	MonoGHashGCType gc_type;
51 	MonoGCRootSource source;
52 	const char *msg;
53 };
54 
55 #if UNUSED
56 static gboolean
test_prime(int x)57 test_prime (int x)
58 {
59 	if ((x & 1) != 0) {
60 		int n;
61 		for (n = 3; n< (int)sqrt (x); n += 2) {
62 			if ((x % n) == 0)
63 				return FALSE;
64 		}
65 		return TRUE;
66 	}
67 	// There is only one even prime - 2.
68 	return (x == 2);
69 }
70 
71 static int
calc_prime(int x)72 calc_prime (int x)
73 {
74 	int i;
75 
76 	for (i = (x & (~1))-1; i< G_MAXINT32; i += 2) {
77 		if (test_prime (i))
78 			return i;
79 	}
80 	return x;
81 }
82 #endif
83 
84 #define HASH_TABLE_MAX_LOAD_FACTOR 0.7f
85 /* We didn't really do compaction before, keep it lenient for now */
86 #define HASH_TABLE_MIN_LOAD_FACTOR 0.05f
87 /* We triple the table size at rehash time, similar with previous implementation */
88 #define HASH_TABLE_RESIZE_RATIO 3
89 
mono_g_hash_table_key_store(MonoGHashTable * hash,int slot,MonoObject * key)90 static inline void mono_g_hash_table_key_store (MonoGHashTable *hash, int slot, MonoObject* key)
91 {
92 	MonoObject **key_addr = &hash->keys [slot];
93 	if (hash->gc_type & MONO_HASH_KEY_GC)
94 		mono_gc_wbarrier_generic_store (key_addr, key);
95 	else
96 		*key_addr = key;
97 }
98 
mono_g_hash_table_value_store(MonoGHashTable * hash,int slot,MonoObject * value)99 static inline void mono_g_hash_table_value_store (MonoGHashTable *hash, int slot, MonoObject* value)
100 {
101 	MonoObject **value_addr = &hash->values [slot];
102 	if (hash->gc_type & MONO_HASH_VALUE_GC)
103 		mono_gc_wbarrier_generic_store (value_addr, value);
104 	else
105 		*value_addr = value;
106 }
107 
108 /* Returns position of key or of an empty slot for it */
mono_g_hash_table_find_slot(MonoGHashTable * hash,const MonoObject * key)109 static inline int mono_g_hash_table_find_slot (MonoGHashTable *hash, const MonoObject *key)
110 {
111 	guint start = ((*hash->hash_func) (key)) % hash->table_size;
112 	guint i = start;
113 
114 	if (hash->key_equal_func) {
115 		GEqualFunc equal = hash->key_equal_func;
116 
117 		while (hash->keys [i] && !(*equal) (hash->keys [i], key)) {
118 			i++;
119 			if (i == hash->table_size)
120 				i = 0;
121 		}
122 	} else {
123 		while (hash->keys [i] && hash->keys [i] != key) {
124 			i++;
125 			if (i == hash->table_size)
126 				i = 0;
127 		}
128 	}
129 
130 	gint32 max_length = UnlockedRead (&mono_g_hash_table_max_chain_length);
131 	if (i > start && (i - start) > max_length)
132 		UnlockedWrite (&mono_g_hash_table_max_chain_length, i - start);
133 	else if (i < start && (hash->table_size - (start - i)) > max_length)
134 		UnlockedWrite (&mono_g_hash_table_max_chain_length, hash->table_size - (start - i));
135 
136 	return i;
137 }
138 
139 
140 MonoGHashTable *
mono_g_hash_table_new_type(GHashFunc hash_func,GEqualFunc key_equal_func,MonoGHashGCType type,MonoGCRootSource source,const char * msg)141 mono_g_hash_table_new_type (GHashFunc hash_func, GEqualFunc key_equal_func, MonoGHashGCType type, MonoGCRootSource source, const char *msg)
142 {
143 	MonoGHashTable *hash;
144 
145 	if (!hash_func)
146 		hash_func = g_direct_hash;
147 
148 	hash = g_new0 (MonoGHashTable, 1);
149 
150 	hash->hash_func = hash_func;
151 	hash->key_equal_func = key_equal_func;
152 
153 	hash->table_size = g_spaced_primes_closest (1);
154 	hash->keys = g_new0 (MonoObject*, hash->table_size);
155 	hash->values = g_new0 (MonoObject*, hash->table_size);
156 
157 	hash->gc_type = type;
158 	hash->source = source;
159 	hash->msg = msg;
160 
161 	if (type > MONO_HASH_KEY_VALUE_GC)
162 		g_error ("wrong type for gc hashtable");
163 
164 	if (hash->gc_type & MONO_HASH_KEY_GC)
165 		mono_gc_register_root_wbarrier ((char*)hash->keys, sizeof (MonoObject*) * hash->table_size, mono_gc_make_vector_descr (), hash->source, hash->msg);
166 	if (hash->gc_type & MONO_HASH_VALUE_GC)
167 		mono_gc_register_root_wbarrier ((char*)hash->values, sizeof (MonoObject*) * hash->table_size, mono_gc_make_vector_descr (), hash->source, hash->msg);
168 
169 	return hash;
170 }
171 
172 typedef struct {
173 	MonoGHashTable *hash;
174 	int new_size;
175 	MonoObject **keys;
176 	MonoObject **values;
177 } RehashData;
178 
179 static void*
do_rehash(void * _data)180 do_rehash (void *_data)
181 {
182 	RehashData *data = (RehashData *)_data;
183 	MonoGHashTable *hash = data->hash;
184 	int current_size, i;
185 	MonoObject **old_keys;
186 	MonoObject **old_values;
187 
188 	current_size = hash->table_size;
189 	hash->table_size = data->new_size;
190 	old_keys = hash->keys;
191 	old_values = hash->values;
192 	hash->keys = data->keys;
193 	hash->values = data->values;
194 
195 	for (i = 0; i < current_size; i++) {
196 		if (old_keys [i]) {
197 			int slot = mono_g_hash_table_find_slot (hash, old_keys [i]);
198 			mono_g_hash_table_key_store (hash, slot, old_keys [i]);
199 			mono_g_hash_table_value_store (hash, slot, old_values [i]);
200 		}
201 	}
202 	return NULL;
203 }
204 
205 static void
rehash(MonoGHashTable * hash)206 rehash (MonoGHashTable *hash)
207 {
208 	MONO_REQ_GC_UNSAFE_MODE; //we must run in unsafe mode to make rehash safe
209 
210 	RehashData data;
211 	void *old_keys = hash->keys;
212 	void *old_values = hash->values;
213 
214 	data.hash = hash;
215 	/*
216 	 * Rehash to a size that can fit the current elements. Rehash relative to in_use
217 	 * to allow also for compaction.
218 	 */
219 	data.new_size = g_spaced_primes_closest (hash->in_use / HASH_TABLE_MAX_LOAD_FACTOR * HASH_TABLE_RESIZE_RATIO);
220 	data.keys = g_new0 (MonoObject*, data.new_size);
221 	data.values = g_new0 (MonoObject*, data.new_size);
222 
223 	if (hash->gc_type & MONO_HASH_KEY_GC)
224 		mono_gc_register_root_wbarrier ((char*)data.keys, sizeof (MonoObject*) * data.new_size, mono_gc_make_vector_descr (), hash->source, hash->msg);
225 	if (hash->gc_type & MONO_HASH_VALUE_GC)
226 		mono_gc_register_root_wbarrier ((char*)data.values, sizeof (MonoObject*) * data.new_size, mono_gc_make_vector_descr (), hash->source, hash->msg);
227 
228 	if (!mono_threads_is_coop_enabled ()) {
229 		mono_gc_invoke_with_gc_lock (do_rehash, &data);
230 	} else {
231 		/* We cannot be preempted */
232 		do_rehash (&data);
233 	}
234 
235 	if (hash->gc_type & MONO_HASH_KEY_GC)
236 		mono_gc_deregister_root ((char*)old_keys);
237 	if (hash->gc_type & MONO_HASH_VALUE_GC)
238 		mono_gc_deregister_root ((char*)old_values);
239 
240 	g_free (old_keys);
241 	g_free (old_values);
242 }
243 
244 /**
245  * mono_g_hash_table_size:
246  */
247 guint
mono_g_hash_table_size(MonoGHashTable * hash)248 mono_g_hash_table_size (MonoGHashTable *hash)
249 {
250 	g_return_val_if_fail (hash != NULL, 0);
251 
252 	return hash->in_use;
253 }
254 
255 /**
256  * mono_g_hash_table_lookup:
257  */
258 gpointer
mono_g_hash_table_lookup(MonoGHashTable * hash,gconstpointer key)259 mono_g_hash_table_lookup (MonoGHashTable *hash, gconstpointer key)
260 {
261 	gpointer orig_key, value;
262 
263 	if (mono_g_hash_table_lookup_extended (hash, key, &orig_key, &value))
264 		return value;
265 	else
266 		return NULL;
267 }
268 
269 /**
270  * mono_g_hash_table_lookup_extended:
271  */
272 gboolean
mono_g_hash_table_lookup_extended(MonoGHashTable * hash,gconstpointer key,gpointer * orig_key,gpointer * value)273 mono_g_hash_table_lookup_extended (MonoGHashTable *hash, gconstpointer key, gpointer *orig_key, gpointer *value)
274 {
275 	int slot;
276 
277 	g_return_val_if_fail (hash != NULL, FALSE);
278 
279 	slot = mono_g_hash_table_find_slot (hash, key);
280 
281 	if (hash->keys [slot]) {
282 		*orig_key = hash->keys [slot];
283 		*value = hash->values [slot];
284 		return TRUE;
285 	}
286 
287 	return FALSE;
288 }
289 
290 /**
291  * mono_g_hash_table_foreach:
292  */
293 void
mono_g_hash_table_foreach(MonoGHashTable * hash,GHFunc func,gpointer user_data)294 mono_g_hash_table_foreach (MonoGHashTable *hash, GHFunc func, gpointer user_data)
295 {
296 	int i;
297 
298 	g_return_if_fail (hash != NULL);
299 	g_return_if_fail (func != NULL);
300 
301 	for (i = 0; i < hash->table_size; i++) {
302 		if (hash->keys [i])
303 			(*func)(hash->keys [i], hash->values [i], user_data);
304 	}
305 }
306 
307 gpointer
mono_g_hash_table_find(MonoGHashTable * hash,GHRFunc predicate,gpointer user_data)308 mono_g_hash_table_find (MonoGHashTable *hash, GHRFunc predicate, gpointer user_data)
309 {
310 	int i;
311 
312 	g_return_val_if_fail (hash != NULL, NULL);
313 	g_return_val_if_fail (predicate != NULL, NULL);
314 
315 	for (i = 0; i < hash->table_size; i++) {
316 		if (hash->keys [i] && (*predicate)(hash->keys [i], hash->values [i], user_data))
317 			return hash->values [i];
318 	}
319 	return NULL;
320 }
321 
322 /**
323  * mono_g_hash_table_remove:
324  */
325 gboolean
mono_g_hash_table_remove(MonoGHashTable * hash,gconstpointer key)326 mono_g_hash_table_remove (MonoGHashTable *hash, gconstpointer key)
327 {
328 	int slot, last_clear_slot;
329 
330 	g_return_val_if_fail (hash != NULL, FALSE);
331 	slot = mono_g_hash_table_find_slot (hash, key);
332 
333 	if (!hash->keys [slot])
334 		return FALSE;
335 
336 	if (hash->key_destroy_func)
337 		(*hash->key_destroy_func)(hash->keys [slot]);
338 	hash->keys [slot] = NULL;
339 	if (hash->value_destroy_func)
340 		(*hash->value_destroy_func)(hash->values [slot]);
341 	hash->values [slot] = NULL;
342 	hash->in_use--;
343 
344 	/*
345 	 * When we insert in the hashtable, if the required position is occupied we
346 	 * consecutively try out following positions. In order to be able to find
347 	 * if a key exists or not in the array (without traversing the entire hash)
348 	 * we maintain the constraint that there can be no free slots between two
349 	 * entries that are hashed to the same position. This means that, at search
350 	 * time, when we encounter a free slot we can stop looking for collissions.
351 	 * Similarly, at remove time, we need to shift all following slots to their
352 	 * normal slot, until we reach an empty slot.
353 	 */
354 	last_clear_slot = slot;
355 	slot = (slot + 1) % hash->table_size;
356 	while (hash->keys [slot]) {
357 		guint hashcode = ((*hash->hash_func)(hash->keys [slot])) % hash->table_size;
358 		/*
359 		 * We try to move the current element to last_clear_slot, but only if
360 		 * it brings it closer to its normal position (hashcode)
361 		 */
362 		if ((last_clear_slot < slot && (hashcode > slot || hashcode <= last_clear_slot)) ||
363 				(last_clear_slot > slot && (hashcode > slot && hashcode <= last_clear_slot))) {
364 			mono_g_hash_table_key_store (hash, last_clear_slot, hash->keys [slot]);
365 			mono_g_hash_table_value_store (hash, last_clear_slot, hash->values [slot]);
366 			hash->keys [slot] = NULL;
367 			hash->values [slot] = NULL;
368 			last_clear_slot = slot;
369 		}
370 		slot++;
371 		if (slot == hash->table_size)
372 			slot = 0;
373 	}
374 	return TRUE;
375 }
376 
377 /**
378  * mono_g_hash_table_foreach_remove:
379  */
380 guint
mono_g_hash_table_foreach_remove(MonoGHashTable * hash,GHRFunc func,gpointer user_data)381 mono_g_hash_table_foreach_remove (MonoGHashTable *hash, GHRFunc func, gpointer user_data)
382 {
383 	int i;
384 	int count = 0;
385 
386 	g_return_val_if_fail (hash != NULL, 0);
387 	g_return_val_if_fail (func != NULL, 0);
388 
389 	for (i = 0; i < hash->table_size; i++) {
390 		if (hash->keys [i] && (*func)(hash->keys [i], hash->values [i], user_data)) {
391 			mono_g_hash_table_remove (hash, hash->keys [i]);
392 			count++;
393 			/* Retry current slot in case the removal shifted elements */
394 			i--;
395 		}
396 	}
397 	if (hash->in_use < hash->table_size * HASH_TABLE_MIN_LOAD_FACTOR)
398 		rehash (hash);
399 	return count;
400 }
401 
402 /**
403  * mono_g_hash_table_destroy:
404  */
405 void
mono_g_hash_table_destroy(MonoGHashTable * hash)406 mono_g_hash_table_destroy (MonoGHashTable *hash)
407 {
408 	int i;
409 
410 	g_return_if_fail (hash != NULL);
411 
412 	if (hash->gc_type & MONO_HASH_KEY_GC)
413 		mono_gc_deregister_root ((char*)hash->keys);
414 	if (hash->gc_type & MONO_HASH_VALUE_GC)
415 		mono_gc_deregister_root ((char*)hash->values);
416 
417 	for (i = 0; i < hash->table_size; i++) {
418 		if (hash->keys [i]) {
419 			if (hash->key_destroy_func)
420 				(*hash->key_destroy_func)(hash->keys [i]);
421 			if (hash->value_destroy_func)
422 				(*hash->value_destroy_func)(hash->values [i]);
423 		}
424 	}
425 	g_free (hash->keys);
426 	g_free (hash->values);
427 	g_free (hash);
428 }
429 
430 static void
mono_g_hash_table_insert_replace(MonoGHashTable * hash,gpointer key,gpointer value,gboolean replace)431 mono_g_hash_table_insert_replace (MonoGHashTable *hash, gpointer key, gpointer value, gboolean replace)
432 {
433 	int slot;
434 	g_return_if_fail (hash != NULL);
435 
436 	if (hash->in_use > (hash->table_size * HASH_TABLE_MAX_LOAD_FACTOR))
437 		rehash (hash);
438 
439 	slot = mono_g_hash_table_find_slot (hash, key);
440 
441 	if (hash->keys [slot]) {
442 		if (replace) {
443 			if (hash->key_destroy_func)
444 				(*hash->key_destroy_func)(hash->keys [slot]);
445 			mono_g_hash_table_key_store (hash, slot, (MonoObject*)key);
446 		}
447 		if (hash->value_destroy_func)
448 			(*hash->value_destroy_func) (hash->values [slot]);
449 		mono_g_hash_table_value_store (hash, slot, (MonoObject*)value);
450 	} else {
451 		mono_g_hash_table_key_store (hash, slot, (MonoObject*)key);
452 		mono_g_hash_table_value_store (hash, slot, (MonoObject*)value);
453 		hash->in_use++;
454 	}
455 }
456 
457 /**
458  * mono_g_hash_table_insert:
459  */
460 void
mono_g_hash_table_insert(MonoGHashTable * h,gpointer k,gpointer v)461 mono_g_hash_table_insert (MonoGHashTable *h, gpointer k, gpointer v)
462 {
463 	mono_g_hash_table_insert_replace (h, k, v, FALSE);
464 }
465 
466 /**
467  * mono_g_hash_table_replace:
468  */
469 void
mono_g_hash_table_replace(MonoGHashTable * h,gpointer k,gpointer v)470 mono_g_hash_table_replace(MonoGHashTable *h, gpointer k, gpointer v)
471 {
472 	mono_g_hash_table_insert_replace (h, k, v, TRUE);
473 }
474 
475 void
mono_g_hash_table_print_stats(MonoGHashTable * hash)476 mono_g_hash_table_print_stats (MonoGHashTable *hash)
477 {
478 	int i = 0, chain_size = 0, max_chain_size = 0;
479 	gboolean wrapped_around = FALSE;
480 
481 	while (TRUE) {
482 		if (hash->keys [i]) {
483 			chain_size++;
484 		} else {
485 			max_chain_size = MAX(max_chain_size, chain_size);
486 			chain_size = 0;
487 			if (wrapped_around)
488 				break;
489 		}
490 
491 		if (i == (hash->table_size - 1)) {
492 			wrapped_around = TRUE;
493 			i = 0;
494 		} else {
495 			i++;
496 		}
497 	}
498 	/* Rehash to a size that can fit the current elements */
499 	printf ("Size: %d Table Size: %d Max Chain Length: %d\n", hash->in_use, hash->table_size, max_chain_size);
500 }
501