1 /*
2  * Copyright 2010 Andrea Mazzoleni. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY ANDREA MAZZOLENI AND CONTRIBUTORS ``AS IS''
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL ANDREA MAZZOLENI OR CONTRIBUTORS BE
19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  * POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /** \file
29  * Fixed size chained hashtable.
30  *
31  * This hashtable is a standard implementation of a chained hashtable with a fixed size.
32  *
 * Note that performance starts to degrade after reaching a load factor greater than 0.75.
 * The ::tommy_hashdyn and ::tommy_hashlin hashtables fix this problem by growing dynamically.
35  *
 * To initialize the hashtable you have to call tommy_hashtable_init() specifying
 * the fixed bucket size.
38  *
39  * \code
 * tommy_hashtable hashtable;
41  *
42  * tommy_hashtable_init(&hashtable, 1024);
43  * \endcode
44  *
45  * To insert elements in the hashtable you have to call tommy_hashtable_insert() for
46  * each element.
47  * In the insertion call you have to specify the address of the node, the
48  * address of the object, and the hash value of the key to use.
49  * The address of the object is used to initialize the tommy_node::data field
50  * of the node, and the hash to initialize the tommy_node::key field.
51  *
52  * \code
53  * struct object {
54  *     tommy_node node;
55  *     // other fields
56  *     int value;
57  * };
58  *
59  * struct object* obj = malloc(sizeof(struct object)); // creates the object
60  *
61  * obj->value = ...; // initializes the object
62  *
63  * tommy_hashtable_insert(&hashtable, &obj->node, obj, tommy_inthash_u32(obj->value)); // inserts the object
64  * \endcode
65  *
 * To find an element in the hashtable you have to call tommy_hashtable_search()
 * providing a comparison function, its argument, and the hash of the key to search.
68  *
69  * \code
70  * int compare(const void* arg, const void* obj)
71  * {
72  *     return *(const int*)arg != ((const struct object*)obj)->value;
73  * }
74  *
75  * int value_to_find = 1;
76  * struct object* obj = tommy_hashtable_search(&hashtable, compare, &value_to_find, tommy_inthash_u32(value_to_find));
77  * if (!obj) {
78  *     // not found
79  * } else {
80  *     // found
81  * }
82  * \endcode
83  *
 * To iterate over all the elements in the hashtable with the same key, you have to
 * use tommy_hashtable_bucket() and follow the tommy_node::next pointer until NULL.
 * You also have to check explicitly for the key, as the bucket may contain
 * different keys.
88  *
89  * \code
90  * tommy_node* i = tommy_hashtable_bucket(&hashtable, tommy_inthash_u32(value_to_find));
91  * while (i) {
92  *     struct object* obj = i->data; // gets the object pointer
93  *
94  *     if (obj->value == value_to_find) {
95  *         printf("%d\n", obj->value); // process the object
96  *     }
97  *
98  *     i = i->next; // goes to the next element
99  * }
100  * \endcode
101  *
102  * To remove an element from the hashtable you have to call tommy_hashtable_remove()
103  * providing a comparison function, its argument, and the hash of the key to search
104  * and remove.
105  *
106  * \code
 * struct object* obj = tommy_hashtable_remove(&hashtable, compare, &value_to_remove, tommy_inthash_u32(value_to_remove));
108  * if (obj) {
109  *     free(obj); // frees the object allocated memory
110  * }
111  * \endcode
112  *
113  * To destroy the hashtable you have to remove all the elements, and deinitialize
114  * the hashtable calling tommy_hashtable_done().
115  *
116  * \code
117  * tommy_hashtable_done(&hashtable);
118  * \endcode
119  *
 * Note that you cannot iterate over all the elements in the hashtable using the
 * hashtable itself. You have to insert all the elements also in a ::tommy_list,
 * and use the list to iterate. See the \ref multiindex example for more details.
123  */
124 
125 #ifndef __TOMMYHASHTBL_H
126 #define __TOMMYHASHTBL_H
127 
128 #include "tommyhash.h"
129 
130 /******************************************************************************/
131 /* hashtable */
132 
133 /**
134  * Hashtable node.
135  * This is the node that you have to include inside your objects.
136  */
137 typedef tommy_node tommy_hashtable_node;
138 
139 /**
140  * Fixed size chained hashtable.
141  */
142 typedef struct tommy_hashtable_struct {
143 	tommy_hashtable_node** bucket; /**< Hash buckets. One list for each hash modulus. */
144 	unsigned bucket_max; /**< Number of buckets. */
145 	unsigned bucket_mask; /**< Bit mask to access the buckets. */
146 	unsigned count; /**< Number of elements. */
147 } tommy_hashtable;
148 
149 /**
150  * Initializes the hashtable.
151  * \param buckets Minimum number of buckets to allocate. The effective number used is the next power of 2.
152  */
153 void tommy_hashtable_init(tommy_hashtable* hashtable, unsigned bucket_max);
154 
155 /**
156  * Deinitializes the hashtable.
157  *
158  * You can call this function with elements still contained,
159  * but such elements are not going to be freed by this call.
160  */
161 void tommy_hashtable_done(tommy_hashtable* hashtable);
162 
163 /**
164  * Inserts an element in the hashtable.
165  */
166 void tommy_hashtable_insert(tommy_hashtable* hashtable, tommy_hashtable_node* node, void* data, tommy_hash_t hash);
167 
168 /**
169  * Searches and removes an element from the hashtable.
170  * You have to provide a compare function and the hash of the element you want to remove.
171  * If the element is not found, 0 is returned.
172  * If more equal elements are present, the first one is removed.
173  * This operation is faster than calling tommy_hashtable_bucket() and tommy_hashtable_remove_existing() separately.
174  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
175  * The function should return 0 for equal elements, anything other for different elements.
176  * \param cmp_arg Compare argument passed as first argument of the compare function.
177  * \param hash Hash of the element to find and remove.
178  * \return The removed element, or 0 if not found.
179  */
180 void* tommy_hashtable_remove(tommy_hashtable* hashtable, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash);
181 
182 /**
183  * Gets the bucket of the specified hash.
184  * The bucket is guaranteed to contain ALL the elements with the specified hash,
185  * but it can contain also others.
186  * You can access elements in the bucket following the ::next pointer until 0.
187  * \param hash Hash of the element to find.
188  * \return The head of the bucket, or 0 if empty.
189  */
tommy_hashtable_bucket(tommy_hashtable * hashtable,tommy_hash_t hash)190 tommy_inline tommy_hashtable_node* tommy_hashtable_bucket(tommy_hashtable* hashtable, tommy_hash_t hash)
191 {
192 	return hashtable->bucket[hash & hashtable->bucket_mask];
193 }
194 
195 /**
196  * Searches an element in the hashtable.
197  * You have to provide a compare function and the hash of the element you want to find.
198  * If more equal elements are present, the first one is returned.
199  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
200  * The function should return 0 for equal elements, anything other for different elements.
201  * \param cmp_arg Compare argument passed as first argument of the compare function.
202  * \param hash Hash of the element to find.
203  * \return The first element found, or 0 if none.
204  */
tommy_hashtable_search(tommy_hashtable * hashtable,tommy_search_func * cmp,const void * cmp_arg,tommy_hash_t hash)205 tommy_inline void* tommy_hashtable_search(tommy_hashtable* hashtable, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
206 {
207 	tommy_hashtable_node* i = tommy_hashtable_bucket(hashtable, hash);
208 	while (i) {
209 		/* we first check if the hash matches, as in the same bucket we may have multiples hash values */
210 		if (i->key == hash && cmp(cmp_arg, i->data) == 0)
211 			return i->data;
212 		i = i->next;
213 	}
214 	return 0;
215 }
216 
217 /**
218  * Removes an element from the hashtable.
219  * You must already have the address of the element to remove.
220  * \return The tommy_node::data field of the node removed.
221  */
222 void* tommy_hashtable_remove_existing(tommy_hashtable* hashtable, tommy_hashtable_node* node);
223 
224 /**
225  * Gets the number of elements.
226  */
tommy_hashtable_count(tommy_hashtable * hashtable)227 tommy_inline unsigned tommy_hashtable_count(tommy_hashtable* hashtable)
228 {
229 	return hashtable->count;
230 }
231 
232 /**
233  * Gets the size of allocated memory.
234  * It includes the size of the ::tommy_hashtable_node of the stored elements.
235  */
236 tommy_size_t tommy_hashtable_memory_usage(tommy_hashtable* hashtable);
237 
238 #endif
239 
240