1 /*   (C) Copyright 2001, 2002, 2003, 2004, 2005 Stijn van Dongen
2  *   (C) Copyright 2006, 2007, 2008, 2009  Stijn van Dongen
3  *
4  * This file is part of tingea.  You can redistribute and/or modify tingea
5  * under the terms of the GNU General Public License; either version 3 of the
6  * License or (at your option) any later version.  You should have received a
7  * copy of the GPL along with tingea, in the file COPYING.
8 */
9 
10 #ifndef tingea_hash_h
11 #define tingea_hash_h
12 
13 #include <stdio.h>
14 
15 /* TODO:
16  * -  make sort routines for keys and values by key or value criteria.
17  * -  make interface for storing integers, preferably without objectifying them.
18  * -  shrink hashes dynamically.
19  *
20 */
21 
22 
23 /*  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
24  * *
25  **            Implementation notes (a few).
26  *
27  *
28  *    This hash interface is very powerful. It gives you more than enough
29  *    rope to hang yourself and then some. It can be used as is, or wrappers
30  *    can be made around it that restrict a caller's ability to err.
31  *
32  *    The danger lies in the fact that this interface only does retrieval
33  *    and storage of pointers (both for keys and values), and does not clone
34  *    anything. Anything happening with the objects pointed to during the
35  *    lifetime of the hash is the responsibility of the caller.
36  *
37  *    What the interface cannot do currently is hash integers by value (rather
38  *    than by reference). This functionality will probably be added someday.
39 
40  * Features:
41  *    o  Searching, inserting, and deletion are all done by
42  *       mcxHashSearch. It returns a pointer to mcxKV. In all modes, the
43  *       caller can use the returned mcxKV* structure to obtain
44  *       the 'val' and 'key' members.
45  *
46  *    o  Hashes grow automatically once the average load per bucket
47  *       exceeds a settable threshold and if the hash was not declared
48  *       constant.
49  *
50  *    o  Caller supplies both the hash function and the compare function.
51  *       This interface provides several hash functions operating on a
52  *       (void* base, int len) combo, where base is cast to char by the
53  *       hash function. These functions can be used in creating custom hash
54  *       functions for your custom objects.
55  *
56  *    o  You can (of course) have multiple hashes. This is not really
57  *       a feature - however, since the idiotic <hsearch.h> does not offer
58  *       this I thought I'd mention it.
59  *
60  *    o  Witness mcxHashWalkInit, mcxHashWalkStep.
61  *
62  *    o  There is mcxHashMerge.
63  *
64  *    o  mcxHashKeys, mcxHashKVs.
65  *
66  *    Enjoy.
67 
68  * Notes
69  *    There is a utility hashfile.c (distributed in a separate package)
70  *    that can be used to stress-test this module. It allows customization
71  *    of several aspects, including the hash function that should be used.
72 */
73 
74 #include "types.h"
75 #include "list.h"
76 
77 
78 /* The hash struct is hidden. Use mcxHashGetSettings if you need
79  * to peek into the interior. Or read hash.c
80 */
81 
82 typedef struct mcxHash mcxHash;
83 
84 
85 typedef struct
86 {  void*       key
87 ;  void*       val
88 ;
89 }  mcxKV       ;
90 
91 
92 mcxHash* mcxHashNew
93 (  dim         n_buckets
94 ,  u32         (*hash)  (const void *a)
95 ,  int         (*cmp)   (const void *a, const void *b)
96 )  ;
97 
98 
99 #define MCX_HASH_OPT_DEFAULTS    0
100 #define MCX_HASH_OPT_CONSTANT    1
101 #define MCX_HASH_OPT_UNUSED      2
102 
103 
104 void mcxHashSetOpts
105 (  mcxHash*    hash
106 ,  double      load
107 ,  int         option      /* negative values will be ignored (feature) */
108 )  ;
109 
110 dim mcxHashMemSize
111 (  mcxHash*    hash
112 )  ;
113 
114 
115 typedef struct mcxHashSettings
116 {  dim         n_buckets
117 ;  dim         n_entries
118 ;  float       load
119 ;  mcxbits     options
120 ;
121 }  mcxHashSettings   ;
122 
123 
124 void mcxHashGetSettings
125 (  mcxHash*          hash
126 ,  mcxHashSettings*  settings
127 )  ;
128 
129 
130 /*  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
131  * *
132  **            mcxHashSearch
133  *
134  *    action               returns
135  *
136  *    MCX_DATUM_DELETE   ->    deleted mcxKV* or NULL if not present
137  *    MCX_DATUM_INSERT   ->    new or present mcxKV*
138  *    MCX_DATUM_FIND     ->    mcxKV* if present NULL otherwise.
139  *
140  * usage:
141  *
142  *    Values have to be inserted by the caller into the returned KV struct.
143  *    Make sure that keys point to objects that are constant
144  *    (with respect to the cmp function) during the lifetime of the hash.
145  *    YOU have to ensure the integrity of both keys and values.
146  *    This enables you to do whatever suits you, such as appending to
147  *    values.
148  *
149  *    When inserting, check whether kv->key != key (where kv is returned value)
150  *    if this is the case, an identically comparing key is already present.
151  *    You may want to destroy one of the two keys and decide what to do
152  *    with the value.
153  *
154  *    When deleting, the key-value pair is removed from the hash *AND RETURNED
155  *    TO CALLER* - you have to decide yourself what to do with it.  You have to
156  *    fetch the val and key members of the returned mcxKV object immediately:
157  *    Subsequent inserts in the hash may reuse it.  If the key was not present,
158  *    a value of NULL is returned.
159  *
160  *    When finding, life is simple. NULL if absent, matching kv otherwise.
161  *
162  * note:
163  *
164  *    memory management of keys and values is totally up to caller.
165  *    If usage is clean, you can use mcxHashFree for disposal of hash.
166 */
167 
168 #define mcxHashSearch(key, hash, ACTION)  mcxHashSearchx(key, hash, ACTION, NULL)
169 
170 mcxKV*   mcxHashSearchx
171 (  void*       key
172 ,  mcxHash*    hash
173 ,  mcxmode     ACTION
174 ,  int*        delta
175 )  ;
176 
177 
178 
179 /*  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
180  * *
181  **            mcxHashMerge
182  *
183  *    this one COPIES OBJECT POINTERS and DOES NOT CLONE.
184  *    so after the merge, hash1 and hash2 keys and values should not be freed.
185  *    In case there are equivalent keys in hash1 and hash2, this may
186  *    cause trouble when the caller wants to do cleaning afterwards.
187  *    This interface is still under development.
188  *
189  *    hashd may be equal to hash1 or hash2, and it may also be NULL.
190 */
191 
192 mcxHash* mcxHashMerge
193 (  mcxHash*    hash1
194 ,  mcxHash*    hash2
195 ,  mcxHash*    hashd
196 ,  void*       merge(void* val1, void* val2)
197 )  ;
198 
199 
200 /*  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
201  * *
202  **            mcxHashFree
203  *
204  *    This only works if all keys are of the same type and/or all values
205  *    are of the same type, and if your objects were created as expected by
206  *    the free routines (presumably malloced heap memory) - be careful with
207  *    constant objects like constant strings.
208  *
209  *    freekey and freeval may not free their argument. This is because
210  *    tingea does not allow routines that leave arguments in an
211  *    inconsistent state, and free routines in tingea generally accept
212  *    an argument of the form <type>** pptr.
213  *    In the case of mcxHashFree this means that the interface may
214  *    feel slighly more cumbersome.
215  *    A way out would have been to make the callbacks of signature
216  *
217  *          void freemem(void** mempp)
218  *
219  *    The caller could access *mempp, cast it to the expected type,
220  *    and later set *mempp to NULL. However, this would require
221  *    new free routines for lots of types. With the current interface
222  *    existing <type>Release routines can be used:
223  *
224  *    The type of free routine expected by mcxHashFree is generally
225  *    called <type>Release or <type>Release_v, e.g. mcxTingRelease.
226  *    Release routines release all memory of a composite object except the
227  *    memory which holds the outer struct.
228  *
229  *    If one of key or val is *not* a composite type or is a composite type
230  *    that does not contain malloced memory, use mcxHashFreeScalar.
231  *
232  *    Both freekey and freeval may be NULL. When NULL, the corresponding
233  *    KV member is not loooked at. This is useful e.g. when hashing objects
234  *    owned by someone else.
235 */
236 
237 
238 void mcxHashFree
239 (  mcxHash**   hashpp
240 ,  void        freekey(void* keypp)    /* (yourtype1** keypp)     */
241 ,  void        freeval(void* valpp)    /* (yourtype2** valpp)     */
242 )  ;
243 
244 void mcxHashFreeScalar
245 (  void* scalar
246 )  ;
247 
248 
249 
250 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
251  *    It copies the pointers stored in the hash
252 */
253 
254 void** mcxHashKeys
255 (  mcxHash*    hash
256 ,  dim*        n_entries
257 ,  int       (*cmp)(const void*, const void*)         /* works on keys */
258 ,  mcxbits     opts        /* unused yet */
259 )  ;
260                            /* Future options: SORT, SORT_DESC */
261 
262 
263 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
264  *    It copies the pointers stored in the hash
265 */
266 
267 void** mcxHashKVs
268 (  mcxHash*    hash
269 ,  dim*        n_entries
270 ,  int       (*cmp)(const void*, const void*)
271 ,  mcxbits     opts        /* unused yet */
272 )  ;
273 
274 
275 
276 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
277  *    Prints some information to stdout.
278 */
279 
280 void mcxHashStats
281 (  FILE*       fp
282 ,  mcxHash*    hash
283 )  ;
284 
285 
286 typedef struct mcxHashWalk mcxHashWalk;
287 
288 
289 mcxHashWalk* mcxHashWalkInit
290 (  mcxHash  *hash
291 )  ;
292 
293 
294 mcxKV* mcxHashWalkStep
295 (  mcxHashWalk* walk
296 ,  dim          *i_bucket
297 )  ;
298 
299 
300 void mcxHashWalkFree
301 (  mcxHashWalk  **walkpp
302 )  ;
303 
304 
305 void mcxHashApply
306 (  mcxHash* hash
307 ,  void    (*cb)(const void* key, void* val, void* data)
308 ,  void*    data
309 )  ;
310 
311                         /* UNIX ELF hash */
312                         /* POOR! */
313 u32 mcxELFhash
314 (  const void *key
315 ,  u32 len
316 )  ;
317 
318                         /* created by Bob Jenkins     */
319 u32 mcxBJhash
320 (  const void* key
321 ,  u32         len
322 )  ;
323 
324                         /* One at a time hash, Bob Jenkins/Colin Plumb */
325 u32 mcxOAThash
326 (  const void *key
327 ,  u32        len
328 )  ;
329 
330                         /* created by Daniel Phillips */
331 u32 mcxDPhash
332 (  const void* key
333 ,  u32         len
334 )  ;
335 
336                         /* "Berkely Database" hash (from Ozan Yigit's page) */
337                         /* POOR! */
338 u32 mcxBDBhash
339 (  const void *key
340 ,  u32        len
341 )  ;
342 
343                         /* Dan Bernstein hash (from Ozan Yigit's page) */
344 u32 mcxDJBhash
345 (  const void *key
346 ,  u32        len
347 )  ;
348 
349                         /* created by Chris Torek */
350 u32 mcxCThash
351 (  const void* key
352 ,  u32         len
353 )  ;
354 
355                         /* "GNU Emacs" hash (from m4) */
356                         /* not among the best */
357 u32 mcxGEhash
358 (  const void* key
359 ,  u32         len
360 )  ;
361 
362 
363                         /* Fowler Noll Vo hash */
364 u32   mcxFNVhash
365 (  const void *buf
366 ,  u32 len
367 )  ;
368 
369                         /* All experimental with weak points. */
370 u32   mcxSvDhash
371 (  const void        *key
372 ,  u32               len
373 )  ;
374 u32   mcxSvD2hash
375 (  const void        *key
376 ,  u32               len
377 )  ;
378 u32   mcxSvD1hash
379 (  const void        *key
380 ,  u32               len
381 )  ;
382 
383                         /* uses mcxDPhash             */
384 u32 mcxStrHash
385 (  const void* s
386 )  ;
387 
388 int mcxStrCmp
389 (  const void* a
390 ,  const void* b
391 )  ;
392 
393 
394 #endif
395 
396