1 /* (C) Copyright 2001, 2002, 2003, 2004, 2005 Stijn van Dongen 2 * (C) Copyright 2006, 2007, 2008, 2009 Stijn van Dongen 3 * 4 * This file is part of tingea. You can redistribute and/or modify tingea 5 * under the terms of the GNU General Public License; either version 3 of the 6 * License or (at your option) any later version. You should have received a 7 * copy of the GPL along with tingea, in the file COPYING. 8 */ 9 10 #ifndef tingea_hash_h 11 #define tingea_hash_h 12 13 #include <stdio.h> 14 15 /* TODO: 16 * - make sort routines for keys and values by key or value criteria. 17 * - make interface for storing integers, preferably without objectifying them. 18 * - shrink hashes dynamically. 19 * 20 */ 21 22 23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 24 * * 25 ** Implementation notes (a few). 26 * 27 * 28 * This hash interface is very powerful. It gives you more than enough 29 * rope to hang yourself and then some. It can be used as is, or wrappers 30 * can be made around it that restrict a caller's ability to err. 31 * 32 * The danger lies in the fact that this interface only does retrieval 33 * and storage of pointers (both for keys and values), and does not clone 34 * anything. Anything happening with the objects pointed to during the 35 * lifetime of the hash is the responsibility of the caller. 36 * 37 * What the interface cannot do currently is hash integers by value (rather 38 * than by reference). This functionality will probably be added someday. 39 40 * Features: 41 * o Searching, inserting, and deletion are all done by 42 * mcxHashSearch. It returns a pointer to mcxKV. In all modes, the 43 * caller can use the returned mcxKV* structure to obtain 44 * the 'val' and 'key' members. 45 * 46 * o Hashes grow automatically once the average load per bucket 47 * exceeds a settable threshold and if the hash was not declared 48 * constant. 49 * 50 * o Caller supplies both the hash function and the compare function. 51 * This interface provides several hash functions operating on a 52 * (void* base, int len) combo, where base is cast to char by the 53 * hash function. These functions can be used in creating custom hash 54 * functions for your custom objects. 55 * 56 * o You can (of course) have multiple hashes. This is not really 57 * a feature - however, since the idiotic <hsearch.h> does not offer 58 * this I thought I'd mention it. 59 * 60 * o Witness mcxHashWalkInit, mcxHashWalkStep. 61 * 62 * o There is mcxHashMerge. 63 * 64 * o mcxHashKeys, mcxHashKVs. 65 * 66 * Enjoy. 67 68 * Notes 69 * There is a utility hashfile.c (distributed in a separate package) 70 * that can be used to stress-test this module. It allows customization 71 * of several aspects, including the hash function that should be used. 72 */ 73 74 #include "types.h" 75 #include "list.h" 76 77 78 /* The hash struct is hidden. Use mcxHashGetSettings if you need 79 * to peek into the interior. Or read hash.c 80 */ 81 82 typedef struct mcxHash mcxHash; 83 84 85 typedef struct 86 { void* key 87 ; void* val 88 ; 89 } mcxKV ; 90 91 92 mcxHash* mcxHashNew 93 ( dim n_buckets 94 , u32 (*hash) (const void *a) 95 , int (*cmp) (const void *a, const void *b) 96 ) ; 97 98 99 #define MCX_HASH_OPT_DEFAULTS 0 100 #define MCX_HASH_OPT_CONSTANT 1 101 #define MCX_HASH_OPT_UNUSED 2 102 103 104 void mcxHashSetOpts 105 ( mcxHash* hash 106 , double load 107 , int option /* negative values will be ignored (feature) */ 108 ) ; 109 110 dim mcxHashMemSize 111 ( mcxHash* hash 112 ) ; 113 114 115 typedef struct mcxHashSettings 116 { dim n_buckets 117 ; dim n_entries 118 ; float load 119 ; mcxbits options 120 ; 121 } mcxHashSettings ; 122 123 124 void mcxHashGetSettings 125 ( mcxHash* hash 126 , mcxHashSettings* settings 127 ) ; 128 129 130 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 131 * * 132 ** mcxHashSearch 133 * 134 * action returns 135 * 136 * MCX_DATUM_DELETE -> deleted mcxKV* or NULL if not present 137 * MCX_DATUM_INSERT -> new or present mcxKV* 138 * MCX_DATUM_FIND -> mcxKV* if present NULL otherwise. 139 * 140 * usage: 141 * 142 * Values have to be inserted by the caller into the returned KV struct. 143 * Make sure that keys point to objects that are constant 144 * (with respect to the cmp function) during the lifetime of the hash. 145 * YOU have to ensure the integrity of both keys and values. 146 * This enables you to do whatever suits you, such as appending to 147 * values. 148 * 149 * When inserting, check whether kv->key != key (where kv is returned value) 150 * if this is the case, an identically comparing key is already present. 151 * You may want to destroy one of the two keys and decide what to do 152 * with the value. 153 * 154 * When deleting, the key-value pair is removed from the hash *AND RETURNED 155 * TO CALLER* - you have to decide yourself what to do with it. You have to 156 * fetch the val and key members of the returned mcxKV object immediately: 157 * Subsequent inserts in the hash may reuse it. If the key was not present, 158 * a value of NULL is returned. 159 * 160 * When finding, life is simple. NULL if absent, matching kv otherwise. 161 * 162 * note: 163 * 164 * memory management of keys and values is totally up to caller. 165 * If usage is clean, you can use mcxHashFree for disposal of hash. 166 */ 167 168 #define mcxHashSearch(key, hash, ACTION) mcxHashSearchx(key, hash, ACTION, NULL) 169 170 mcxKV* mcxHashSearchx 171 ( void* key 172 , mcxHash* hash 173 , mcxmode ACTION 174 , int* delta 175 ) ; 176 177 178 179 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 180 * * 181 ** mcxHashMerge 182 * 183 * this one COPIES OBJECT POINTERS and DOES NOT CLONE. 184 * so after the merge, hash1 and hash2 keys and values should not be freed. 185 * In case there are equivalent keys in hash1 and hash2, this may 186 * cause trouble when the caller wants to do cleaning afterwards. 187 * This interface is still under development. 188 * 189 * hashd may be equal to hash1 or hash2, and it may also be NULL. 190 */ 191 192 mcxHash* mcxHashMerge 193 ( mcxHash* hash1 194 , mcxHash* hash2 195 , mcxHash* hashd 196 , void* merge(void* val1, void* val2) 197 ) ; 198 199 200 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 201 * * 202 ** mcxHashFree 203 * 204 * This only works if all keys are of the same type and/or all values 205 * are of the same type, and if your objects were created as expected by 206 * the free routines (presumably malloced heap memory) - be careful with 207 * constant objects like constant strings. 208 * 209 * freekey and freeval may not free their argument. This is because 210 * tingea does not allow routines that leave arguments in an 211 * inconsistent state, and free routines in tingea generally accept 212 * an argument of the form <type>** pptr. 213 * In the case of mcxHashFree this means that the interface may 214 * feel slighly more cumbersome. 215 * A way out would have been to make the callbacks of signature 216 * 217 * void freemem(void** mempp) 218 * 219 * The caller could access *mempp, cast it to the expected type, 220 * and later set *mempp to NULL. However, this would require 221 * new free routines for lots of types. With the current interface 222 * existing <type>Release routines can be used: 223 * 224 * The type of free routine expected by mcxHashFree is generally 225 * called <type>Release or <type>Release_v, e.g. mcxTingRelease. 226 * Release routines release all memory of a composite object except the 227 * memory which holds the outer struct. 228 * 229 * If one of key or val is *not* a composite type or is a composite type 230 * that does not contain malloced memory, use mcxHashFreeScalar. 231 * 232 * Both freekey and freeval may be NULL. When NULL, the corresponding 233 * KV member is not loooked at. This is useful e.g. when hashing objects 234 * owned by someone else. 235 */ 236 237 238 void mcxHashFree 239 ( mcxHash** hashpp 240 , void freekey(void* keypp) /* (yourtype1** keypp) */ 241 , void freeval(void* valpp) /* (yourtype2** valpp) */ 242 ) ; 243 244 void mcxHashFreeScalar 245 ( void* scalar 246 ) ; 247 248 249 250 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 251 * It copies the pointers stored in the hash 252 */ 253 254 void** mcxHashKeys 255 ( mcxHash* hash 256 , dim* n_entries 257 , int (*cmp)(const void*, const void*) /* works on keys */ 258 , mcxbits opts /* unused yet */ 259 ) ; 260 /* Future options: SORT, SORT_DESC */ 261 262 263 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 264 * It copies the pointers stored in the hash 265 */ 266 267 void** mcxHashKVs 268 ( mcxHash* hash 269 , dim* n_entries 270 , int (*cmp)(const void*, const void*) 271 , mcxbits opts /* unused yet */ 272 ) ; 273 274 275 276 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 277 * Prints some information to stdout. 278 */ 279 280 void mcxHashStats 281 ( FILE* fp 282 , mcxHash* hash 283 ) ; 284 285 286 typedef struct mcxHashWalk mcxHashWalk; 287 288 289 mcxHashWalk* mcxHashWalkInit 290 ( mcxHash *hash 291 ) ; 292 293 294 mcxKV* mcxHashWalkStep 295 ( mcxHashWalk* walk 296 , dim *i_bucket 297 ) ; 298 299 300 void mcxHashWalkFree 301 ( mcxHashWalk **walkpp 302 ) ; 303 304 305 void mcxHashApply 306 ( mcxHash* hash 307 , void (*cb)(const void* key, void* val, void* data) 308 , void* data 309 ) ; 310 311 /* UNIX ELF hash */ 312 /* POOR! */ 313 u32 mcxELFhash 314 ( const void *key 315 , u32 len 316 ) ; 317 318 /* created by Bob Jenkins */ 319 u32 mcxBJhash 320 ( const void* key 321 , u32 len 322 ) ; 323 324 /* One at a time hash, Bob Jenkins/Colin Plumb */ 325 u32 mcxOAThash 326 ( const void *key 327 , u32 len 328 ) ; 329 330 /* created by Daniel Phillips */ 331 u32 mcxDPhash 332 ( const void* key 333 , u32 len 334 ) ; 335 336 /* "Berkely Database" hash (from Ozan Yigit's page) */ 337 /* POOR! */ 338 u32 mcxBDBhash 339 ( const void *key 340 , u32 len 341 ) ; 342 343 /* Dan Bernstein hash (from Ozan Yigit's page) */ 344 u32 mcxDJBhash 345 ( const void *key 346 , u32 len 347 ) ; 348 349 /* created by Chris Torek */ 350 u32 mcxCThash 351 ( const void* key 352 , u32 len 353 ) ; 354 355 /* "GNU Emacs" hash (from m4) */ 356 /* not among the best */ 357 u32 mcxGEhash 358 ( const void* key 359 , u32 len 360 ) ; 361 362 363 /* Fowler Noll Vo hash */ 364 u32 mcxFNVhash 365 ( const void *buf 366 , u32 len 367 ) ; 368 369 /* All experimental with weak points. */ 370 u32 mcxSvDhash 371 ( const void *key 372 , u32 len 373 ) ; 374 u32 mcxSvD2hash 375 ( const void *key 376 , u32 len 377 ) ; 378 u32 mcxSvD1hash 379 ( const void *key 380 , u32 len 381 ) ; 382 383 /* uses mcxDPhash */ 384 u32 mcxStrHash 385 ( const void* s 386 ) ; 387 388 int mcxStrCmp 389 ( const void* a 390 , const void* b 391 ) ; 392 393 394 #endif 395 396