1 /* 2 Copyright (c) 2003-2009, Troy D. Hanson http://uthash.sourceforge.net 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 15 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #ifndef UTHASH_H 25 #define UTHASH_H 26 27 #include <string.h> /* memcmp,strlen */ 28 #include <stddef.h> /* ptrdiff_t */ 29 #include <inttypes.h> /* uint32_t etc */ 30 31 #define UTHASH_VERSION 1.8 32 33 /* C++ requires extra stringent casting */ 34 #if defined __cplusplus 35 #define TYPEOF(x) (typeof(x)) 36 #else 37 #define TYPEOF(x) 38 #endif 39 40 41 #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ 42 #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 43 #define uthash_free(ptr) free(ptr) /* free fcn */ 44 45 #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 46 #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 47 48 /* initial number of buckets */ 49 #define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ 50 #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ 51 #define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ 52 53 /* calculate the element whose hash handle address is hhe */ 54 #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)hhp) - (tbl)->hho)) 55 56 #define HASH_FIND(hh,head,keyptr,keylen,out) \ 57 do { \ 58 unsigned _hf_bkt,_hf_hashv; \ 59 out=TYPEOF(out)NULL; \ 60 if (head) { \ 61 HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ 62 if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ 63 HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ 64 keyptr,keylen,out); \ 65 } \ 66 } \ 67 } while (0) 68 69 #ifdef HASH_BLOOM 70 #define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) 71 #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) 72 #define HASH_BLOOM_MAKE(tbl) \ 73 do { \ 74 (tbl)->bloom_nbits = HASH_BLOOM; \ 75 (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 76 if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ 77 memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ 78 (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 79 } while (0); 80 81 #define HASH_BLOOM_FREE(tbl) \ 82 do { \ 83 uthash_free((tbl)->bloom_bv); \ 84 } while (0); 85 86 #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) 87 #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) 88 89 #define HASH_BLOOM_ADD(tbl,hashv) \ 90 HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) 91 92 #define HASH_BLOOM_TEST(tbl,hashv) \ 93 HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) 94 95 #else 96 #define HASH_BLOOM_MAKE(tbl) 97 #define HASH_BLOOM_FREE(tbl) 98 #define HASH_BLOOM_ADD(tbl,hashv) 99 #define HASH_BLOOM_TEST(tbl,hashv) (1) 100 #endif 101 102 #define HASH_MAKE_TABLE(hh,head) \ 103 do { \ 104 (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ 105 sizeof(UT_hash_table)); \ 106 if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ 107 memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ 108 (head)->hh.tbl->tail = &((head)->hh); \ 109 (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 110 (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 111 (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 112 (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 113 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 114 if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ 115 memset((head)->hh.tbl->buckets, 0, \ 116 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 117 HASH_BLOOM_MAKE((head)->hh.tbl); \ 118 (head)->hh.tbl->signature = HASH_SIGNATURE; \ 119 } while(0) 120 121 #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 122 HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add) 123 124 #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 125 do { \ 126 unsigned _ha_bkt; \ 127 (add)->hh.next = NULL; \ 128 (add)->hh.key = (char*)keyptr; \ 129 (add)->hh.keylen = keylen_in; \ 130 if (!(head)) { \ 131 head = (add); \ 132 (head)->hh.prev = NULL; \ 133 HASH_MAKE_TABLE(hh,head); \ 134 } else { \ 135 (head)->hh.tbl->tail->next = (add); \ 136 (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 137 (head)->hh.tbl->tail = &((add)->hh); \ 138 } \ 139 (head)->hh.tbl->num_items++; \ 140 (add)->hh.tbl = (head)->hh.tbl; \ 141 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ 142 (add)->hh.hashv, _ha_bkt); \ 143 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ 144 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ 145 HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ 146 HASH_FSCK(hh,head); \ 147 } while(0) 148 149 #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ 150 do { \ 151 bkt = ((hashv) & ((num_bkts) - 1)); \ 152 } while(0) 153 154 /* delete "delptr" from the hash table. 155 * "the usual" patch-up process for the app-order doubly-linked-list. 156 * The use of _hd_hh_del below deserves special explanation. 157 * These used to be expressed using (delptr) but that led to a bug 158 * if someone used the same symbol for the head and deletee, like 159 * HASH_DELETE(hh,users,users); 160 * We want that to work, but by changing the head (users) below 161 * we were forfeiting our ability to further refer to the deletee (users) 162 * in the patch-up process. Solution: use scratch space in the table to 163 * copy the deletee pointer, then the latter references are via that 164 * scratch pointer rather than through the repointed (users) symbol. 165 */ 166 #define HASH_DELETE(hh,head,delptr) \ 167 do { \ 168 unsigned _hd_bkt; \ 169 struct UT_hash_handle *_hd_hh_del; \ 170 if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ 171 uthash_free((head)->hh.tbl->buckets ); \ 172 HASH_BLOOM_FREE((head)->hh.tbl); \ 173 uthash_free((head)->hh.tbl); \ 174 head = NULL; \ 175 } else { \ 176 _hd_hh_del = &((delptr)->hh); \ 177 if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ 178 (head)->hh.tbl->tail = \ 179 (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ 180 (head)->hh.tbl->hho); \ 181 } \ 182 if ((delptr)->hh.prev) { \ 183 ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ 184 (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ 185 } else { \ 186 head = TYPEOF(head)((delptr)->hh.next); \ 187 } \ 188 if (_hd_hh_del->next) { \ 189 ((UT_hash_handle*)((char*)_hd_hh_del->next + \ 190 (head)->hh.tbl->hho))->prev = \ 191 _hd_hh_del->prev; \ 192 } \ 193 HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 194 HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 195 (head)->hh.tbl->num_items--; \ 196 } \ 197 HASH_FSCK(hh,head); \ 198 } while (0) 199 200 201 /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 202 #define HASH_FIND_STR(head,findstr,out) \ 203 HASH_FIND(hh,head,findstr,strlen(findstr),out) 204 #define HASH_ADD_STR(head,strfield,add) \ 205 HASH_ADD(hh,head,strfield,strlen(add->strfield),add) 206 #define HASH_FIND_INT(head,findint,out) \ 207 HASH_FIND(hh,head,findint,sizeof(int),out) 208 #define HASH_ADD_INT(head,intfield,add) \ 209 HASH_ADD(hh,head,intfield,sizeof(int),add) 210 #define HASH_DEL(head,delptr) \ 211 HASH_DELETE(hh,head,delptr) 212 213 /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 214 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 215 */ 216 #ifdef HASH_DEBUG 217 #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) 218 #define HASH_FSCK(hh,head) \ 219 do { \ 220 unsigned _bkt_i; \ 221 unsigned _count, _bkt_count; \ 222 char *_prev; \ 223 struct UT_hash_handle *_thh; \ 224 if (head) { \ 225 _count = 0; \ 226 for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ 227 _bkt_count = 0; \ 228 _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 229 _prev = NULL; \ 230 while (_thh) { \ 231 if (_prev != (char*)(_thh->hh_prev)) { \ 232 HASH_OOPS("invalid hh_prev %p, actual %p\n", \ 233 _thh->hh_prev, _prev ); \ 234 } \ 235 _bkt_count++; \ 236 _prev = (char*)(_thh); \ 237 _thh = _thh->hh_next; \ 238 } \ 239 _count += _bkt_count; \ 240 if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 241 HASH_OOPS("invalid bucket count %d, actual %d\n", \ 242 (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 243 } \ 244 } \ 245 if (_count != (head)->hh.tbl->num_items) { \ 246 HASH_OOPS("invalid hh item count %d, actual %d\n", \ 247 (head)->hh.tbl->num_items, _count ); \ 248 } \ 249 /* traverse hh in app order; check next/prev integrity, count */ \ 250 _count = 0; \ 251 _prev = NULL; \ 252 _thh = &(head)->hh; \ 253 while (_thh) { \ 254 _count++; \ 255 if (_prev !=(char*)(_thh->prev)) { \ 256 HASH_OOPS("invalid prev %p, actual %p\n", \ 257 _thh->prev, _prev ); \ 258 } \ 259 _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 260 _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ 261 (head)->hh.tbl->hho) : NULL ); \ 262 } \ 263 if (_count != (head)->hh.tbl->num_items) { \ 264 HASH_OOPS("invalid app item count %d, actual %d\n", \ 265 (head)->hh.tbl->num_items, _count ); \ 266 } \ 267 } \ 268 } while (0) 269 #else 270 #define HASH_FSCK(hh,head) 271 #endif 272 273 /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 274 * the descriptor to which this macro is defined for tuning the hash function. 275 * The app can #include <unistd.h> to get the prototype for write(2). */ 276 #ifdef HASH_EMIT_KEYS 277 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 278 do { \ 279 unsigned _klen = fieldlen; \ 280 write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 281 write(HASH_EMIT_KEYS, keyptr, fieldlen); \ 282 } while (0) 283 #else 284 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 285 #endif 286 287 /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ 288 #ifdef HASH_FUNCTION 289 #define HASH_FCN HASH_FUNCTION 290 #else 291 #define HASH_FCN HASH_JEN 292 #endif 293 294 /* The Bernstein hash function, used in Perl prior to v5.6 */ 295 #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ 296 do { \ 297 unsigned _hb_keylen=keylen; \ 298 char *_hb_key=(char*)key; \ 299 (hashv) = 0; \ 300 while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ 301 bkt = (hashv) & (num_bkts-1); \ 302 } while (0) 303 304 305 /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 306 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ 307 #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ 308 do { \ 309 unsigned _sx_i; \ 310 char *_hs_key=(char*)key; \ 311 hashv = 0; \ 312 for(_sx_i=0; _sx_i < keylen; _sx_i++) \ 313 hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 314 bkt = hashv & (num_bkts-1); \ 315 } while (0) 316 317 #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ 318 do { \ 319 unsigned _fn_i; \ 320 char *_hf_key=(char*)key; \ 321 hashv = 2166136261UL; \ 322 for(_fn_i=0; _fn_i < keylen; _fn_i++) \ 323 hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ 324 bkt = hashv & (num_bkts-1); \ 325 } while(0); 326 327 #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ 328 do { \ 329 unsigned _ho_i; \ 330 char *_ho_key=(char*)key; \ 331 hashv = 0; \ 332 for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 333 hashv += _ho_key[_ho_i]; \ 334 hashv += (hashv << 10); \ 335 hashv ^= (hashv >> 6); \ 336 } \ 337 hashv += (hashv << 3); \ 338 hashv ^= (hashv >> 11); \ 339 hashv += (hashv << 15); \ 340 bkt = hashv & (num_bkts-1); \ 341 } while(0) 342 343 #define HASH_JEN_MIX(a,b,c) \ 344 do { \ 345 a -= b; a -= c; a ^= ( c >> 13 ); \ 346 b -= c; b -= a; b ^= ( a << 8 ); \ 347 c -= a; c -= b; c ^= ( b >> 13 ); \ 348 a -= b; a -= c; a ^= ( c >> 12 ); \ 349 b -= c; b -= a; b ^= ( a << 16 ); \ 350 c -= a; c -= b; c ^= ( b >> 5 ); \ 351 a -= b; a -= c; a ^= ( c >> 3 ); \ 352 b -= c; b -= a; b ^= ( a << 10 ); \ 353 c -= a; c -= b; c ^= ( b >> 15 ); \ 354 } while (0) 355 356 #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ 357 do { \ 358 unsigned _hj_i,_hj_j,_hj_k; \ 359 char *_hj_key=(char*)key; \ 360 hashv = 0xfeedbeef; \ 361 _hj_i = _hj_j = 0x9e3779b9; \ 362 _hj_k = keylen; \ 363 while (_hj_k >= 12) { \ 364 _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 365 + ( (unsigned)_hj_key[2] << 16 ) \ 366 + ( (unsigned)_hj_key[3] << 24 ) ); \ 367 _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 368 + ( (unsigned)_hj_key[6] << 16 ) \ 369 + ( (unsigned)_hj_key[7] << 24 ) ); \ 370 hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 371 + ( (unsigned)_hj_key[10] << 16 ) \ 372 + ( (unsigned)_hj_key[11] << 24 ) ); \ 373 \ 374 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 375 \ 376 _hj_key += 12; \ 377 _hj_k -= 12; \ 378 } \ 379 hashv += keylen; \ 380 switch ( _hj_k ) { \ 381 case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ 382 case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ 383 case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ 384 case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ 385 case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ 386 case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ 387 case 5: _hj_j += _hj_key[4]; \ 388 case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ 389 case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ 390 case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ 391 case 1: _hj_i += _hj_key[0]; \ 392 } \ 393 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 394 bkt = hashv & (num_bkts-1); \ 395 } while(0) 396 397 /* The Paul Hsieh hash function */ 398 #undef get16bits 399 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 400 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 401 #define get16bits(d) (*((const uint16_t *) (d))) 402 #endif 403 404 #if !defined (get16bits) 405 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 406 +(uint32_t)(((const uint8_t *)(d))[0]) ) 407 #endif 408 #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ 409 do { \ 410 char *_sfh_key=(char*)key; \ 411 uint32_t _sfh_tmp, _sfh_len = keylen; \ 412 \ 413 int _sfh_rem = _sfh_len & 3; \ 414 _sfh_len >>= 2; \ 415 hashv = 0xcafebabe; \ 416 \ 417 /* Main loop */ \ 418 for (;_sfh_len > 0; _sfh_len--) { \ 419 hashv += get16bits (_sfh_key); \ 420 _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ 421 hashv = (hashv << 16) ^ _sfh_tmp; \ 422 _sfh_key += 2*sizeof (uint16_t); \ 423 hashv += hashv >> 11; \ 424 } \ 425 \ 426 /* Handle end cases */ \ 427 switch (_sfh_rem) { \ 428 case 3: hashv += get16bits (_sfh_key); \ 429 hashv ^= hashv << 16; \ 430 hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ 431 hashv += hashv >> 11; \ 432 break; \ 433 case 2: hashv += get16bits (_sfh_key); \ 434 hashv ^= hashv << 11; \ 435 hashv += hashv >> 17; \ 436 break; \ 437 case 1: hashv += *_sfh_key; \ 438 hashv ^= hashv << 10; \ 439 hashv += hashv >> 1; \ 440 } \ 441 \ 442 /* Force "avalanching" of final 127 bits */ \ 443 hashv ^= hashv << 3; \ 444 hashv += hashv >> 5; \ 445 hashv ^= hashv << 4; \ 446 hashv += hashv >> 17; \ 447 hashv ^= hashv << 25; \ 448 hashv += hashv >> 6; \ 449 bkt = hashv & (num_bkts-1); \ 450 } while(0); 451 452 #ifdef HASH_USING_NO_STRICT_ALIASING 453 /* The MurmurHash exploits some CPU's (e.g. x86) tolerance for unaligned reads. 454 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. 455 * So MurmurHash comes in two versions, the faster unaligned one and the slower 456 * aligned one. We only use the faster one on CPU's where we know it's safe. 457 * 458 * Note the preprocessor built-in defines can be emitted using: 459 * 460 * gcc -m64 -dM -E - < /dev/null (on gcc) 461 * cc -## a.c (where a.c is a simple test file) (Sun Studio) 462 */ 463 #if (defined(__i386__) || defined(__x86_64__)) 464 #define HASH_MUR HASH_MUR_UNALIGNED 465 #else 466 #define HASH_MUR HASH_MUR_ALIGNED 467 #endif 468 469 /* Appleby's MurmurHash fast version for unaligned-tolerant archs like i386 */ 470 #define HASH_MUR_UNALIGNED(key,keylen,num_bkts,hashv,bkt) \ 471 do { \ 472 const unsigned int _mur_m = 0x5bd1e995; \ 473 const int _mur_r = 24; \ 474 hashv = 0xcafebabe ^ keylen; \ 475 char *_mur_key = (char *)key; \ 476 uint32_t _mur_tmp, _mur_len = keylen; \ 477 \ 478 for (;_mur_len >= 4; _mur_len-=4) { \ 479 _mur_tmp = *(uint32_t *)_mur_key; \ 480 _mur_tmp *= _mur_m; \ 481 _mur_tmp ^= _mur_tmp >> _mur_r; \ 482 _mur_tmp *= _mur_m; \ 483 hashv *= _mur_m; \ 484 hashv ^= _mur_tmp; \ 485 _mur_key += 4; \ 486 } \ 487 \ 488 switch(_mur_len) \ 489 { \ 490 case 3: hashv ^= _mur_key[2] << 16; \ 491 case 2: hashv ^= _mur_key[1] << 8; \ 492 case 1: hashv ^= _mur_key[0]; \ 493 hashv *= _mur_m; \ 494 }; \ 495 \ 496 hashv ^= hashv >> 13; \ 497 hashv *= _mur_m; \ 498 hashv ^= hashv >> 15; \ 499 \ 500 bkt = hashv & (num_bkts-1); \ 501 } while(0) 502 503 /* Appleby's MurmurHash version for alignment-sensitive archs like Sparc */ 504 #define HASH_MUR_ALIGNED(key,keylen,num_bkts,hashv,bkt) \ 505 do { \ 506 const unsigned int _mur_m = 0x5bd1e995; \ 507 const int _mur_r = 24; \ 508 hashv = 0xcafebabe ^ keylen; \ 509 char *_mur_key = (char *)key; \ 510 uint32_t _mur_len = keylen; \ 511 int _mur_align = (int)_mur_key & 3; \ 512 \ 513 if (_mur_align && (_mur_len >= 4)) { \ 514 unsigned _mur_t = 0, _mur_d = 0; \ 515 switch(_mur_align) { \ 516 case 1: _mur_t |= _mur_key[2] << 16; \ 517 case 2: _mur_t |= _mur_key[1] << 8; \ 518 case 3: _mur_t |= _mur_key[0]; \ 519 } \ 520 _mur_t <<= (8 * _mur_align); \ 521 _mur_key += 4-_mur_align; \ 522 _mur_len -= 4-_mur_align; \ 523 int _mur_sl = 8 * (4-_mur_align); \ 524 int _mur_sr = 8 * _mur_align; \ 525 \ 526 for (;_mur_len >= 4; _mur_len-=4) { \ 527 _mur_d = *(unsigned *)_mur_key; \ 528 _mur_t = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ 529 unsigned _mur_k = _mur_t; \ 530 _mur_k *= _mur_m; \ 531 _mur_k ^= _mur_k >> _mur_r; \ 532 _mur_k *= _mur_m; \ 533 hashv *= _mur_m; \ 534 hashv ^= _mur_k; \ 535 _mur_t = _mur_d; \ 536 _mur_key += 4; \ 537 } \ 538 _mur_d = 0; \ 539 if(_mur_len >= _mur_align) { \ 540 switch(_mur_align) { \ 541 case 3: _mur_d |= _mur_key[2] << 16; \ 542 case 2: _mur_d |= _mur_key[1] << 8; \ 543 case 1: _mur_d |= _mur_key[0]; \ 544 } \ 545 unsigned _mur_k = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ 546 _mur_k *= _mur_m; \ 547 _mur_k ^= _mur_k >> _mur_r; \ 548 _mur_k *= _mur_m; \ 549 hashv *= _mur_m; \ 550 hashv ^= _mur_k; \ 551 _mur_k += _mur_align; \ 552 _mur_len -= _mur_align; \ 553 \ 554 switch(_mur_len) \ 555 { \ 556 case 3: hashv ^= _mur_key[2] << 16; \ 557 case 2: hashv ^= _mur_key[1] << 8; \ 558 case 1: hashv ^= _mur_key[0]; \ 559 hashv *= _mur_m; \ 560 } \ 561 } else { \ 562 switch(_mur_len) \ 563 { \ 564 case 3: _mur_d ^= _mur_key[2] << 16; \ 565 case 2: _mur_d ^= _mur_key[1] << 8; \ 566 case 1: _mur_d ^= _mur_key[0]; \ 567 case 0: hashv ^= (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ 568 hashv *= _mur_m; \ 569 } \ 570 } \ 571 \ 572 hashv ^= hashv >> 13; \ 573 hashv *= _mur_m; \ 574 hashv ^= hashv >> 15; \ 575 } else { \ 576 for (;_mur_len >= 4; _mur_len-=4) { \ 577 unsigned _mur_k = *(unsigned*)_mur_key; \ 578 _mur_k *= _mur_m; \ 579 _mur_k ^= _mur_k >> _mur_r; \ 580 _mur_k *= _mur_m; \ 581 hashv *= _mur_m; \ 582 hashv ^= _mur_k; \ 583 _mur_key += 4; \ 584 } \ 585 switch(_mur_len) \ 586 { \ 587 case 3: hashv ^= _mur_key[2] << 16; \ 588 case 2: hashv ^= _mur_key[1] << 8; \ 589 case 1: hashv ^= _mur_key[0]; \ 590 hashv *= _mur_m; \ 591 } \ 592 \ 593 hashv ^= hashv >> 13; \ 594 hashv *= _mur_m; \ 595 hashv ^= hashv >> 15; \ 596 } \ 597 bkt = hashv & (num_bkts-1); \ 598 } while(0) 599 #endif /* HASH_USING_NO_STRICT_ALIASING */ 600 601 /* key comparison function; return 0 if keys equal */ 602 #define HASH_KEYCMP(a,b,len) memcmp(a,b,len) 603 604 /* iterate over items in a known bucket to find desired item */ 605 #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ 606 out = TYPEOF(out)((head.hh_head) ? ELMT_FROM_HH(tbl,head.hh_head) : NULL); \ 607 while (out) { \ 608 if (out->hh.keylen == keylen_in) { \ 609 if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \ 610 } \ 611 out= TYPEOF(out)((out->hh.hh_next) ? \ 612 ELMT_FROM_HH(tbl,out->hh.hh_next) : NULL); \ 613 } 614 615 /* add an item to a bucket */ 616 #define HASH_ADD_TO_BKT(head,addhh) \ 617 do { \ 618 head.count++; \ 619 (addhh)->hh_next = head.hh_head; \ 620 (addhh)->hh_prev = NULL; \ 621 if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ 622 (head).hh_head=addhh; \ 623 if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ 624 && (addhh)->tbl->noexpand != 1) { \ 625 HASH_EXPAND_BUCKETS((addhh)->tbl); \ 626 } \ 627 } while(0) 628 629 /* remove an item from a given bucket */ 630 #define HASH_DEL_IN_BKT(hh,head,hh_del) \ 631 (head).count--; \ 632 if ((head).hh_head == hh_del) { \ 633 (head).hh_head = hh_del->hh_next; \ 634 } \ 635 if (hh_del->hh_prev) { \ 636 hh_del->hh_prev->hh_next = hh_del->hh_next; \ 637 } \ 638 if (hh_del->hh_next) { \ 639 hh_del->hh_next->hh_prev = hh_del->hh_prev; \ 640 } 641 642 /* Bucket expansion has the effect of doubling the number of buckets 643 * and redistributing the items into the new buckets. Ideally the 644 * items will distribute more or less evenly into the new buckets 645 * (the extent to which this is true is a measure of the quality of 646 * the hash function as it applies to the key domain). 647 * 648 * With the items distributed into more buckets, the chain length 649 * (item count) in each bucket is reduced. Thus by expanding buckets 650 * the hash keeps a bound on the chain length. This bounded chain 651 * length is the essence of how a hash provides constant time lookup. 652 * 653 * The calculation of tbl->ideal_chain_maxlen below deserves some 654 * explanation. First, keep in mind that we're calculating the ideal 655 * maximum chain length based on the *new* (doubled) bucket count. 656 * In fractions this is just n/b (n=number of items,b=new num buckets). 657 * Since the ideal chain length is an integer, we want to calculate 658 * ceil(n/b). We don't depend on floating point arithmetic in this 659 * hash, so to calculate ceil(n/b) with integers we could write 660 * 661 * ceil(n/b) = (n/b) + ((n%b)?1:0) 662 * 663 * and in fact a previous version of this hash did just that. 664 * But now we have improved things a bit by recognizing that b is 665 * always a power of two. We keep its base 2 log handy (call it lb), 666 * so now we can write this with a bit shift and logical AND: 667 * 668 * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) 669 * 670 */ 671 #define HASH_EXPAND_BUCKETS(tbl) \ 672 do { \ 673 unsigned _he_bkt; \ 674 unsigned _he_bkt_i; \ 675 struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ 676 UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ 677 _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 678 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 679 if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ 680 memset(_he_new_buckets, 0, \ 681 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 682 tbl->ideal_chain_maxlen = \ 683 (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ 684 ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \ 685 tbl->nonideal_items = 0; \ 686 for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ 687 { \ 688 _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ 689 while (_he_thh) { \ 690 _he_hh_nxt = _he_thh->hh_next; \ 691 HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ 692 _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ 693 if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ 694 tbl->nonideal_items++; \ 695 _he_newbkt->expand_mult = _he_newbkt->count / \ 696 tbl->ideal_chain_maxlen; \ 697 } \ 698 _he_thh->hh_prev = NULL; \ 699 _he_thh->hh_next = _he_newbkt->hh_head; \ 700 if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ 701 _he_thh; \ 702 _he_newbkt->hh_head = _he_thh; \ 703 _he_thh = _he_hh_nxt; \ 704 } \ 705 } \ 706 tbl->num_buckets *= 2; \ 707 tbl->log2_num_buckets++; \ 708 uthash_free( tbl->buckets ); \ 709 tbl->buckets = _he_new_buckets; \ 710 tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ 711 (tbl->ineff_expands+1) : 0; \ 712 if (tbl->ineff_expands > 1) { \ 713 tbl->noexpand=1; \ 714 uthash_noexpand_fyi(tbl); \ 715 } \ 716 uthash_expand_fyi(tbl); \ 717 } while(0) 718 719 720 /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ 721 /* Note that HASH_SORT assumes the hash handle name to be hh. 722 * HASH_SRT was added to allow the hash handle name to be passed in. */ 723 #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) 724 #define HASH_SRT(hh,head,cmpfcn) \ 725 do { \ 726 unsigned _hs_i; \ 727 unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ 728 struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ 729 if (head) { \ 730 _hs_insize = 1; \ 731 _hs_looping = 1; \ 732 _hs_list = &((head)->hh); \ 733 while (_hs_looping) { \ 734 _hs_p = _hs_list; \ 735 _hs_list = NULL; \ 736 _hs_tail = NULL; \ 737 _hs_nmerges = 0; \ 738 while (_hs_p) { \ 739 _hs_nmerges++; \ 740 _hs_q = _hs_p; \ 741 _hs_psize = 0; \ 742 for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ 743 _hs_psize++; \ 744 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 745 ((void*)((char*)(_hs_q->next) + \ 746 (head)->hh.tbl->hho)) : NULL); \ 747 if (! (_hs_q) ) break; \ 748 } \ 749 _hs_qsize = _hs_insize; \ 750 while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ 751 if (_hs_psize == 0) { \ 752 _hs_e = _hs_q; \ 753 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 754 ((void*)((char*)(_hs_q->next) + \ 755 (head)->hh.tbl->hho)) : NULL); \ 756 _hs_qsize--; \ 757 } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ 758 _hs_e = _hs_p; \ 759 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ 760 ((void*)((char*)(_hs_p->next) + \ 761 (head)->hh.tbl->hho)) : NULL); \ 762 _hs_psize--; \ 763 } else if (( \ 764 cmpfcn(TYPEOF(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ 765 TYPEOF(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ 766 ) <= 0) { \ 767 _hs_e = _hs_p; \ 768 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ 769 ((void*)((char*)(_hs_p->next) + \ 770 (head)->hh.tbl->hho)) : NULL); \ 771 _hs_psize--; \ 772 } else { \ 773 _hs_e = _hs_q; \ 774 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 775 ((void*)((char*)(_hs_q->next) + \ 776 (head)->hh.tbl->hho)) : NULL); \ 777 _hs_qsize--; \ 778 } \ 779 if ( _hs_tail ) { \ 780 _hs_tail->next = ((_hs_e) ? \ 781 ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ 782 } else { \ 783 _hs_list = _hs_e; \ 784 } \ 785 _hs_e->prev = ((_hs_tail) ? \ 786 ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ 787 _hs_tail = _hs_e; \ 788 } \ 789 _hs_p = _hs_q; \ 790 } \ 791 _hs_tail->next = NULL; \ 792 if ( _hs_nmerges <= 1 ) { \ 793 _hs_looping=0; \ 794 (head)->hh.tbl->tail = _hs_tail; \ 795 (head) = TYPEOF(head)ELMT_FROM_HH((head)->hh.tbl, _hs_list); \ 796 } \ 797 _hs_insize *= 2; \ 798 } \ 799 HASH_FSCK(hh,head); \ 800 } \ 801 } while (0) 802 803 /* This function selects items from one hash into another hash. 804 * The end result is that the selected items have dual presence 805 * in both hashes. There is no copy of the items made; rather 806 * they are added into the new hash through a secondary hash 807 * hash handle that must be present in the structure. */ 808 #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ 809 do { \ 810 unsigned _src_bkt, _dst_bkt; \ 811 void *_last_elt=NULL, *_elt; \ 812 UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ 813 ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ 814 if (src) { \ 815 for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ 816 for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ 817 _src_hh; \ 818 _src_hh = _src_hh->hh_next) { \ 819 _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ 820 if (cond(_elt)) { \ 821 _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ 822 _dst_hh->key = _src_hh->key; \ 823 _dst_hh->keylen = _src_hh->keylen; \ 824 _dst_hh->hashv = _src_hh->hashv; \ 825 _dst_hh->prev = _last_elt; \ 826 _dst_hh->next = NULL; \ 827 if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ 828 if (!dst) { \ 829 dst = TYPEOF(dst)_elt; \ 830 HASH_MAKE_TABLE(hh_dst,dst); \ 831 } else { \ 832 _dst_hh->tbl = (dst)->hh_dst.tbl; \ 833 } \ 834 HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ 835 HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ 836 (dst)->hh_dst.tbl->num_items++; \ 837 _last_elt = _elt; \ 838 _last_elt_hh = _dst_hh; \ 839 } \ 840 } \ 841 } \ 842 } \ 843 HASH_FSCK(hh_dst,dst); \ 844 } while (0) 845 846 #define HASH_CLEAR(hh,head) \ 847 do { \ 848 if (head) { \ 849 uthash_free((head)->hh.tbl->buckets ); \ 850 uthash_free((head)->hh.tbl); \ 851 (head)=NULL; \ 852 } \ 853 } while(0) 854 855 /* obtain a count of items in the hash */ 856 #define HASH_COUNT(head) HASH_CNT(hh,head) 857 #define HASH_CNT(hh,head) (head?(head->hh.tbl->num_items):0) 858 859 typedef struct UT_hash_bucket { 860 struct UT_hash_handle *hh_head; 861 unsigned count; 862 863 /* expand_mult is normally set to 0. In this situation, the max chain length 864 * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If 865 * the bucket's chain exceeds this length, bucket expansion is triggered). 866 * However, setting expand_mult to a non-zero value delays bucket expansion 867 * (that would be triggered by additions to this particular bucket) 868 * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. 869 * (The multiplier is simply expand_mult+1). The whole idea of this 870 * multiplier is to reduce bucket expansions, since they are expensive, in 871 * situations where we know that a particular bucket tends to be overused. 872 * It is better to let its chain length grow to a longer yet-still-bounded 873 * value, than to do an O(n) bucket expansion too often. 874 */ 875 unsigned expand_mult; 876 877 } UT_hash_bucket; 878 879 /* random signature used only to find hash tables in external analysis */ 880 #define HASH_SIGNATURE 0xa0111fe1 881 #define HASH_BLOOM_SIGNATURE 0xb12220f2 882 883 typedef struct UT_hash_table { 884 UT_hash_bucket *buckets; 885 unsigned num_buckets, log2_num_buckets; 886 unsigned num_items; 887 struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ 888 ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ 889 890 /* in an ideal situation (all buckets used equally), no bucket would have 891 * more than ceil(#items/#buckets) items. that's the ideal chain length. */ 892 unsigned ideal_chain_maxlen; 893 894 /* nonideal_items is the number of items in the hash whose chain position 895 * exceeds the ideal chain maxlen. these items pay the penalty for an uneven 896 * hash distribution; reaching them in a chain traversal takes >ideal steps */ 897 unsigned nonideal_items; 898 899 /* ineffective expands occur when a bucket doubling was performed, but 900 * afterward, more than half the items in the hash had nonideal chain 901 * positions. If this happens on two consecutive expansions we inhibit any 902 * further expansion, as it's not helping; this happens when the hash 903 * function isn't a good fit for the key domain. When expansion is inhibited 904 * the hash will still work, albeit no longer in constant time. */ 905 unsigned ineff_expands, noexpand; 906 907 uint32_t signature; /* used only to find hash tables in external analysis */ 908 #ifdef HASH_BLOOM 909 uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ 910 uint8_t *bloom_bv; 911 char bloom_nbits; 912 #endif 913 914 } UT_hash_table; 915 916 typedef struct UT_hash_handle { 917 struct UT_hash_table *tbl; 918 void *prev; /* prev element in app order */ 919 void *next; /* next element in app order */ 920 struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ 921 struct UT_hash_handle *hh_next; /* next hh in bucket order */ 922 void *key; /* ptr to enclosing struct's key */ 923 unsigned keylen; /* enclosing struct's key len */ 924 unsigned hashv; /* result of hash-fcn(key) */ 925 } UT_hash_handle; 926 927 #endif /* UTHASH_H */ 928