1 /* 2 Copyright (c) 2003-2011, Troy D. Hanson http://uthash.sourceforge.net 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 15 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #ifndef UTHASH_H 25 #define UTHASH_H 26 27 #include <string.h> /* memcmp,strlen */ 28 #include <stddef.h> /* ptrdiff_t */ 29 #include <stdlib.h> /* exit() */ 30 31 /* These macros use decltype or the earlier __typeof GNU extension. 32 As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 33 when compiling c++ source) this code uses whatever method is needed 34 or, for VS2008 where neither is available, uses casting workarounds. */ 35 #ifdef _MSC_VER /* MS compiler */ 36 #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 37 #define DECLTYPE(x) (decltype(x)) 38 #else /* VS2008 or older (or VS2010 in C mode) */ 39 #define NO_DECLTYPE 40 #define DECLTYPE(x) 41 #endif 42 #else /* GNU, Sun and other compilers */ 43 #define DECLTYPE(x) (__typeof(x)) 44 #endif 45 46 #ifdef NO_DECLTYPE 47 #define DECLTYPE_ASSIGN(dst,src) \ 48 do { \ 49 char **_da_dst = (char**)(&(dst)); \ 50 *_da_dst = (char*)(src); \ 51 } while(0) 52 #else 53 #define DECLTYPE_ASSIGN(dst,src) \ 54 do { \ 55 (dst) = DECLTYPE(dst)(src); \ 56 } while(0) 57 #endif 58 59 /* a number of the hash function use uint32_t which isn't defined on win32 */ 60 #ifdef _MSC_VER 61 typedef unsigned int uint32_t; 62 typedef unsigned char uint8_t; 63 #else 64 #include <inttypes.h> /* uint32_t */ 65 #endif 66 67 #define UTHASH_VERSION 1.9.4 68 69 #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ 70 #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 71 #define uthash_free(ptr,sz) free(ptr) /* free fcn */ 72 73 #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 74 #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 75 76 /* initial number of buckets */ 77 #define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ 78 #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ 79 #define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ 80 81 /* calculate the element whose hash handle address is hhe */ 82 #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 83 84 #define HASH_FIND(hh,head,keyptr,keylen,out) \ 85 do { \ 86 unsigned _hf_bkt,_hf_hashv; \ 87 out=NULL; \ 88 if (head) { \ 89 HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ 90 if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ 91 HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ 92 keyptr,keylen,out); \ 93 } \ 94 } \ 95 } while (0) 96 97 #ifdef HASH_BLOOM 98 #define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) 99 #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) 100 #define HASH_BLOOM_MAKE(tbl) \ 101 do { \ 102 (tbl)->bloom_nbits = HASH_BLOOM; \ 103 (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 104 if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ 105 memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ 106 (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 107 } while (0); 108 109 #define HASH_BLOOM_FREE(tbl) \ 110 do { \ 111 uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 112 } while (0); 113 114 #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) 115 #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) 116 117 #define HASH_BLOOM_ADD(tbl,hashv) \ 118 HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) 119 120 #define HASH_BLOOM_TEST(tbl,hashv) \ 121 HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) 122 123 #else 124 #define HASH_BLOOM_MAKE(tbl) 125 #define HASH_BLOOM_FREE(tbl) 126 #define HASH_BLOOM_ADD(tbl,hashv) 127 #define HASH_BLOOM_TEST(tbl,hashv) (1) 128 #endif 129 130 #define HASH_MAKE_TABLE(hh,head) \ 131 do { \ 132 (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ 133 sizeof(UT_hash_table)); \ 134 if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ 135 memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ 136 (head)->hh.tbl->tail = &((head)->hh); \ 137 (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 138 (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 139 (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 140 (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 141 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 142 if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ 143 memset((head)->hh.tbl->buckets, 0, \ 144 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 145 HASH_BLOOM_MAKE((head)->hh.tbl); \ 146 (head)->hh.tbl->signature = HASH_SIGNATURE; \ 147 } while(0) 148 149 #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 150 HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add) 151 152 #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 153 do { \ 154 unsigned _ha_bkt; \ 155 (add)->hh.next = NULL; \ 156 (add)->hh.key = (char*)keyptr; \ 157 (add)->hh.keylen = keylen_in; \ 158 if (!(head)) { \ 159 head = (add); \ 160 (head)->hh.prev = NULL; \ 161 HASH_MAKE_TABLE(hh,head); \ 162 } else { \ 163 (head)->hh.tbl->tail->next = (add); \ 164 (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 165 (head)->hh.tbl->tail = &((add)->hh); \ 166 } \ 167 (head)->hh.tbl->num_items++; \ 168 (add)->hh.tbl = (head)->hh.tbl; \ 169 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ 170 (add)->hh.hashv, _ha_bkt); \ 171 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ 172 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ 173 HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ 174 HASH_FSCK(hh,head); \ 175 } while(0) 176 177 #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ 178 do { \ 179 bkt = ((hashv) & ((num_bkts) - 1)); \ 180 } while(0) 181 182 /* delete "delptr" from the hash table. 183 * "the usual" patch-up process for the app-order doubly-linked-list. 184 * The use of _hd_hh_del below deserves special explanation. 185 * These used to be expressed using (delptr) but that led to a bug 186 * if someone used the same symbol for the head and deletee, like 187 * HASH_DELETE(hh,users,users); 188 * We want that to work, but by changing the head (users) below 189 * we were forfeiting our ability to further refer to the deletee (users) 190 * in the patch-up process. Solution: use scratch space to 191 * copy the deletee pointer, then the latter references are via that 192 * scratch pointer rather than through the repointed (users) symbol. 193 */ 194 #define HASH_DELETE(hh,head,delptr) \ 195 do { \ 196 unsigned _hd_bkt; \ 197 struct UT_hash_handle *_hd_hh_del; \ 198 if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ 199 uthash_free((head)->hh.tbl->buckets, \ 200 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 201 HASH_BLOOM_FREE((head)->hh.tbl); \ 202 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 203 head = NULL; \ 204 } else { \ 205 _hd_hh_del = &((delptr)->hh); \ 206 if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ 207 (head)->hh.tbl->tail = \ 208 (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ 209 (head)->hh.tbl->hho); \ 210 } \ 211 if ((delptr)->hh.prev) { \ 212 ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ 213 (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ 214 } else { \ 215 DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ 216 } \ 217 if (_hd_hh_del->next) { \ 218 ((UT_hash_handle*)((char*)_hd_hh_del->next + \ 219 (head)->hh.tbl->hho))->prev = \ 220 _hd_hh_del->prev; \ 221 } \ 222 HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 223 HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 224 (head)->hh.tbl->num_items--; \ 225 } \ 226 HASH_FSCK(hh,head); \ 227 } while (0) 228 229 230 /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 231 #define HASH_FIND_STR(head,findstr,out) \ 232 HASH_FIND(hh,head,findstr,strlen(findstr),out) 233 #define HASH_ADD_STR(head,strfield,add) \ 234 HASH_ADD(hh,head,strfield,strlen(add->strfield),add) 235 #define HASH_FIND_INT(head,findint,out) \ 236 HASH_FIND(hh,head,findint,sizeof(int),out) 237 #define HASH_ADD_INT(head,intfield,add) \ 238 HASH_ADD(hh,head,intfield,sizeof(int),add) 239 #define HASH_FIND_PTR(head,findptr,out) \ 240 HASH_FIND(hh,head,findptr,sizeof(void *),out) 241 #define HASH_ADD_PTR(head,ptrfield,add) \ 242 HASH_ADD(hh,head,ptrfield,sizeof(void *),add) 243 #define HASH_DEL(head,delptr) \ 244 HASH_DELETE(hh,head,delptr) 245 246 /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 247 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 248 */ 249 #ifdef HASH_DEBUG 250 #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) 251 #define HASH_FSCK(hh,head) \ 252 do { \ 253 unsigned _bkt_i; \ 254 unsigned _count, _bkt_count; \ 255 char *_prev; \ 256 struct UT_hash_handle *_thh; \ 257 if (head) { \ 258 _count = 0; \ 259 for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ 260 _bkt_count = 0; \ 261 _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 262 _prev = NULL; \ 263 while (_thh) { \ 264 if (_prev != (char*)(_thh->hh_prev)) { \ 265 HASH_OOPS("invalid hh_prev %p, actual %p\n", \ 266 _thh->hh_prev, _prev ); \ 267 } \ 268 _bkt_count++; \ 269 _prev = (char*)(_thh); \ 270 _thh = _thh->hh_next; \ 271 } \ 272 _count += _bkt_count; \ 273 if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 274 HASH_OOPS("invalid bucket count %d, actual %d\n", \ 275 (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 276 } \ 277 } \ 278 if (_count != (head)->hh.tbl->num_items) { \ 279 HASH_OOPS("invalid hh item count %d, actual %d\n", \ 280 (head)->hh.tbl->num_items, _count ); \ 281 } \ 282 /* traverse hh in app order; check next/prev integrity, count */ \ 283 _count = 0; \ 284 _prev = NULL; \ 285 _thh = &(head)->hh; \ 286 while (_thh) { \ 287 _count++; \ 288 if (_prev !=(char*)(_thh->prev)) { \ 289 HASH_OOPS("invalid prev %p, actual %p\n", \ 290 _thh->prev, _prev ); \ 291 } \ 292 _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 293 _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ 294 (head)->hh.tbl->hho) : NULL ); \ 295 } \ 296 if (_count != (head)->hh.tbl->num_items) { \ 297 HASH_OOPS("invalid app item count %d, actual %d\n", \ 298 (head)->hh.tbl->num_items, _count ); \ 299 } \ 300 } \ 301 } while (0) 302 #else 303 #define HASH_FSCK(hh,head) 304 #endif 305 306 /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 307 * the descriptor to which this macro is defined for tuning the hash function. 308 * The app can #include <unistd.h> to get the prototype for write(2). */ 309 #ifdef HASH_EMIT_KEYS 310 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 311 do { \ 312 unsigned _klen = fieldlen; \ 313 write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 314 write(HASH_EMIT_KEYS, keyptr, fieldlen); \ 315 } while (0) 316 #else 317 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 318 #endif 319 320 /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ 321 #ifdef HASH_FUNCTION 322 #define HASH_FCN HASH_FUNCTION 323 #else 324 #define HASH_FCN HASH_JEN 325 #endif 326 327 /* The Bernstein hash function, used in Perl prior to v5.6 */ 328 #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ 329 do { \ 330 unsigned _hb_keylen=keylen; \ 331 char *_hb_key=(char*)(key); \ 332 (hashv) = 0; \ 333 while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ 334 bkt = (hashv) & (num_bkts-1); \ 335 } while (0) 336 337 338 /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 339 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ 340 #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ 341 do { \ 342 unsigned _sx_i; \ 343 char *_hs_key=(char*)(key); \ 344 hashv = 0; \ 345 for(_sx_i=0; _sx_i < keylen; _sx_i++) \ 346 hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 347 bkt = hashv & (num_bkts-1); \ 348 } while (0) 349 350 #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ 351 do { \ 352 unsigned _fn_i; \ 353 char *_hf_key=(char*)(key); \ 354 hashv = 2166136261UL; \ 355 for(_fn_i=0; _fn_i < keylen; _fn_i++) \ 356 hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ 357 bkt = hashv & (num_bkts-1); \ 358 } while(0); 359 360 #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ 361 do { \ 362 unsigned _ho_i; \ 363 char *_ho_key=(char*)(key); \ 364 hashv = 0; \ 365 for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 366 hashv += _ho_key[_ho_i]; \ 367 hashv += (hashv << 10); \ 368 hashv ^= (hashv >> 6); \ 369 } \ 370 hashv += (hashv << 3); \ 371 hashv ^= (hashv >> 11); \ 372 hashv += (hashv << 15); \ 373 bkt = hashv & (num_bkts-1); \ 374 } while(0) 375 376 #define HASH_JEN_MIX(a,b,c) \ 377 do { \ 378 a -= b; a -= c; a ^= ( c >> 13 ); \ 379 b -= c; b -= a; b ^= ( a << 8 ); \ 380 c -= a; c -= b; c ^= ( b >> 13 ); \ 381 a -= b; a -= c; a ^= ( c >> 12 ); \ 382 b -= c; b -= a; b ^= ( a << 16 ); \ 383 c -= a; c -= b; c ^= ( b >> 5 ); \ 384 a -= b; a -= c; a ^= ( c >> 3 ); \ 385 b -= c; b -= a; b ^= ( a << 10 ); \ 386 c -= a; c -= b; c ^= ( b >> 15 ); \ 387 } while (0) 388 389 #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ 390 do { \ 391 unsigned _hj_i,_hj_j,_hj_k; \ 392 char *_hj_key=(char*)(key); \ 393 hashv = 0xfeedbeef; \ 394 _hj_i = _hj_j = 0x9e3779b9; \ 395 _hj_k = keylen; \ 396 while (_hj_k >= 12) { \ 397 _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 398 + ( (unsigned)_hj_key[2] << 16 ) \ 399 + ( (unsigned)_hj_key[3] << 24 ) ); \ 400 _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 401 + ( (unsigned)_hj_key[6] << 16 ) \ 402 + ( (unsigned)_hj_key[7] << 24 ) ); \ 403 hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 404 + ( (unsigned)_hj_key[10] << 16 ) \ 405 + ( (unsigned)_hj_key[11] << 24 ) ); \ 406 \ 407 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 408 \ 409 _hj_key += 12; \ 410 _hj_k -= 12; \ 411 } \ 412 hashv += keylen; \ 413 switch ( _hj_k ) { \ 414 case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ 415 case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ 416 case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ 417 case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ 418 case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ 419 case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ 420 case 5: _hj_j += _hj_key[4]; \ 421 case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ 422 case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ 423 case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ 424 case 1: _hj_i += _hj_key[0]; \ 425 } \ 426 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 427 bkt = hashv & (num_bkts-1); \ 428 } while(0) 429 430 /* The Paul Hsieh hash function */ 431 #undef get16bits 432 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 433 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 434 #define get16bits(d) (*((const uint16_t *) (d))) 435 #endif 436 437 #if !defined (get16bits) 438 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 439 +(uint32_t)(((const uint8_t *)(d))[0]) ) 440 #endif 441 #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ 442 do { \ 443 char *_sfh_key=(char*)(key); \ 444 uint32_t _sfh_tmp, _sfh_len = keylen; \ 445 \ 446 int _sfh_rem = _sfh_len & 3; \ 447 _sfh_len >>= 2; \ 448 hashv = 0xcafebabe; \ 449 \ 450 /* Main loop */ \ 451 for (;_sfh_len > 0; _sfh_len--) { \ 452 hashv += get16bits (_sfh_key); \ 453 _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ 454 hashv = (hashv << 16) ^ _sfh_tmp; \ 455 _sfh_key += 2*sizeof (uint16_t); \ 456 hashv += hashv >> 11; \ 457 } \ 458 \ 459 /* Handle end cases */ \ 460 switch (_sfh_rem) { \ 461 case 3: hashv += get16bits (_sfh_key); \ 462 hashv ^= hashv << 16; \ 463 hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ 464 hashv += hashv >> 11; \ 465 break; \ 466 case 2: hashv += get16bits (_sfh_key); \ 467 hashv ^= hashv << 11; \ 468 hashv += hashv >> 17; \ 469 break; \ 470 case 1: hashv += *_sfh_key; \ 471 hashv ^= hashv << 10; \ 472 hashv += hashv >> 1; \ 473 } \ 474 \ 475 /* Force "avalanching" of final 127 bits */ \ 476 hashv ^= hashv << 3; \ 477 hashv += hashv >> 5; \ 478 hashv ^= hashv << 4; \ 479 hashv += hashv >> 17; \ 480 hashv ^= hashv << 25; \ 481 hashv += hashv >> 6; \ 482 bkt = hashv & (num_bkts-1); \ 483 } while(0); 484 485 #ifdef HASH_USING_NO_STRICT_ALIASING 486 /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. 487 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. 488 * MurmurHash uses the faster approach only on CPU's where we know it's safe. 489 * 490 * Note the preprocessor built-in defines can be emitted using: 491 * 492 * gcc -m64 -dM -E - < /dev/null (on gcc) 493 * cc -## a.c (where a.c is a simple test file) (Sun Studio) 494 */ 495 #if (defined(__i386__) || defined(__x86_64__)) 496 #define MUR_GETBLOCK(p,i) p[i] 497 #else /* non intel */ 498 #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) 499 #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1) 500 #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2) 501 #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3) 502 #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) 503 #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) 504 #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) 505 #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) 506 #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) 507 #else /* assume little endian non-intel */ 508 #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) 509 #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) 510 #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) 511 #endif 512 #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ 513 (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ 514 (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \ 515 MUR_ONE_THREE(p)))) 516 #endif 517 #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) 518 #define MUR_FMIX(_h) \ 519 do { \ 520 _h ^= _h >> 16; \ 521 _h *= 0x85ebca6b; \ 522 _h ^= _h >> 13; \ 523 _h *= 0xc2b2ae35l; \ 524 _h ^= _h >> 16; \ 525 } while(0) 526 527 #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ 528 do { \ 529 const uint8_t *_mur_data = (const uint8_t*)(key); \ 530 const int _mur_nblocks = (keylen) / 4; \ 531 uint32_t _mur_h1 = 0xf88D5353; \ 532 uint32_t _mur_c1 = 0xcc9e2d51; \ 533 uint32_t _mur_c2 = 0x1b873593; \ 534 const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \ 535 int _mur_i; \ 536 for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \ 537 uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ 538 _mur_k1 *= _mur_c1; \ 539 _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 540 _mur_k1 *= _mur_c2; \ 541 \ 542 _mur_h1 ^= _mur_k1; \ 543 _mur_h1 = MUR_ROTL32(_mur_h1,13); \ 544 _mur_h1 = _mur_h1*5+0xe6546b64; \ 545 } \ 546 const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \ 547 uint32_t _mur_k1=0; \ 548 switch((keylen) & 3) { \ 549 case 3: _mur_k1 ^= _mur_tail[2] << 16; \ 550 case 2: _mur_k1 ^= _mur_tail[1] << 8; \ 551 case 1: _mur_k1 ^= _mur_tail[0]; \ 552 _mur_k1 *= _mur_c1; \ 553 _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 554 _mur_k1 *= _mur_c2; \ 555 _mur_h1 ^= _mur_k1; \ 556 } \ 557 _mur_h1 ^= (keylen); \ 558 MUR_FMIX(_mur_h1); \ 559 hashv = _mur_h1; \ 560 bkt = hashv & (num_bkts-1); \ 561 } while(0) 562 #endif /* HASH_USING_NO_STRICT_ALIASING */ 563 564 /* key comparison function; return 0 if keys equal */ 565 #define HASH_KEYCMP(a,b,len) memcmp(a,b,len) 566 567 /* iterate over items in a known bucket to find desired item */ 568 #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ 569 do { \ 570 if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ 571 else out=NULL; \ 572 while (out) { \ 573 if (out->hh.keylen == keylen_in) { \ 574 if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \ 575 } \ 576 if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \ 577 else out = NULL; \ 578 } \ 579 } while(0) 580 581 /* add an item to a bucket */ 582 #define HASH_ADD_TO_BKT(head,addhh) \ 583 do { \ 584 head.count++; \ 585 (addhh)->hh_next = head.hh_head; \ 586 (addhh)->hh_prev = NULL; \ 587 if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ 588 (head).hh_head=addhh; \ 589 if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ 590 && (addhh)->tbl->noexpand != 1) { \ 591 HASH_EXPAND_BUCKETS((addhh)->tbl); \ 592 } \ 593 } while(0) 594 595 /* remove an item from a given bucket */ 596 #define HASH_DEL_IN_BKT(hh,head,hh_del) \ 597 (head).count--; \ 598 if ((head).hh_head == hh_del) { \ 599 (head).hh_head = hh_del->hh_next; \ 600 } \ 601 if (hh_del->hh_prev) { \ 602 hh_del->hh_prev->hh_next = hh_del->hh_next; \ 603 } \ 604 if (hh_del->hh_next) { \ 605 hh_del->hh_next->hh_prev = hh_del->hh_prev; \ 606 } 607 608 /* Bucket expansion has the effect of doubling the number of buckets 609 * and redistributing the items into the new buckets. Ideally the 610 * items will distribute more or less evenly into the new buckets 611 * (the extent to which this is true is a measure of the quality of 612 * the hash function as it applies to the key domain). 613 * 614 * With the items distributed into more buckets, the chain length 615 * (item count) in each bucket is reduced. Thus by expanding buckets 616 * the hash keeps a bound on the chain length. This bounded chain 617 * length is the essence of how a hash provides constant time lookup. 618 * 619 * The calculation of tbl->ideal_chain_maxlen below deserves some 620 * explanation. First, keep in mind that we're calculating the ideal 621 * maximum chain length based on the *new* (doubled) bucket count. 622 * In fractions this is just n/b (n=number of items,b=new num buckets). 623 * Since the ideal chain length is an integer, we want to calculate 624 * ceil(n/b). We don't depend on floating point arithmetic in this 625 * hash, so to calculate ceil(n/b) with integers we could write 626 * 627 * ceil(n/b) = (n/b) + ((n%b)?1:0) 628 * 629 * and in fact a previous version of this hash did just that. 630 * But now we have improved things a bit by recognizing that b is 631 * always a power of two. We keep its base 2 log handy (call it lb), 632 * so now we can write this with a bit shift and logical AND: 633 * 634 * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) 635 * 636 */ 637 #define HASH_EXPAND_BUCKETS(tbl) \ 638 do { \ 639 unsigned _he_bkt; \ 640 unsigned _he_bkt_i; \ 641 struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ 642 UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ 643 _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 644 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 645 if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ 646 memset(_he_new_buckets, 0, \ 647 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 648 tbl->ideal_chain_maxlen = \ 649 (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ 650 ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \ 651 tbl->nonideal_items = 0; \ 652 for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ 653 { \ 654 _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ 655 while (_he_thh) { \ 656 _he_hh_nxt = _he_thh->hh_next; \ 657 HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ 658 _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ 659 if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ 660 tbl->nonideal_items++; \ 661 _he_newbkt->expand_mult = _he_newbkt->count / \ 662 tbl->ideal_chain_maxlen; \ 663 } \ 664 _he_thh->hh_prev = NULL; \ 665 _he_thh->hh_next = _he_newbkt->hh_head; \ 666 if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ 667 _he_thh; \ 668 _he_newbkt->hh_head = _he_thh; \ 669 _he_thh = _he_hh_nxt; \ 670 } \ 671 } \ 672 uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 673 tbl->num_buckets *= 2; \ 674 tbl->log2_num_buckets++; \ 675 tbl->buckets = _he_new_buckets; \ 676 tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ 677 (tbl->ineff_expands+1) : 0; \ 678 if (tbl->ineff_expands > 1) { \ 679 tbl->noexpand=1; \ 680 uthash_noexpand_fyi(tbl); \ 681 } \ 682 uthash_expand_fyi(tbl); \ 683 } while(0) 684 685 686 /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ 687 /* Note that HASH_SORT assumes the hash handle name to be hh. 688 * HASH_SRT was added to allow the hash handle name to be passed in. */ 689 #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) 690 #define HASH_SRT(hh,head,cmpfcn) \ 691 do { \ 692 unsigned _hs_i; \ 693 unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ 694 struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ 695 if (head) { \ 696 _hs_insize = 1; \ 697 _hs_looping = 1; \ 698 _hs_list = &((head)->hh); \ 699 while (_hs_looping) { \ 700 _hs_p = _hs_list; \ 701 _hs_list = NULL; \ 702 _hs_tail = NULL; \ 703 _hs_nmerges = 0; \ 704 while (_hs_p) { \ 705 _hs_nmerges++; \ 706 _hs_q = _hs_p; \ 707 _hs_psize = 0; \ 708 for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ 709 _hs_psize++; \ 710 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 711 ((void*)((char*)(_hs_q->next) + \ 712 (head)->hh.tbl->hho)) : NULL); \ 713 if (! (_hs_q) ) break; \ 714 } \ 715 _hs_qsize = _hs_insize; \ 716 while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ 717 if (_hs_psize == 0) { \ 718 _hs_e = _hs_q; \ 719 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 720 ((void*)((char*)(_hs_q->next) + \ 721 (head)->hh.tbl->hho)) : NULL); \ 722 _hs_qsize--; \ 723 } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ 724 _hs_e = _hs_p; \ 725 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ 726 ((void*)((char*)(_hs_p->next) + \ 727 (head)->hh.tbl->hho)) : NULL); \ 728 _hs_psize--; \ 729 } else if (( \ 730 cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ 731 DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ 732 ) <= 0) { \ 733 _hs_e = _hs_p; \ 734 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ 735 ((void*)((char*)(_hs_p->next) + \ 736 (head)->hh.tbl->hho)) : NULL); \ 737 _hs_psize--; \ 738 } else { \ 739 _hs_e = _hs_q; \ 740 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 741 ((void*)((char*)(_hs_q->next) + \ 742 (head)->hh.tbl->hho)) : NULL); \ 743 _hs_qsize--; \ 744 } \ 745 if ( _hs_tail ) { \ 746 _hs_tail->next = ((_hs_e) ? \ 747 ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ 748 } else { \ 749 _hs_list = _hs_e; \ 750 } \ 751 _hs_e->prev = ((_hs_tail) ? \ 752 ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ 753 _hs_tail = _hs_e; \ 754 } \ 755 _hs_p = _hs_q; \ 756 } \ 757 _hs_tail->next = NULL; \ 758 if ( _hs_nmerges <= 1 ) { \ 759 _hs_looping=0; \ 760 (head)->hh.tbl->tail = _hs_tail; \ 761 DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ 762 } \ 763 _hs_insize *= 2; \ 764 } \ 765 HASH_FSCK(hh,head); \ 766 } \ 767 } while (0) 768 769 /* This function selects items from one hash into another hash. 770 * The end result is that the selected items have dual presence 771 * in both hashes. There is no copy of the items made; rather 772 * they are added into the new hash through a secondary hash 773 * hash handle that must be present in the structure. */ 774 #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ 775 do { \ 776 unsigned _src_bkt, _dst_bkt; \ 777 void *_last_elt=NULL, *_elt; \ 778 UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ 779 ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ 780 if (src) { \ 781 for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ 782 for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ 783 _src_hh; \ 784 _src_hh = _src_hh->hh_next) { \ 785 _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ 786 if (cond(_elt)) { \ 787 _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ 788 _dst_hh->key = _src_hh->key; \ 789 _dst_hh->keylen = _src_hh->keylen; \ 790 _dst_hh->hashv = _src_hh->hashv; \ 791 _dst_hh->prev = _last_elt; \ 792 _dst_hh->next = NULL; \ 793 if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ 794 if (!dst) { \ 795 DECLTYPE_ASSIGN(dst,_elt); \ 796 HASH_MAKE_TABLE(hh_dst,dst); \ 797 } else { \ 798 _dst_hh->tbl = (dst)->hh_dst.tbl; \ 799 } \ 800 HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ 801 HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ 802 (dst)->hh_dst.tbl->num_items++; \ 803 _last_elt = _elt; \ 804 _last_elt_hh = _dst_hh; \ 805 } \ 806 } \ 807 } \ 808 } \ 809 HASH_FSCK(hh_dst,dst); \ 810 } while (0) 811 812 #define HASH_CLEAR(hh,head) \ 813 do { \ 814 if (head) { \ 815 uthash_free((head)->hh.tbl->buckets, \ 816 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ 817 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 818 (head)=NULL; \ 819 } \ 820 } while(0) 821 822 #ifdef NO_DECLTYPE 823 #define HASH_ITER(hh,head,el,tmp) \ 824 for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ 825 el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) 826 #else 827 #define HASH_ITER(hh,head,el,tmp) \ 828 for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ 829 el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) 830 #endif 831 832 /* obtain a count of items in the hash */ 833 #define HASH_COUNT(head) HASH_CNT(hh,head) 834 #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) 835 836 typedef struct UT_hash_bucket { 837 struct UT_hash_handle *hh_head; 838 unsigned count; 839 840 /* expand_mult is normally set to 0. In this situation, the max chain length 841 * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If 842 * the bucket's chain exceeds this length, bucket expansion is triggered). 843 * However, setting expand_mult to a non-zero value delays bucket expansion 844 * (that would be triggered by additions to this particular bucket) 845 * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. 846 * (The multiplier is simply expand_mult+1). The whole idea of this 847 * multiplier is to reduce bucket expansions, since they are expensive, in 848 * situations where we know that a particular bucket tends to be overused. 849 * It is better to let its chain length grow to a longer yet-still-bounded 850 * value, than to do an O(n) bucket expansion too often. 851 */ 852 unsigned expand_mult; 853 854 } UT_hash_bucket; 855 856 /* random signature used only to find hash tables in external analysis */ 857 #define HASH_SIGNATURE 0xa0111fe1 858 #define HASH_BLOOM_SIGNATURE 0xb12220f2 859 860 typedef struct UT_hash_table { 861 UT_hash_bucket *buckets; 862 unsigned num_buckets, log2_num_buckets; 863 unsigned num_items; 864 struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ 865 ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ 866 867 /* in an ideal situation (all buckets used equally), no bucket would have 868 * more than ceil(#items/#buckets) items. that's the ideal chain length. */ 869 unsigned ideal_chain_maxlen; 870 871 /* nonideal_items is the number of items in the hash whose chain position 872 * exceeds the ideal chain maxlen. these items pay the penalty for an uneven 873 * hash distribution; reaching them in a chain traversal takes >ideal steps */ 874 unsigned nonideal_items; 875 876 /* ineffective expands occur when a bucket doubling was performed, but 877 * afterward, more than half the items in the hash had nonideal chain 878 * positions. If this happens on two consecutive expansions we inhibit any 879 * further expansion, as it's not helping; this happens when the hash 880 * function isn't a good fit for the key domain. When expansion is inhibited 881 * the hash will still work, albeit no longer in constant time. */ 882 unsigned ineff_expands, noexpand; 883 884 uint32_t signature; /* used only to find hash tables in external analysis */ 885 #ifdef HASH_BLOOM 886 uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ 887 uint8_t *bloom_bv; 888 char bloom_nbits; 889 #endif 890 891 } UT_hash_table; 892 893 typedef struct UT_hash_handle { 894 struct UT_hash_table *tbl; 895 void *prev; /* prev element in app order */ 896 void *next; /* next element in app order */ 897 struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ 898 struct UT_hash_handle *hh_next; /* next hh in bucket order */ 899 void *key; /* ptr to enclosing struct's key */ 900 unsigned keylen; /* enclosing struct's key len */ 901 unsigned hashv; /* result of hash-fcn(key) */ 902 } UT_hash_handle; 903 904 #endif /* UTHASH_H */ 905